Merge pull request #10 from antmicro:renode-update-interface PiperOrigin-RevId: 746487798 Change-Id: If24dd90cec280cdb3552fb2dcd4f5d21f7974c55
diff --git a/mpact/sim/decoder/BUILD b/mpact/sim/decoder/BUILD index 75511a5..536ba70 100644 --- a/mpact/sim/decoder/BUILD +++ b/mpact/sim/decoder/BUILD
@@ -117,6 +117,7 @@ ":antlr_parser_wrapper", ":decoder_error_listener", ":format_name", + "@com_google_absl//absl/base:no_destructor", "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set",
diff --git a/mpact/sim/decoder/InstructionSet.g4 b/mpact/sim/decoder/InstructionSet.g4 index b8e71e5..1c5951e 100644 --- a/mpact/sim/decoder/InstructionSet.g4 +++ b/mpact/sim/decoder/InstructionSet.g4
@@ -142,11 +142,19 @@ : INCLUDE STRING_LITERAL ; -// A bundle_declaration has a name and specifies the set of bundles and/or -// slots contained within. At least one slot or bundle has to be specified. - +// A bundle_declaration has a name and specifies a semantic function, which +// is responsible for dispatching the instructions for the bundles and/or +// slots. It also specifies the set of bundles and/or slots it contains. At +// least one slot or bundle has to be specified. bundle_declaration - : BUNDLE bundle_name=IDENT '{' bundle_list? slot_list? '}' + : BUNDLE bundle_name=IDENT '{' bundle_parts* '}' + ; + +bundle_parts + : include_file_list + | bundle_list + | slot_list + | semfunc_spec ';' ; // A bundle list is a non-empty list of bundle identifiers
diff --git a/mpact/sim/decoder/bin_format_visitor.cc b/mpact/sim/decoder/bin_format_visitor.cc index 39ba2dc..97351f3 100644 --- a/mpact/sim/decoder/bin_format_visitor.cc +++ b/mpact/sim/decoder/bin_format_visitor.cc
@@ -62,35 +62,25 @@ template <typename T> static inline T ExtractBits(const uint8_t *data, int data_size, int msb, int width) { - if (width == 0) return 0; - - int byte_low = data_size - ((msb - width) >> 3) - 1; - int byte_high = data_size - (msb >> 3) - 1; - int high_bit = msb & 0x7; - - // If it is only from one byte, extract and return. - if (byte_low == byte_high) { - uint8_t mask = (1 << (high_bit + 1)) - 1; - return (mask & data[byte_high]) >> (high_bit - width + 1); - } - - // Extract from the high order byte. T val = 0; - uint8_t mask = 0xff >> (7 - high_bit); - val = (mask & data[byte_high++]); - int remainder = width - (1 + high_bit); - while (remainder >= 8) { - val = (val << 8) | data[byte_high++]; - remainder -= 8; - } + if (width == 0) return val; - // Extract any remaining bits from the high end of the last byte. - if (remainder > 0) { - val <<= remainder; - int shift = 8 - remainder; - uint8_t mask = 0xff << shift; - val |= (data[byte_high] & mask) >> shift; + int lsb = msb - width + 1; + int byte_low = data_size - (lsb >> 3) - 1; + + + int blsb = lsb & 0x7; + int bits_left = width; + int bits_extracted = 0; + while (bits_left > 0) { + int bwidth = std::min(8 - blsb, bits_left); + uint8_t bmask = ((1 << bwidth) - 1) << blsb; + val |= ((data[byte_low] & bmask) >> blsb) << bits_extracted; + blsb = 0; + bits_left -= bwidth; + bits_extracted += bwidth; + byte_low--; } return val; } @@ -99,6 +89,38 @@ )foo"; +constexpr char kTemplatedInsertBits[] = R"foo( +namespace internal { + +// This function inserts a bitfield width bits wide into the byte vector, +// starting at bit_index bits from the end of data. The lsb has index 0. The +// byte vector is data_size bytes long. There is no error checking that T +// can hold width bits. 
+template <typename T> +static inline void InsertBits(uint8_t *data, int data_size, int msb, int width, + T val) { + if (width == 0) return; + + int lsb = msb - width + 1; + int byte_low = data_size - (lsb >> 3) - 1; + int blsb = lsb & 0x7; + while (width > 0) { + int bwidth = std::min(8 - blsb, width); + T bmask = (1 << bwidth) - 1; + uint8_t bval = (val & bmask); + bmask <<= blsb; + bval <<= blsb; + val >>= bwidth; + data[byte_low] = (data[byte_low] & ~bmask) | (bval & bmask); + blsb = 0; + width -= bwidth; + byte_low--; + } +} + +} // namespace internal +)foo"; + BinFormatVisitor::BinFormatVisitor() { constraint_string_to_type_.emplace("==", ConstraintType::kEq); constraint_string_to_type_.emplace("!=", ConstraintType::kNe); @@ -237,6 +259,7 @@ "#include <cstdint>\n" "\n" "#include \"absl/functional/any_invocable.h\"\n" + "#include \"absl/log/log.h\"\n" "\n\n"); for (auto const &include_file : encoding_info->include_files()) { absl::StrAppend(&h_string, "#include ", include_file, "\n"); @@ -327,14 +350,15 @@ "#include <iostream>\n" "#include <cstdint>\n\n" "#include \"absl/base/no_destructor.h\"\n" - "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"absl/container/flat_hash_map.h\"\n" + "#include \"absl/log/log.h\"\n\n" "#include \"", enum_h_name, "\"\n"); absl::StrAppend(&cc_string, "#include \"", dot_h_name, "\"\n\n" "#include <cstdint>\n\n" "#include \"absl/base/no_destructor.h\"\n" - "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"absl/container/flat_hash_map.h\"\n" "#include \"", enum_h_name, "\"\n"); for (auto &name_space : encoding_info->decoder()->namespaces()) { @@ -342,7 +366,8 @@ absl::StrAppend(&cc_string, name_space_str); absl::StrAppend(&h_string, name_space_str); } - absl::StrAppend(&h_string, "\n"); + // Write out the templated inserter function used by the other methods. + absl::StrAppend(&h_string, "\n", kTemplatedInsertBits, "\n"); absl::StrAppend(&cc_string, "\n"); return std::tie(h_string, cc_string); }
diff --git a/mpact/sim/decoder/bundle.h b/mpact/sim/decoder/bundle.h index ae58095..315cd42 100644 --- a/mpact/sim/decoder/bundle.h +++ b/mpact/sim/decoder/bundle.h
@@ -66,6 +66,10 @@ InstructionSet *instruction_set() const { return instruction_set_; } bool is_marked() const { return is_marked_; } void set_is_marked(bool value) { is_marked_ = value; } + std::string semfunc_code_string() const { return semfunc_code_string_; } + void set_semfunc_code_string(std::string code_string) { + semfunc_code_string_ = std::move(code_string); + } private: BundleDeclCtx *ctx_; @@ -76,6 +80,8 @@ std::string name_; // Name in PascalCase. std::string pascal_name_; + // Semantic function code string. + std::string semfunc_code_string_; // The slots contained within this bundle, including instance indices. std::vector<std::pair<std::string, const std::vector<int>>> slot_uses_; // The bundles contained within this bundle.
diff --git a/mpact/sim/decoder/encoding_group.cc b/mpact/sim/decoder/encoding_group.cc index 30d203f..f722d84 100644 --- a/mpact/sim/decoder/encoding_group.cc +++ b/mpact/sim/decoder/encoding_group.cc
@@ -159,7 +159,7 @@ encoding_group->AddEncoding(enc); } // Avoid useless groups and infinite recursion by deleting any groups that - // are empty and where the all the encodings ended up in the same subgroup. + // are empty and where all the encodings ended up in the same subgroup. if (encoding_group->encoding_vec().empty()) { delete encoding_group; continue; @@ -439,17 +439,20 @@ // For each instruction in the encoding vec, generate the if statement // to see if the instruction is matched. absl::flat_hash_set<std::string> extracted; - // For equal constraints, some can be ignored because those bits are wholly - // considered by the parent groups or the discriminator. + int count = 0; + // For equal constraints, some can be ignored because those bits are + // wholly considered by the parent groups or the discriminator. for (auto *encoding : encoding_vec_) { for (auto *constraint : encoding->equal_constraints()) { ProcessConstraint(extracted, constraint, definitions_ptr); } - EmitEncodingIfStatement(/*indent*/ 0, encoding, opcode_enum, extracted, - definitions_ptr); + count += EmitEncodingIfStatement(/*indent*/ 0, encoding, opcode_enum, + extracted, definitions_ptr); } - absl::StrAppend(definitions_ptr, " return std::make_pair(", opcode_enum, - "::kNone, FormatEnum::kNone);\n"); + if (count > 0) { + absl::StrAppend(definitions_ptr, " return std::make_pair(", opcode_enum, + "::kNone, FormatEnum::kNone);\n"); + } } void EncodingGroup::ProcessConstraint(
diff --git a/mpact/sim/decoder/format.cc b/mpact/sim/decoder/format.cc index c602995..0ea3491 100644 --- a/mpact/sim/decoder/format.cc +++ b/mpact/sim/decoder/format.cc
@@ -143,19 +143,27 @@ } // Return the string containing the integer type used to contain the current -// format. If it is greater than 64 bits, will use a byte array (int8_t *). +// format. If it is greater than 128 bits, will use a byte array (int8_t *). +// If it is 65 to 128 bits, will use absl::[u]int128. +std::string Format::GetUIntType(int bitwidth) const { + if (bitwidth > 128) return "uint8_t *"; + if (bitwidth > 64) return "absl::uint128"; + return absl::StrCat("uint", GetIntTypeBitWidth(bitwidth), "_t"); +} + std::string Format::GetIntType(int bitwidth) const { - if (bitwidth > 64) return "int8_t *"; + if (bitwidth > 128) return "int8_t *"; + if (bitwidth > 64) return "absl::int128"; return absl::StrCat("int", GetIntTypeBitWidth(bitwidth), "_t"); } -// Return the int type byte width (1, 2, 4, 8) or (-1 if it's bigger), of the -// integer type that would fit this format. +// Return the int type bit width (8, 16, 32, 64, 128) or (-1 if it's bigger), +// of the integer type that would fit this format. int Format::GetIntTypeBitWidth(int bitwidth) const { auto shift = absl::bit_width(static_cast<unsigned>(bitwidth)) - 1; if (absl::popcount(static_cast<unsigned>(bitwidth)) > 1) shift++; shift = std::max(shift, 3); - if (shift > 6) return -1; + if (shift > 7) return -1; return 1 << shift; } @@ -208,7 +216,8 @@ } field_or_format->set_high(declared_width_ - computed_width_ - 1); computed_width_ += format->declared_width() * field_or_format->size(); - extractors_.insert(std::make_pair(format->name(), field_or_format)); + extractors_.insert( + std::make_pair(field_or_format->format_alias(), field_or_format)); } if (computed_width_ != declared_width_) { return absl::InternalError(absl::StrCat( @@ -350,17 +359,17 @@ std::string h_output; int return_width = GetIntTypeBitWidth(field->width); std::string result_type_name = - absl::StrCat(field->is_signed ?
"" : "u", GetIntType(return_width)); - std::string argument_type_name = - absl::StrCat("u", GetIntType(computed_width_)); - std::string signature = - absl::StrCat(result_type_name, " Extract", ToPascalCase(field->name), "(", - argument_type_name, " value)"); + field->is_signed ? GetIntType(return_width) : GetUIntType(return_width); + std::string argument_type_name = GetUIntType(computed_width_); + std::string signature = absl::StrCat( + result_type_name, " Extract", ToPascalCase(field->name), "(", + computed_width_ > 128 ? "const " : "", argument_type_name, " value)"); absl::StrAppend(&h_output, "inline ", signature, " {\n"); // Generate extraction function. For fields it's a simple shift and mask if - // the source format width <= 64 bits. + // the source format width <= 64 bits. Slightly more involved with format + // <= 128 bits. For larger formats use the templated extract helper function. std::string expr; if (declared_width_ <= 64) { uint64_t mask = (1ULL << field->width) - 1; @@ -369,21 +378,50 @@ } else { expr = absl::StrCat(" (value >> ", field->low, ") & 0x", absl::Hex(mask)); } + } else if (declared_width_ <= 128) { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + field->width, ") - 1;\n"); + if (field->low == 0) { + expr = absl::StrCat("value & mask"); + } else { + expr = absl::StrCat(" (value >> ", field->low, ") & mask"); + } } else { - // For format width > 64 bits, use the templated extract helper function. + // For format width > 128 bits, use the templated extract helper function. int byte_size = (declared_width_ + 7) / 8; expr = absl::StrCat("internal::ExtractBits<", result_type_name, ">(value, ", byte_size, ", ", field->high, ", ", field->width, ")"); } // Add sign-extension if the field is signed. 
+ std::string sign_extension; if (field->is_signed) { int shift = return_width - field->width; - absl::StrAppend(&h_output, " ", result_type_name, " result = (", expr, - ") << ", shift, ";\n result = result >> ", shift, ";\n", - " return result;\n}\n\n"); + sign_extension = + absl::StrCat(" ", result_type_name, " result = (", expr, ") << ", + shift, ";\n result = result >> ", shift, ";\n"); + expr = "result"; + } + if (declared_width_ <= 64) { + absl::StrAppend(&h_output, sign_extension, " return ", expr, ";\n}\n\n"); + } else if ((declared_width_ <= 128) && (return_width <= 64)) { + absl::StrAppend(&h_output, sign_extension, " return absl::Uint128Low64(", + expr, ");\n}\n\n"); } else { - absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + absl::StrAppend(&h_output, sign_extension, " return ", expr, ";\n}\n\n"); + } + // If the parent format size is not a power of two, also create an extractor + // that takes a uint8_t * parameter. + if ((declared_width_ <= 128) && + (absl::popcount(static_cast<unsigned>(declared_width_)) > 1)) { + absl::StrAppend(&h_output, "inline ", result_type_name, " Extract", + ToPascalCase(field->name), "(const uint8_t *value) {\n"); + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " return internal::ExtractBits<", + result_type_name, ">(value, ", byte_size, ", ", field->high, + ", ", field->width, ");\n}\n\n"); } return h_output; } @@ -393,9 +431,16 @@ // it into the right place in the instruction word. 
std::string Format::GenerateFieldInserter(const Field *field) const { std::string h_output; - absl::StrAppend(&h_output, "static inline uint64_t Insert", - ToPascalCase(field->name), - "(uint64_t value, uint64_t inst_word) {\n"); + std::string field_type_name; + std::string inst_word_type_name = GetUIntType(computed_width_); + if (declared_width_ <= 128) { + field_type_name = inst_word_type_name; + } else { + field_type_name = GetUIntType(field->width); + } + absl::StrAppend(&h_output, "static inline ", inst_word_type_name, " Insert", + ToPascalCase(field->name), "(", field_type_name, " value, ", + inst_word_type_name, " inst_word) {\n"); if (declared_width_ <= 64) { uint64_t mask = ((1ULL << field->width) - 1) << field->low; std::string shift; @@ -404,15 +449,36 @@ } absl::StrAppend(&h_output, " inst_word = (inst_word & ~0x", absl::Hex(mask), "ULL)", " | ((value", shift, ") & 0x", - absl::Hex(mask), "ULL);\n"); + absl::Hex(mask), + "ULL);\n" + " return inst_word;\n" + "}\n"); + } else if (declared_width_ <= 128) { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + field->width, ") - 1;\n"); + if (field->low != 0) { + absl::StrAppend(&h_output, " mask = mask << ", field->low, ";\n"); + } + absl::StrAppend(&h_output, + " inst_word = (inst_word & ~mask) | (value & mask);\n" + " return inst_word;\n" + "}\n"); + } else if (field->width <= 128) { + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", byte_size, + ", ", field->high, ", ", field->width, + ", value);\n" + " return inst_word;\n" + "}\n"); } else { absl::StrAppend( &h_output, - " #error Support for formats > 64 bits not implemented - yet."); + " LOG(FATAL) << \" Support for fields > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); } - absl::StrAppend(&h_output, - " return inst_word;\n" - "}\n"); return h_output; } @@ -421,46 +487,78 @@ // insert its components into the right places in the instruction 
word. std::string Format::GenerateOverlayInserter(Overlay *overlay) const { std::string h_output; - absl::StrAppend(&h_output, "static inline uint64_t Insert", - ToPascalCase(overlay->name()), - "(uint64_t value, uint64_t inst_word) {\n"); + std::string result_type_name = GetUIntType(computed_width_); + std::string overlay_type_name; + if (computed_width_ <= 128) { + overlay_type_name = result_type_name; + } else { + overlay_type_name = GetUIntType(overlay->declared_width()); + } + absl::StrAppend(&h_output, "static inline ", result_type_name, " Insert", + ToPascalCase(overlay->name()), "(", overlay_type_name, + " value, ", result_type_name, " inst_word) {\n"); // Mark error if either the overlay or the format is > 64 bits. - if (overlay->declared_width() > 64) { - absl::StrAppend(&h_output, - " #error Support for overlays > 64 bits not implemented - " - "yet.\n}\n"); + if (overlay->declared_width() > 128) { + absl::StrAppend( + &h_output, + " LOG(FATAL) << \" Support for overlays > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); return h_output; } - if (computed_width_ > 64) { - absl::StrAppend(&h_output, - " #error Support for formats > 64 bits not implemented - " - "yet.\n}\n"); - return h_output; + bool use_mask_variable = false; + int remaining = overlay->declared_width(); + int byte_size = (declared_width_ + 7) / 8; + if (declared_width_ <= 128) { + absl::StrAppend(&h_output, " ", result_type_name, " tmp;\n"); + // Track the leftmost bit in the overlay. + if (declared_width_ > 64) { + absl::StrAppend(&h_output, " absl::uint128 mask;\n"); + use_mask_variable = true; + } + } else { + absl::StrAppend(&h_output, " ", overlay_type_name, " tmp;\n"); + if (overlay->declared_width() > 64) { + absl::StrAppend(&h_output, " absl::uint128 mask;\n"); + use_mask_variable = true; + } } - absl::StrAppend(&h_output, " uint64_t tmp;\n"); - // Track the leftmost bit in the overlay. 
- int left = overlay->declared_width(); for (auto &bits_or_field : overlay->component_vec()) { int width = bits_or_field->width(); // Ignore the bit fields in the overlay. if (bits_or_field->high() < 0) { - left -= width; + remaining -= width; continue; } - uint64_t mask = ((1ULL << width) - 1); std::string shift; - if (left - width > 0) { - shift = absl::StrCat(" >> ", left - width); + if (remaining - width > 0) { + shift = absl::StrCat(" >> ", remaining - width); } - // Extract the bits from the overlay value for the current component. - absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", - absl::Hex(mask), "ULL;\n"); + if (use_mask_variable) { + absl::StrAppend(&h_output, + " mask = 1;\n" + " mask = (mask << ", + width, ") - 1;\n"); + absl::StrAppend(&h_output, " tmp = (value ", shift, ") & mask;\n"); + } else { + uint64_t mask = ((1ULL << width) - 1); + // Extract the bits from the overlay value for the current component. + absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", + absl::Hex(mask), "ULL;\n"); + } shift.clear(); if (bits_or_field->low() != 0) { shift = absl::StrCat(" << ", bits_or_field->low()); } - absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, ");\n"); - left -= width; + if (declared_width_ <= 128) { + absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, ");\n"); + } else { + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", + byte_size, ", ", bits_or_field->high(), ", ", width, + ", tmp);\n"); + } + + remaining -= width; } absl::StrAppend(&h_output, " return inst_word;\n}\n"); return h_output; @@ -472,27 +570,116 @@ std::string Format::GenerateFormatInserter(std::string_view format_alias, const Format *format, int high, int size) const { + if (size > 1) { + return GenerateReplicatedFormatInserter(format_alias, format, high, size); + } + return GenerateSingleFormatInserter(format_alias, format, high); +} + +std::string Format::GenerateReplicatedFormatInserter( + std::string_view format_alias, const 
Format *format, int high, + int size) const { std::string h_output; - std::string target_type_name = absl::StrCat("u", GetIntType(computed_width_)); - absl::StrAppend(&h_output, "static inline uint64_tInsert", - ToPascalCase(format_alias), - "(uint64_t value, uint64_t inst_word) {\n"); - if (declared_width_ > 64) { - absl::StrAppend(&h_output, - " #error Support for formats > 64 bits not implemented - " - "yet.\n}\n"); + std::string target_type_name = GetUIntType(declared_width_); + std::string format_type_name; + + if (declared_width_ <= 128) { + format_type_name = target_type_name; + } else { + format_type_name = GetUIntType(format->declared_width()); + } + absl::StrAppend(&h_output, "static inline ", target_type_name, " Insert", + ToPascalCase(format_alias), "(", "int index, ", + format_type_name, " value, ", target_type_name, + " inst_word) {\n"); + if (format->declared_width() > 128) { + absl::StrAppend( + &h_output, + " LOG(FATAL) << \" Support for formats > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); return h_output; } int width = format->declared_width(); int low = high - width + 1; - uint64_t mask = (1ULL << width) << low; + if (declared_width_ <= 64) { + uint64_t mask = (1ULL << width) - 1; + absl::StrAppend(&h_output, " int low = ", low, " - (index * ", width, + ");\n" + " return (inst_word & (~0x", + absl::Hex(mask), "ULL << low))", " | ((value << low) & (0x", + absl::Hex(mask), "ULL << low));\n}\n"); + } else if (declared_width_ <= 128) { + absl::StrAppend( + &h_output, " int low = ", low, " - (index * ", width, + ");\n" + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + width, + ") - 1;\n" + " mask <<= low;\n" + " return (inst_word & ~mask) | (value << low) & mask;\n}\n"); + } else { + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", byte_size, + ", ", high, " - (index * ", width, "), ", width, + ", value);\n" + " return inst_word;\n}\n"); + } + return h_output; +} + 
+std::string Format::GenerateSingleFormatInserter(std::string_view format_alias, + const Format *format, + int high) const { + std::string h_output; + std::string target_type_name = GetUIntType(declared_width_); + std::string format_type_name; + if (declared_width_ <= 128) { + format_type_name = target_type_name; + } else { + format_type_name = GetUIntType(format->declared_width()); + } + + absl::StrAppend(&h_output, "static inline ", target_type_name, " Insert", + ToPascalCase(format_alias), "(", format_type_name, " value, ", + target_type_name, " inst_word) {\n"); + if (format->declared_width() > 128) { + absl::StrAppend( + &h_output, + " LOG(FATAL) << \" Support for formats > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); + return h_output; + } + int width = format->declared_width(); + int low = high - width + 1; std::string shift; if (low != 0) { shift = absl::StrCat(" << ", low); } - absl::StrAppend(&h_output, " return (inst_word & (~0x", absl::Hex(mask), - "ULL))", " | ((value ", shift, ") & 0x", absl::Hex(mask), - "ULL);\n}\n"); + if (declared_width_ <= 64) { + uint64_t mask = ((1ULL << width) - 1) << low; + absl::StrAppend(&h_output, " return (inst_word & (~0x", absl::Hex(mask), + "ULL))", " | ((value ", shift, ") & 0x", absl::Hex(mask), + "ULL);\n}\n"); + } else if (declared_width_ <= 128) { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + width, ") - 1;\n"); + if (low > 0) { + absl::StrAppend(&h_output, " mask = mask << ", low, ";\n"); + } + absl::StrAppend(&h_output, " return (inst_word & ~mask) | (value ", shift, + ") & mask;\n}\n"); + } else { + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", byte_size, + ", ", high, ", ", width, + ", value);\n" + " return inst_word;\n}\n"); + } return h_output; } @@ -504,23 +691,23 @@ std::string h_output; // For each format generate am extractor. 
int width = format->declared_width(); // An extraction can only be for 64 bits or less. - if (width > 64) { + if (width > 128) { encoding_info_->error_listener()->semanticError( nullptr, absl::StrCat("Cannot generate a format extractor for format '", - format->name(), "': format is wider than 64 bits")); + format->name(), "': format is wider than 128 bits")); return ""; } - std::string return_type = absl::StrCat("u", GetIntType(width)); + std::string return_type = GetUIntType(width); std::string signature = absl::StrCat("inline ", return_type, " Extract", ToPascalCase(format_alias), "("); - if (declared_width_ <= 64) { - // If the source format is <= 64 bits, then use an int type. - std::string arg_type = absl::StrCat("u", GetIntType(declared_width_)); + if (declared_width_ <= 128) { + // If the source format is <= 128 bits, then use an int type. + std::string arg_type = GetUIntType(declared_width_); absl::StrAppend(&signature, arg_type, " value"); } else { // Otherwise use a pointer to uint8_t type. - absl::StrAppend(&signature, "uint8_t *value"); + absl::StrAppend(&signature, "const uint8_t *value"); } // If the format has multiple instances add an index parameter. if (size > 1) { @@ -530,18 +717,33 @@ // Now start the body. absl::StrAppend(&h_output, signature, " {\n"); std::string expr; - if (declared_width_ <= 64) { - // If the source format can be stored in a uint64_t or smaller. - uint64_t mask = (1ULL << width) - 1; + if (declared_width_ <= 128) { + // If the source format can be stored in a uint128 or smaller. 
int low = high - width + 1; - int shift_amount = GetIntTypeBitWidth(declared_width_) - low; std::string shift; if (size > 1) { - shift = absl::StrCat("(", shift_amount, " - (index - 1) * ", width, ")"); + shift = absl::StrCat("(", low, " + (index - 1) * ", width, ")"); } else { - shift = absl::StrCat(shift_amount); + shift = absl::StrCat(low); } - expr = absl::StrCat("(value >> ", shift, ") & 0x", absl::Hex(mask), ";\n"); + if (declared_width_ <= 64) { + uint64_t mask = (1ULL << width) - 1; + expr = + absl::StrCat("(value >> ", shift, ") & 0x", absl::Hex(mask), ";\n"); + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + } else { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + width, ") - 1;\n"); + expr = absl::StrCat("(value >> ", shift, ") & mask"); + if (width <= 64) { + absl::StrAppend(&h_output, " return absl::Uint128Low64(", expr, + ");\n}\n\n"); + } else { + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + } + } } else { // If the source format is stored in uint8_t[]. int byte_size = (declared_width_ + 7) / 8; @@ -551,8 +753,23 @@ absl::StrAppend(&expr, " - (index * ", width, ")"); } absl::StrAppend(&expr, ", ", width, ")"); + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); } - absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + // If the parent format size is not a power of two, also create an extractor + // that takes a uint8_t * parameter. 
+ if ((declared_width_ <= 128) && + (absl::popcount(static_cast<unsigned>(declared_width_)) > 1)) { + absl::StrAppend(&h_output, "inline ", return_type, " Extract", + ToPascalCase(format_alias), "(const uint8_t *value) {\n"); + int byte_size = (declared_width_ + 7) / 8; + expr = absl::StrCat("internal::ExtractBits<", return_type, ">(value, ", + byte_size, ", ", high); + if (size > 1) { + absl::StrAppend(&expr, " - (index * ", width, ")"); + } + absl::StrAppend(&expr, ", ", width, ")"); + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + } return h_output; } @@ -560,9 +777,11 @@ std::string Format::GenerateOverlayExtractor(Overlay *overlay) const { std::string h_output; - std::string return_type = absl::StrCat(overlay->is_signed() ? "" : "u", - GetIntType(overlay->declared_width())); - std::string arg_type = absl::StrCat("u", GetIntType(declared_width_)); + std::string return_type = overlay->is_signed() + ? GetIntType(overlay->declared_width()) + : GetUIntType(overlay->declared_width()); + + std::string arg_type = GetUIntType(declared_width_); std::string signature = absl::StrCat("inline ", return_type, " Extract", ToPascalCase(overlay->name()), "(", arg_type, " value)"); @@ -587,9 +806,15 @@ " result = result >> ", shift, ";\n"); } - absl::StrAppend(&h_output, - " return result;\n" - "}\n\n"); + if ((declared_width_ > 64) && (overlay->declared_width() <= 64)) { + absl::StrAppend(&h_output, + " return UInt128Low64(result();\n" + "}\n\n"); + } else { + absl::StrAppend(&h_output, + " return result;\n" + "}\n\n"); + } return h_output; } @@ -631,11 +856,16 @@ } class_output = absl::StrCat("class ", ToPascalCase(name()), " {\n public:\n", - " ", ToPascalCase(name()), "() = default;\n"); + " ", ToPascalCase(name()), "() = default;\n\n"); // Use a separate namespace for each format. 
h_output = absl::StrCat("namespace ", ToSnakeCase(name()), " {\n\n"); + std::string get_size = absl::StrCat("constexpr int k", ToPascalCase(name()), + "Size = ", declared_width(), ";\n\n"); + absl::StrAppend(&h_output, get_size); + absl::StrAppend(&class_output, "static ", get_size); + // First fields and formats. for (auto &[unused, field_or_format_ptr] : extractors_) { if (field_or_format_ptr->is_field()) {
diff --git a/mpact/sim/decoder/format.h b/mpact/sim/decoder/format.h index 1ebfbbe..c986510 100644 --- a/mpact/sim/decoder/format.h +++ b/mpact/sim/decoder/format.h
@@ -15,6 +15,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <map> #include <string> #include <tuple> #include <vector> @@ -171,9 +172,16 @@ std::string GenerateFormatInserter(std::string_view format_alias, const Format *format, int high, int size) const; + std::string GenerateReplicatedFormatInserter(std::string_view format_alias, + const Format *format, int high, + int size) const; + std::string GenerateSingleFormatInserter(std::string_view format_alias, + const Format *format, + int high) const; std::string GenerateOverlayInserter(Overlay *overlay) const; // Return string representation of the int type that contains bitwidth bits. std::string GetIntType(int bitwidth) const; + std::string GetUIntType(int bitwidth) const; int GetIntTypeBitWidth(int bitwidth) const; std::string name_;
diff --git a/mpact/sim/decoder/instruction_set.cc b/mpact/sim/decoder/instruction_set.cc index 93c61d8..a6d91cf 100644 --- a/mpact/sim/decoder/instruction_set.cc +++ b/mpact/sim/decoder/instruction_set.cc
@@ -343,6 +343,9 @@ Indent(absl::StrCat(" ", pascal_name(), "InstructionSet(")), factory_class_name, " *factory);\n" + " virtual ~", + pascal_name(), + "InstructionSet();\n" " Instruction *Decode(uint64_t address, ", encoding_type, " *encoding);\n" @@ -387,6 +390,10 @@ ToPascalCase(slot_name), "Slot(arch_state_);\n"); } absl::StrAppend(&output, "}\n"); + // Destructor. + absl::StrAppend(&output, class_name, "::~", class_name, "() {\n"); + absl::StrAppend(&output, " // empty for now.\n"); + absl::StrAppend(&output, "}\n"); // Generate the top level decode function body. absl::StrAppend(&output, "Instruction *", class_name, "::Decode(uint64_t address, ", encoding_type, @@ -887,7 +894,9 @@ position, ") {\n" " return absl::InvalidArgumentError(\n" - " absl::StrCat(\"Invalid number of operands (\", " + " absl::StrCat(\"", + opcode->pascal_name(), + ": Invalid number of operands (\", " "num_args, \") - expected ", position, "\"));\n"
diff --git a/mpact/sim/decoder/instruction_set_visitor.cc b/mpact/sim/decoder/instruction_set_visitor.cc index dae84eb..406c6dd 100644 --- a/mpact/sim/decoder/instruction_set_visitor.cc +++ b/mpact/sim/decoder/instruction_set_visitor.cc
@@ -191,7 +191,7 @@ instruction_set->namespaces()); dec_dot_h_file.close(); // Decoder .cc file. - dec_dot_cc_file << GenerateCcFileProlog(dec_dot_h_name, + dec_dot_cc_file << GenerateCcFileProlog(dec_dot_h_name, /*use_includes=*/true, instruction_set->namespaces()); dec_dot_cc_file << instruction_set->GenerateClassDefinitions( dec_dot_h_name, encoding_type_name); @@ -201,7 +201,7 @@ // Enum files. enum_h_file << GenerateSimpleHdrProlog(ToHeaderGuard(enum_h_name), instruction_set->namespaces()); - enum_cc_file << GenerateCcFileProlog(enum_h_name, + enum_cc_file << GenerateCcFileProlog(enum_h_name, /*use_includes=*/false, instruction_set->namespaces()); auto [h_output, cc_output] = instruction_set->GenerateEnums(enum_h_name); enum_h_file << h_output; @@ -575,10 +575,70 @@ Bundle *bundle = new Bundle(ctx->bundle_name->getText(), instruction_set, ctx); instruction_set->AddBundle(bundle); - context_file_map_[ctx->bundle_list()] = context_file_map_.at(ctx); - VisitBundleList(ctx->bundle_list(), bundle); - context_file_map_[ctx->slot_list()] = context_file_map_.at(ctx); - VisitSlotList(ctx->slot_list(), bundle); + int num_slot_lists = 0; + int num_bundle_lists = 0; + int num_include_file_lists = 0; + int num_semfunc_specs = 0; + for (auto *part : ctx->bundle_parts()) { + if (part->slot_list() != nullptr) { + if (num_slot_lists > 0) { + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, + "Multiple slot lists in bundle"); + return; + } + context_file_map_[part->slot_list()] = context_file_map_.at(ctx); + VisitSlotList(part->slot_list(), bundle); + num_slot_lists++; + continue; + } + if (part->bundle_list() != nullptr) { + if (num_bundle_lists > 0) { + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, + "Multiple bundle lists in bundle"); + return; + } + context_file_map_[part->bundle_list()] = context_file_map_.at(ctx); + VisitBundleList(part->bundle_list(), bundle); + num_bundle_lists++; + continue; 
+ } + if (part->include_file_list() != nullptr) { + if (num_include_file_lists > 0) { + error_listener()->semanticError( + file_names_[context_file_map_.at(ctx)], part->start, + "Multiple include file lists in bundle"); + return; + } + for (auto *include_file : part->include_file_list()->include_file()) { + // Insert the string - the call will always succeed, but the insertion + // does not happen if it already exists. + include_files_.insert(include_file->STRING_LITERAL()->getText()); + } + num_include_file_lists++; + continue; + } + if (part->semfunc_spec() != nullptr) { + if (num_semfunc_specs > 0) { + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, + "Multiple semfunc specs in bundle"); + return; + } + std::string string_literal = + part->semfunc_spec()->STRING_LITERAL(0)->getText(); + // Strip double quotes. + std::string code_string = + string_literal.substr(1, string_literal.length() - 2); + bundle->set_semfunc_code_string(code_string); + num_semfunc_specs++; + continue; + } + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, "Unhandled bundle part type"); + return; + } } void InstructionSetVisitor::VisitSlotDeclaration( @@ -2235,7 +2295,7 @@ " virtual ResourceOperandInterface * " "GetComplexResourceOperand", "(SlotEnum slot, int entry, OpcodeEnum opcode, ComplexResourceEnum " - "resource_op, int begin, int end) { return {}; }\n"); + "resource_op, int begin, int end) { return nullptr; }\n"); absl::StrAppend( &output, " virtual std::vector<ResourceOperandInterface *> " @@ -2319,6 +2379,7 @@ "#include <string>\n" "#include <vector>\n" "\n" + "#include \"absl/container/flat_hash_map.h\"\n" "#include \"absl/status/status.h\"\n" "#include \"absl/status/statusor.h\"\n" "#include \"absl/strings/string_view.h\"\n" @@ -2370,7 +2431,7 @@ } std::string InstructionSetVisitor::GenerateCcFileProlog( - absl::string_view hdr_file_name, + absl::string_view hdr_file_name, bool use_includes, const 
std::vector<std::string> &namespaces) { std::string output; // Include files. @@ -2378,9 +2439,10 @@ absl::StrAppend(&output, "\n#include <array>\n\n" "#include \"absl/strings/str_format.h\"\n\n"); - - for (auto &include_file : include_files_) { - absl::StrAppend(&output, "#include ", include_file, "\n"); + if (use_includes) { + for (auto &include_file : include_files_) { + absl::StrAppend(&output, "#include ", include_file, "\n"); + } } absl::StrAppend(&output, "\n"); // Namespaces.
diff --git a/mpact/sim/decoder/instruction_set_visitor.h b/mpact/sim/decoder/instruction_set_visitor.h index e0f19b7..a07edc5 100644 --- a/mpact/sim/decoder/instruction_set_visitor.h +++ b/mpact/sim/decoder/instruction_set_visitor.h
@@ -187,6 +187,7 @@ std::string GenerateHdrFileEpilog(absl::string_view guard_name, const std::vector<std::string> &namespaces); std::string GenerateCcFileProlog(absl::string_view hdr_file_name, + bool use_includes, const std::vector<std::string> &namespaces); std::string GenerateNamespaceEpilog( const std::vector<std::string> &namespaces);
diff --git a/mpact/sim/decoder/mpact_sim_isa.bzl b/mpact/sim/decoder/mpact_sim_isa.bzl index 5a86d18..db01738 100644 --- a/mpact/sim/decoder/mpact_sim_isa.bzl +++ b/mpact/sim/decoder/mpact_sim_isa.bzl
@@ -198,6 +198,8 @@ lib_deps.append("@com_google_absl//absl/container:flat_hash_map") if "@com_google_absl//absl/functional:any_invocable" not in deps: lib_deps.append("@com_google_absl//absl/functional:any_invocable") + if "@com_google_absl//absl/log" not in deps: + lib_deps.append("@com_google_absl//absl/log") if "@com_google_absl//absl/strings:str_format" not in deps: lib_deps.append("@com_google_absl//absl/strings:str_format") if "@com_google_mpact-sim//mpact/sim/generic:arch_state" not in deps:
diff --git a/mpact/sim/decoder/slot.cc b/mpact/sim/decoder/slot.cc index 63372ff..06b48a8 100644 --- a/mpact/sim/decoder/slot.cc +++ b/mpact/sim/decoder/slot.cc
@@ -26,7 +26,9 @@ #include <variant> #include <vector> +#include "absl/base/no_destructor.h" #include "absl/container/btree_set.h" +#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/status/statusor.h" @@ -46,6 +48,15 @@ namespace machine_description { namespace instruction_set { +absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + Slot::operand_setter_name_map_; +absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + Slot::disasm_setter_name_map_; +absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + Slot::resource_setter_name_map_; +absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + Slot::attribute_setter_name_map_; + // This function translates the location specification into a set of '->' // references starting with 'inst->' to get to the operand that is implied. static absl::StatusOr<std::string> TranslateLocator( @@ -274,12 +285,12 @@ // instruction. If no such appropriate function exists, create one. 
std::string Slot::GenerateAttributeSetter(const Instruction *inst) { auto key = CreateAttributeLookupKey(inst); - auto iter = attribute_setter_name_map_.find(key); - if (iter == attribute_setter_name_map_.end()) { - auto index = attribute_setter_name_map_.size(); + auto iter = attribute_setter_name_map_->find(key); + if (iter == attribute_setter_name_map_->end()) { + auto index = attribute_setter_name_map_->size(); std::string func_name = absl::StrCat(pascal_name(), "Slot", "SetAttributes", index); - iter = attribute_setter_name_map_.emplace(key, func_name).first; + iter = attribute_setter_name_map_->emplace(key, func_name).first; absl::StrAppend(&setter_functions_, GenerateAttributeSetterFcn(func_name, inst)); } @@ -477,7 +488,8 @@ encoder, " *encoder_;\n" " std::vector<RE2 *> regex_vec_;\n" - " RE2::Set regex_set_;\n"); + " RE2::Set regex_set_;\n" + " absl::flat_hash_map<int, int> index_to_opcode_map_;\n"); absl::StrAppend(&cc_output, class_name, "::", class_name, "(", instruction_set_->pascal_name(), "EncoderInterfaceBase *encoder) :\n" @@ -503,12 +515,16 @@ for (auto const &[name, inst_ptr] : instruction_map_) { auto [regex, opnd_locators] = GenerateRegEx(inst_ptr, formats); max_args = std::max(max_args, opnd_locators.size()); + std::string opcode_name = + absl::StrCat("OpcodeEnum::k", ToPascalCase(inst_ptr->opcode()->name())); absl::StrAppend(&cc_output, " regex_vec_.push_back(new RE2(", regex, "));\n" " index = regex_set_.Add(", regex, ", &error);\n" - " if (index == -1) return absl::InternalError(error);\n"); + " if (index == -1) return absl::InternalError(error);\n" + " index_to_opcode_map_.insert({index, static_cast<int>(", + opcode_name, ")", "});\n"); } absl::StrAppend(&h_output, " std::string args[", max_args, "];\n" @@ -565,12 +581,13 @@ for (auto index : matches) { std::vector<std::string> values; if (!Extract(text, index, values)) continue; + int opcode_index = index_to_opcode_map_.at(index); )", - " auto result = encode_fcns[index](encoder_, 
SlotEnum::k", + " auto result = encode_fcns[opcode_index](encoder_, SlotEnum::k", pascal_name(), ", entry, \n" " " - "static_cast<OpcodeEnum>(index), address, values, resolver, " + "static_cast<OpcodeEnum>(opcode_index), address, values, resolver, " "relocations);\n", R"( if (!result.status().ok()) { @@ -719,9 +736,9 @@ absl::StrAppend(&key, ":", CreateOperandLookupKey(inst->opcode())); std::string func_name = absl::StrCat( pascal_name(), "Slot", inst->opcode()->pascal_name(), "SetDisasm"); - auto iter = disasm_setter_name_map_.find(key); - if (iter == disasm_setter_name_map_.end()) { - iter = disasm_setter_name_map_.emplace(key, func_name).first; + auto iter = disasm_setter_name_map_->find(key); + if (iter == disasm_setter_name_map_->end()) { + iter = disasm_setter_name_map_->emplace(key, func_name).first; absl::StrAppend(&setter_functions_, GenerateDisasmSetterFcn(func_name, inst)); } @@ -849,12 +866,12 @@ absl::string_view encoding_type) { std::string key = CreateResourceKey(inst->resource_use_vec()); absl::StrAppend(&key, ":", CreateResourceKey(inst->resource_acquire_vec())); - auto iter = resource_setter_name_map_.find(key); - if (iter == resource_setter_name_map_.end()) { - auto index = resource_setter_name_map_.size(); + auto iter = resource_setter_name_map_->find(key); + if (iter == resource_setter_name_map_->end()) { + auto index = resource_setter_name_map_->size(); std::string func_name = absl::StrCat(pascal_name(), "Slot", "SetResources", index); - iter = resource_setter_name_map_.emplace(key, func_name).first; + iter = resource_setter_name_map_->emplace(key, func_name).first; absl::StrAppend(&setter_functions_, GenerateResourceSetterFcn(func_name, inst, encoding_type)); } @@ -1224,14 +1241,15 @@ absl::StrCat(pascal_name(), "SlotSetOperandsNull"), encoding_type, default_instruction_->opcode())); absl::StrAppend( - &output, " {OperandSetter{", pascal_name(), + &output, " {static_cast<int>(OpcodeEnum::kNone), {OperandSetter{", + pascal_name(), 
"SlotSetOperandsNull},\n" " ", GenerateDisassemblySetter(default_instruction_), ",\n", " ", GenerateResourceSetter(default_instruction_, encoding_type), ",\n", " ", GenerateAttributeSetter(default_instruction_), ",\n", " SemFuncSetter{", default_instruction_->semfunc_code_string(), "}, ", - default_instruction_->opcode()->instruction_size(), "},\n"); + default_instruction_->opcode()->instruction_size(), "}},\n"); for (auto const &[unused, inst_ptr] : instruction_map_) { auto *instruction = inst_ptr; std::string opcode_name = instruction->opcode()->pascal_name(); @@ -1246,18 +1264,19 @@ inst = inst->child()) { // Construct operand getter lookup key. std::string key = CreateOperandLookupKey(inst->opcode()); - auto iter = operand_setter_name_map_.find(key); + auto iter = operand_setter_name_map_->find(key); // If the key is not found, create a new getter function, otherwise // reuse the existing one. - if (iter == operand_setter_name_map_.end()) { - auto index = operand_setter_name_map_.size(); + if (iter == operand_setter_name_map_->end()) { + auto index = operand_setter_name_map_->size(); std::string setter_name = absl::StrCat(class_name, "SetOperands", index); absl::StrAppend(&setter_functions_, GenerateOperandSetterFcn(setter_name, encoding_type, inst->opcode())); - iter = operand_setter_name_map_.insert(std::make_pair(key, setter_name)) - .first; + iter = + operand_setter_name_map_->insert(std::make_pair(key, setter_name)) + .first; } absl::StrAppend(&operands_str, sep, iter->second); if (inst->semfunc_code_string().empty()) { @@ -1269,12 +1288,13 @@ } sep = ", "; } - absl::StrAppend(&output, " {OperandSetter{", operands_str, "},\n", - " ", GenerateDisassemblySetter(instruction), ",\n", - " ", GenerateResourceSetter(instruction, encoding_type), - ",\n", " ", GenerateAttributeSetter(instruction), ",\n", + absl::StrAppend(&output, " {static_cast<int>(OpcodeEnum::k", opcode_name, + "), {OperandSetter{", operands_str, "},\n", " ", + 
GenerateDisassemblySetter(instruction), ",\n", " ", + GenerateResourceSetter(instruction, encoding_type), ",\n", + " ", GenerateAttributeSetter(instruction), ",\n", " SemFuncSetter{", code_str, "}, ", - instruction->opcode()->instruction_size(), "},\n"); + instruction->opcode()->instruction_size(), "}},\n"); } return output; } @@ -1293,18 +1313,19 @@ class_name, "(ArchState *arch_state);\n"); // Emit Decode function generated that decodes the slot and creates and // initializes an instruction object, as well as private data members. - absl::StrAppend(&output, " Instruction *Decode(uint64_t address, ", - encoding_type, "* isa_encoding, SlotEnum, int entry);\n", - "\n" - " private:\n" - " ArchState *arch_state_;\n" - " std::array<InstructionInfo, ", - instruction_map_.size() + 1, "> instruction_info_", ";\n", - " static constexpr SlotEnum slot_ = SlotEnum::k", - pascal_name(), - ";\n" - "};\n" - "\n"); + absl::StrAppend( + &output, " Instruction *Decode(uint64_t address, ", encoding_type, + "* isa_encoding, SlotEnum, int entry);\n", + "\n" + " private:\n" + " ArchState *arch_state_;\n" + " absl::flat_hash_map<int, InstructionInfo> instruction_info_;\n", + //" std::array<InstructionInfo, ", + // instruction_map_.size() + 1, "> instruction_info_", ";\n", + " static constexpr SlotEnum slot_ = SlotEnum::k", pascal_name(), + ";\n" + "};\n" + "\n"); return output; }
diff --git a/mpact/sim/decoder/slot.h b/mpact/sim/decoder/slot.h index cc1df51..1f074d0 100644 --- a/mpact/sim/decoder/slot.h +++ b/mpact/sim/decoder/slot.h
@@ -20,6 +20,7 @@ #include <tuple> #include <vector> +#include "absl/base/no_destructor.h" #include "absl/container/btree_map.h" #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" @@ -219,11 +220,16 @@ // Pointer to slot it inherits from. std::vector<BaseSlot> base_slots_; absl::flat_hash_set<const Slot *> predecessor_set_; - // Map from operand getter key to operand getter function name. - absl::flat_hash_map<std::string, std::string> operand_setter_name_map_; - absl::flat_hash_map<std::string, std::string> disasm_setter_name_map_; - absl::flat_hash_map<std::string, std::string> resource_setter_name_map_; - absl::flat_hash_map<std::string, std::string> attribute_setter_name_map_; + // Map from operand getter key to operand getter function name. These are + // static so that they can be shared across different slots. + static absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + operand_setter_name_map_; + static absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + disasm_setter_name_map_; + static absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + resource_setter_name_map_; + static absl::NoDestructor<absl::flat_hash_map<std::string, std::string>> + attribute_setter_name_map_; std::string setter_functions_; // Used to list the unique getters for the operands. absl::flat_hash_set<std::string> pred_operand_getters_;
diff --git a/mpact/sim/generic/BUILD b/mpact/sim/generic/BUILD index 95c31fb..e198eeb 100644 --- a/mpact/sim/generic/BUILD +++ b/mpact/sim/generic/BUILD
@@ -95,7 +95,6 @@ ":core", ":internal", ":program_error", - "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/functional:any_invocable", @@ -271,16 +270,13 @@ "breakpoint_manager.h", ], deps = [ - ":core", ":core_debug_interface", "@com_google_absl//absl/container:btree", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/log", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", ], )
diff --git a/mpact/sim/generic/decoder_interface.h b/mpact/sim/generic/decoder_interface.h index f555f26..86173eb 100644 --- a/mpact/sim/generic/decoder_interface.h +++ b/mpact/sim/generic/decoder_interface.h
@@ -27,7 +27,7 @@ class DecoderInterface { public: // Return a decoded instruction for the given address. If there are errors - // in the instruciton decoding, the decoder should still produce an + // in the instruction decoding, the decoder should still produce an // instruction that can be executed, but its semantic action function should // set an error condition in the simulation when executed. virtual Instruction *DecodeInstruction(uint64_t address) = 0;
diff --git a/mpact/sim/generic/fifo.cc b/mpact/sim/generic/fifo.cc index 7b543ac..ee22930 100644 --- a/mpact/sim/generic/fifo.cc +++ b/mpact/sim/generic/fifo.cc
@@ -17,6 +17,7 @@ #include <string> #include <vector> +#include "absl/status/status.h" #include "absl/strings/string_view.h" #include "mpact/sim/generic/arch_state.h" #include "mpact/sim/generic/component.h"
diff --git a/mpact/sim/generic/fifo.h b/mpact/sim/generic/fifo.h index f5e841b..35cd9a8 100644 --- a/mpact/sim/generic/fifo.h +++ b/mpact/sim/generic/fifo.h
@@ -335,7 +335,7 @@ std::string op_name_; }; -// This is a parial specialization of the Source operand class. This is used +// This is a partial specialization of the Source operand class. This is used // when the element type stored in the data buffer is not an integral type. This // is primarily for when the fifo element type really doesn't model a register // value per se, but a more complex structure such as a dma descriptor. In this
diff --git a/mpact/sim/generic/instruction.h b/mpact/sim/generic/instruction.h index 60ab5d8..af4e7eb 100644 --- a/mpact/sim/generic/instruction.h +++ b/mpact/sim/generic/instruction.h
@@ -176,7 +176,7 @@ disasm_string_ = std::move(disasm); } - std::string AsString() const; + virtual std::string AsString() const; // Setter and getter for the integer attributes. absl::Span<const int> Attributes() const { return attributes_; }
diff --git a/mpact/sim/generic/instruction_helpers.h b/mpact/sim/generic/instruction_helpers.h index 47ba835..b3d3b31 100644 --- a/mpact/sim/generic/instruction_helpers.h +++ b/mpact/sim/generic/instruction_helpers.h
@@ -34,7 +34,7 @@ // destination operand. This version supports different types for the result and // each of the two source operands. template <typename Result, typename Argument1, typename Argument2> -inline void BinaryOp(Instruction *instruction, +inline void BinaryOp(const Instruction *instruction, std::function<Result(Argument1, Argument2)> operation) { Argument1 lhs = generic::GetInstructionSource<Argument1>(instruction, 0); Argument2 rhs = generic::GetInstructionSource<Argument2>(instruction, 1); @@ -49,7 +49,7 @@ // destination operand. This version supports different types for the result // and the operands, but the two source operands must have the same type. template <typename Result, typename Argument> -inline void BinaryOp(Instruction *instruction, +inline void BinaryOp(const Instruction *instruction, std::function<Result(Argument, Argument)> operation) { Argument lhs = generic::GetInstructionSource<Argument>(instruction, 0); Argument rhs = generic::GetInstructionSource<Argument>(instruction, 1); @@ -64,7 +64,7 @@ // destination operand. This version requires both result and source operands // to have the same type. template <typename Result> -inline void BinaryOp(Instruction *instruction, +inline void BinaryOp(const Instruction *instruction, std::function<Result(Result, Result)> operation) { Result lhs = generic::GetInstructionSource<Result>(instruction, 0); Result rhs = generic::GetInstructionSource<Result>(instruction, 1); @@ -79,7 +79,7 @@ // destination operand. This version supports the result and argument having // different types. template <typename Result, typename Argument> -inline void UnaryOp(Instruction *instruction, +inline void UnaryOp(const Instruction *instruction, std::function<Result(Argument)> operation) { Argument lhs = generic::GetInstructionSource<Argument>(instruction, 0); Result dest_value = operation(lhs); @@ -93,7 +93,7 @@ // destination operand. This version requires that the result and argument have // the same type. 
template <typename Result> -inline void UnaryOp(Instruction *instruction, +inline void UnaryOp(const Instruction *instruction, std::function<Result(Result)> operation) { Result lhs = generic::GetInstructionSource<Result>(instruction, 0); Result dest_value = operation(lhs); @@ -107,7 +107,7 @@ template <typename Result, typename Argument1, typename Argument2, typename Argument3> inline void TernaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument1, Argument2, Argument3)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -131,7 +131,7 @@ // the arguments have to all have the same type. template <typename Result, typename Argument> inline void TernaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument, Argument, Argument)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -151,7 +151,7 @@ // requires the result and arguments to have the same type. template <typename Result> inline void TernaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Result, Result, Result)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -171,7 +171,7 @@ // allows for different types for the result and each argument. template <typename Result, typename Argument1, typename Argument2> inline void BinaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument1, Argument2)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -191,7 +191,7 @@ // the arguments have to have the same type. 
template <typename Result, typename Argument> inline void BinaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument, Argument)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -209,7 +209,7 @@ // two operand vector instruction semantic functions. This version // requires the result and arguments to have the same type. template <typename Result> -inline void BinaryVectorOp(Instruction *instruction, +inline void BinaryVectorOp(const Instruction *instruction, std::function<Result(Result, Result)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -227,7 +227,7 @@ // single operand vector instruction semantic functions. This version // allows the result and argument to have different types. template <typename Result, typename Argument> -inline void UnaryVectorOp(Instruction *instruction, +inline void UnaryVectorOp(const Instruction *instruction, std::function<Result(Argument)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -244,7 +244,7 @@ // single operand vector instruction semantic functions. This version // requires the result and argument to have the same type. template <typename Result> -inline void UnaryVectorOp(Instruction *instruction, +inline void UnaryVectorOp(const Instruction *instruction, std::function<Result(Result)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer();
diff --git a/mpact/sim/generic/operand_interface.h b/mpact/sim/generic/operand_interface.h index 60714f8..e39adc5 100644 --- a/mpact/sim/generic/operand_interface.h +++ b/mpact/sim/generic/operand_interface.h
@@ -32,7 +32,7 @@ namespace sim { namespace generic { -// The predicte operand interface is intended primarily as the interface to +// The predicate operand interface is intended primarily as the interface to // read the value of instruction predicates. It is separated from source // predicates to avoid mixing it in with the source operands needed for modeling // the instruction semantics.
diff --git a/mpact/sim/generic/register.cc b/mpact/sim/generic/register.cc index 1d852fd..99c976e 100644 --- a/mpact/sim/generic/register.cc +++ b/mpact/sim/generic/register.cc
@@ -18,10 +18,10 @@ #include <cstring> #include <vector> -#include "absl/base/macros.h" #include "absl/strings/string_view.h" -#include "mpact/sim/generic/ref_count.h" +#include "mpact/sim/generic/data_buffer.h" #include "mpact/sim/generic/simple_resource.h" +#include "mpact/sim/generic/state_item_base.h" namespace mpact { namespace sim {
diff --git a/mpact/sim/util/asm/BUILD b/mpact/sim/util/asm/BUILD index 1620657..1ba2ac2 100644 --- a/mpact/sim/util/asm/BUILD +++ b/mpact/sim/util/asm/BUILD
@@ -21,17 +21,19 @@ cc_library( name = "asm", + srcs = ["resolver.cc"], hdrs = [ "opcode_assembler_interface.h", + "resolver.h", "resolver_interface.h", ], deps = [ "@com_github_serge1_elfio//:elfio", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", ], ) @@ -44,7 +46,6 @@ "@com_github_serge1_elfio//:elfio", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/log", "@com_google_absl//absl/status",
diff --git a/mpact/sim/util/asm/opcode_assembler_interface.h b/mpact/sim/util/asm/opcode_assembler_interface.h index 80782d1..1663787 100644 --- a/mpact/sim/util/asm/opcode_assembler_interface.h +++ b/mpact/sim/util/asm/opcode_assembler_interface.h
@@ -15,12 +15,14 @@ #ifndef MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ #define MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ +#include <cstddef> #include <cstdint> #include <string> #include <vector> #include "absl/functional/any_invocable.h" #include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "elfio/elf_types.hpp" #include "mpact/sim/util/asm/resolver_interface.h" @@ -59,12 +61,13 @@ // Takes the current address, the text for the assembly instruction (including // any label definitions), and a symbol resolver interface.Return ok status if // the text is successfully encoded into the bytes vector. Symbols for any - // labels are added using the callback function interface. - virtual absl::Status Encode(uint64_t address, absl::string_view text, - AddSymbolCallback add_symbol_callback, - ResolverInterface *resolver, - std::vector<uint8_t> &bytes, - std::vector<RelocationInfo> &relocations) = 0; + // labels are added using the callback function interface. The method returns + // the increment to the address after the instruction is encoded. + virtual absl::StatusOr<size_t> Encode( + uint64_t address, absl::string_view text, + AddSymbolCallback add_symbol_callback, ResolverInterface *resolver, + std::vector<uint8_t> &bytes, + std::vector<RelocationInfo> &relocations) = 0; }; } // namespace assembler
diff --git a/mpact/sim/util/asm/resolver.cc b/mpact/sim/util/asm/resolver.cc new file mode 100644 index 0000000..f8ec781 --- /dev/null +++ b/mpact/sim/util/asm/resolver.cc
@@ -0,0 +1,89 @@
+#include "mpact/sim/util/asm/resolver.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "elfio/elf_types.hpp"
+#include "elfio/elfio_section.hpp"
+
+namespace mpact::sim::util::assembler {
+
+namespace {
+
+// Returns the st_value field of entry 'index' of the symbol table held in
+// 'symtab', treating the section data as an array of SymType (Elf32_Sym or
+// Elf64_Sym). An internal error is returned if the section is missing, has no
+// data, or is too small to contain the requested entry. The entry is copied
+// out with memcpy rather than read through a cast pointer, so the read does
+// not depend on the alignment of the section's byte buffer.
+template <typename SymType>
+absl::StatusOr<uint64_t> ReadSymbolValue(const ELFIO::section *symtab,
+                                         ELFIO::Elf_Word index) {
+  if ((symtab == nullptr) || (symtab->get_data() == nullptr)) {
+    return absl::InternalError("SymbolResolver: Symbol table has no data");
+  }
+  const uint64_t num_symbols = symtab->get_size() / sizeof(SymType);
+  if (index >= num_symbols) {
+    return absl::InternalError(
+        absl::StrCat("SymbolResolver: Symbol index ", index,
+                     " is out of range (symbol count: ", num_symbols, ")"));
+  }
+  const char *entry =
+      symtab->get_data() + static_cast<size_t>(index) * sizeof(SymType);
+  SymType sym;
+  std::memcpy(&sym, entry, sizeof(SymType));
+  return static_cast<uint64_t>(sym.st_value);
+}
+
+}  // namespace
+
+// Reports 'text' to the add-symbol callback and resolves it to the value 0.
+absl::StatusOr<uint64_t> ZeroResolver::Resolve(absl::string_view text) {
+  // Any symbol name should be added to the symbol table as an undefined
+  // symbol if it is not already there. When the symbol is defined, the
+  // symbol table will be updated. In the case of generating an executable
+  // ELF file, any unresolved symbols will result in an error. When generating
+  // an object file, any unresolved symbols will remain in the symbol table
+  // and must be handled by the linker.
+  add_symbol_fcn_(text);
+  // Return 0 for any symbol name.
+  return 0;
+}
+
+// Keeps the symbol table section pointer and a reference to the symbol index
+// map; neither is copied, so both must outlive this resolver.
+SymbolResolver::SymbolResolver(
+    int elf_file_class, ELFIO::section *symtab,
+    const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices)
+    : elf_file_class_(elf_file_class),
+      symtab_(symtab),
+      symbol_indices_(symbol_indices) {}
+
+// Resolves the symbol named 'text' to its st_value in the symbol table.
+// Returns an invalid argument error if the name has no entry in the symbol
+// index map, and an internal error if the symbol table entry cannot be read
+// or the ELF file class is neither ELFCLASS32 nor ELFCLASS64.
+absl::StatusOr<uint64_t> SymbolResolver::Resolve(absl::string_view text) {
+  auto iter = symbol_indices_.find(text);
+  if (iter == symbol_indices_.end()) {
+    return absl::InvalidArgumentError(
+        absl::StrCat("SymbolResolver: Symbol '", text, "' not found"));
+  }
+  const ELFIO::Elf_Word index = iter->second;
+  if (elf_file_class_ == ELFCLASS64) {
+    return ReadSymbolValue<ELFIO::Elf64_Sym>(symtab_, index);
+  }
+  if (elf_file_class_ == ELFCLASS32) {
+    return ReadSymbolValue<ELFIO::Elf32_Sym>(symtab_, index);
+  }
+  return absl::InternalError("Unsupported ELF file class");
+}
+
+}  // namespace mpact::sim::util::assembler
diff --git a/mpact/sim/util/asm/resolver.h b/mpact/sim/util/asm/resolver.h new file mode 100644 index 0000000..3adb3ff --- /dev/null +++ b/mpact/sim/util/asm/resolver.h
@@ -0,0 +1,75 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_RESOLVER_H_
+#define MPACT_SIM_UTIL_ASM_RESOLVER_H_
+
+#include <cstdint>
+#include <string>
+#include <utility>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/functional/any_invocable.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+#include "elfio/elf_types.hpp"
+#include "elfio/elfio.hpp"  // IWYU pragma: keep
+#include "elfio/elfio_section.hpp"
+#include "mpact/sim/util/asm/resolver_interface.h"
+
+namespace mpact::sim::util::assembler {
+
+// A symbol resolver that always returns 0 for any symbol name. This is used
+// for the first pass of parsing the assembly code, when we are just creating
+// the symbols and computing the sizes of the sections.
+class ZeroResolver : public ResolverInterface {
+ public:
+  // Constructor takes a callback function that will be called for each symbol
+  // name encountered so that it can be added to the symbol table. The callback
+  // is moved into the resolver, so move-only callables are accepted too.
+  template <typename T>
+  ZeroResolver(T add_symbol_fcn) : add_symbol_fcn_(std::move(add_symbol_fcn)) {}
+  // Calls the add-symbol callback with `text` and returns 0.
+  absl::StatusOr<uint64_t> Resolve(absl::string_view text) override;
+
+ private:
+  absl::AnyInvocable<void(absl::string_view)> add_symbol_fcn_;
+};
+
+// A symbol resolver that uses the symbol table and the symbol indices to
+// resolve symbol names to values. The resolver stores the `symtab` pointer
+// and a reference to `symbol_indices` without copying them, so both must
+// outlive the resolver.
+class SymbolResolver : public ResolverInterface {
+ public:
+  SymbolResolver(
+      int elf_file_class, ELFIO::section *symtab,
+      const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices);
+  // Returns the st_value of the symbol table entry for `text`. Returns an
+  // error status if `text` is not in the symbol index map or if the ELF file
+  // class is neither ELFCLASS32 nor ELFCLASS64.
+  absl::StatusOr<uint64_t> Resolve(absl::string_view text) override;
+
+ private:
+  // Elf file class.
+  int elf_file_class_ = 0;
+  // The symbol table ELF section.
+  ELFIO::section *symtab_;
+  // Map from symbol name to symbol index in the symbol table.
+  const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices_;
+};
+
+}  // namespace mpact::sim::util::assembler
+
+#endif  // MPACT_SIM_UTIL_ASM_RESOLVER_H_
diff --git a/mpact/sim/util/asm/simple_assembler.cc b/mpact/sim/util/asm/simple_assembler.cc index 1622c79..aff0218 100644 --- a/mpact/sim/util/asm/simple_assembler.cc +++ b/mpact/sim/util/asm/simple_assembler.cc
@@ -17,15 +17,14 @@ #include <cstddef> #include <cstdint> #include <cstring> +#include <functional> #include <istream> #include <ostream> #include <string> -#include <utility> #include <vector> #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" -#include "absl/functional/any_invocable.h" #include "absl/functional/bind_front.h" #include "absl/log/log.h" #include "absl/status/status.h" @@ -34,11 +33,13 @@ #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "elfio/elf_types.hpp" +#include "elfio/elfio.hpp" // IWYU pragma: keep #include "elfio/elfio_section.hpp" #include "elfio/elfio_segment.hpp" #include "elfio/elfio_strings.hpp" #include "elfio/elfio_symbols.hpp" #include "mpact/sim/util/asm/opcode_assembler_interface.h" +#include "mpact/sim/util/asm/resolver.h" #include "mpact/sim/util/asm/resolver_interface.h" #include "re2/re2.h" @@ -47,69 +48,6 @@ namespace util { namespace assembler { -// A symbol resolver that always returns 0 for any symbol name. This is used -// for the first pass of parsing the assembly code, when we are just creating -// the symbols and computing the sizes of the sections. -class ZeroResolver : public ResolverInterface { - public: - // Constructor takes a callback function that will be called for each symbol - // name encountered so that it can be added to the symbol table. - template <typename T> - ZeroResolver(T add_symbol_fcn) : add_symbol_fcn_(add_symbol_fcn) {} - absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { - // Any symbol name should be added to the symbol table as an undefined - // symbol if it is not already there. When the symbol is defined, the - // symbol table will be updated. In the case of generating an executable - // ELF file, any unresolved symbols will result in an error. When generating - // an object file, any unresolved symbols will remain in the symbol table - // and must be handled by the linker. 
- add_symbol_fcn_(text); - // Return 0 for any symbol name. - return 0; - } - - private: - absl::AnyInvocable<void(absl::string_view)> add_symbol_fcn_; -}; - -// A symbol resolver that uses the symbol table and the symbol indices to -// resolve symbol names to values. -class SymbolResolver : public ResolverInterface { - public: - SymbolResolver( - int elf_file_class, ELFIO::section *symtab, - const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices) - : elf_file_class_(elf_file_class), - symtab_(symtab), - symbol_indices_(symbol_indices) {} - absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { - auto iter = symbol_indices_.find(text); - if (iter == symbol_indices_.end()) { - return absl::InvalidArgumentError( - absl::StrCat("SymbolResolver: Symbol '", text, "' not found")); - } - auto index = iter->second; - if (elf_file_class_ == ELFCLASS64) { - auto *sym = - reinterpret_cast<const ELFIO::Elf64_Sym *>(symtab_->get_data()); - return sym[index].st_value; - } else if (elf_file_class_ == ELFCLASS32) { - auto *sym = - reinterpret_cast<const ELFIO::Elf32_Sym *>(symtab_->get_data()); - return sym[index].st_value; - } - return absl::InternalError("Unsupported ELF file class"); - } - - private: - // Elf file class. - int elf_file_class_ = 0; - // The symbol table ELF section. - ELFIO::section *symtab_; - // Map from symbol name to symbol index in the symbol table. - const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices_; -}; - // Helper functions for parsing the assembly code. 
namespace { @@ -238,7 +176,7 @@ absl::StatusOr<std::vector<T>> GetValues( absl::string_view remainder, ResolverInterface *resolver = nullptr) { std::vector<T> values; - static RE2 value_re("(0x[0-9a-fA-F]+|-?[0-9]+)\\s*(?:,|$)"); + static RE2 value_re("\\s*(0x[0-9a-fA-F]+|-?[0-9]+)\\s*(?:,|$)"); std::string match; while (RE2::Consume(&remainder, value_re, &match)) { auto result = SimpleTextToInt<typename AtoIType<T>::type>(match); @@ -254,7 +192,7 @@ absl::StatusOr<std::vector<char>> GetValues<char>(absl::string_view remainder, ResolverInterface *resolver) { std::vector<char> values; - static RE2 value_re("'(.{1,2})'\\s*(?:,|$)"); + static RE2 value_re("\\s*'(.{1,2})'\\s*(?:,|$)"); std::string match; while (RE2::Consume(&remainder, value_re, &match)) { auto expanded = ExpandEscapes(match); @@ -272,7 +210,7 @@ absl::string_view remainder, ResolverInterface *resolver) { std::vector<std::string> values; std::string match; - static RE2 value_re("\"([^\"]*)\"\\s*(?:,|$)"); + static RE2 value_re("\\s*\"([^\"]*)\"\\s*(?:,|$)"); while (RE2::Consume(&remainder, value_re, &match)) { values.push_back(ExpandEscapes(match)); } @@ -285,7 +223,7 @@ absl::string_view remainder) { std::vector<std::string> values; std::string match; - static RE2 label_re("([a-zA-Z_][a-zA-Z0-9_]*)\\s*(?:,|$)"); + static RE2 label_re("\\s*([a-zA-Z_][a-zA-Z0-9_]*)\\s*(?:,|$)"); while (RE2::Consume(&remainder, label_re, &match)) { values.push_back(match); } @@ -302,7 +240,7 @@ } u; for (auto value : values) { u.i = value; - for (int i = sizeof(T) - 1; i >= 0; i--) { + for (int i = 0; i < sizeof(T); i++) { bytes.push_back(u.b[i]); } } @@ -354,15 +292,23 @@ } SimpleAssembler::~SimpleAssembler() { - delete symbol_resolver_; delete symbol_accessor_; + symbol_accessor_ = nullptr; delete string_accessor_; + string_accessor_ = nullptr; } -absl::Status SimpleAssembler::Parse(std::istream &is) { +absl::Status SimpleAssembler::Parse(std::istream &is, + ResolverInterface *zero_resolver) { // A trivial symbol 
resolver that always returns 0. - ZeroResolver zero_resolver( - absl::bind_front(&SimpleAssembler::SimpleAddSymbol, this)); + bool own_zero_resolver = false; + std::function<void()> cleanup = []() {}; + if (zero_resolver == nullptr) { + zero_resolver = new ZeroResolver( + absl::bind_front(&SimpleAssembler::SimpleAddSymbol, this)); + own_zero_resolver = true; + cleanup = [zero_resolver]() { delete zero_resolver; }; + } // First pass of parsing the input stream. This will add symbols to the symbol // table and compute the sizes of all instructions and the sections. The // section_address_map_ will keep track of the current location within each @@ -409,10 +355,10 @@ // Pass the full line into the parse functions, they are responsible // for handling the labels in pass one. if (statement[0] == '.') { - status = ParseAsmDirective(line, address, &zero_resolver, byte_vector, + status = ParseAsmDirective(line, address, zero_resolver, byte_vector, relo_vector); } else { - status = ParseAsmStatement(line, address, &zero_resolver, byte_vector, + status = ParseAsmStatement(line, address, zero_resolver, byte_vector, relo_vector); } if (!status.ok()) return status; @@ -421,22 +367,32 @@ lines_.push_back(statement); } else if (!label.empty()) { // This is just a single label definition. Add it to the symbol table. - auto status = - AddSymbolToCurrentSection(label, address, 0, STT_NOTYPE, 0, 0); + uint64_t symbol_address = address; + if ((current_section_ == data_section_) || + (current_section_ == bss_section_)) { + symbol_address = address / data_address_unit_; + } + auto status = AddSymbolToCurrentSection(label, symbol_address, 0, + STT_NOTYPE, 0, 0); if (!status.ok()) return status; } continue; } // Parse failure. 
+ cleanup(); return absl::AbortedError(absl::StrCat("Parse failure: '", line, "'")); } - if (!is.eof()) return absl::InternalError("Input stream entered bad state"); + if (!is.eof()) { + cleanup(); + return absl::InternalError("Input stream entered bad state"); + } // Add undefined symbols to the symbol table. for (auto const &symbol : undefined_symbols_) { auto status = AddSymbol(symbol, 0, 0, STT_NOTYPE, 0, 0, nullptr); if (!status.ok()) { + cleanup(); return absl::InternalError(absl::StrCat( "Failed to add undefined symbol '", symbol, "': ", status.message())); } @@ -446,12 +402,15 @@ if (bss_section_ != nullptr) { bss_section_->set_size(section_address_map_[bss_section_]); } + cleanup(); return absl::OkStatus(); } -absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, - uint64_t entry_point) { - return CreateExecutable(base_address, absl::StrCat(entry_point)); +absl::Status SimpleAssembler::CreateExecutable( + uint64_t base_address, uint64_t entry_point, + ResolverInterface *symbol_resolver) { + return CreateExecutable(base_address, absl::StrCat(entry_point), + symbol_resolver); } // Helper function to update the symbol table entries for an executable file. 
@@ -501,8 +460,9 @@ delete[] symbols; } -absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, - const std::string &entry_point) { +absl::Status SimpleAssembler::CreateExecutable( + uint64_t base_address, const std::string &entry_point, + ResolverInterface *symbol_resolver) { if (!undefined_symbols_.empty()) { std::string message; absl::StrAppend( @@ -525,7 +485,10 @@ uint64_t text_segment_start = 0; if (text_section_ != nullptr) { text_segment_start = base_address & ~4095ULL; - ELFIO::segment *text_segment = writer_.segments.add(); + text_segment = writer_.segments.add(); + if (text_segment == nullptr) { + return absl::InternalError("Failed to create elf segment for text"); + } text_segment->set_type(PT_LOAD); text_segment->set_virtual_address(text_segment_start); text_segment->set_physical_address(text_segment_start); @@ -541,7 +504,10 @@ (text_segment_start + section_address_map_[text_section_] + 4095) & ~4095ULL; - ELFIO::segment *data_segment = writer_.segments.add(); + data_segment = writer_.segments.add(); + if (data_segment == nullptr) { + return absl::InternalError("Failed to create elf segment for data"); + } data_segment->set_type(PT_LOAD); data_segment->set_virtual_address(data_segment_start); data_segment->set_physical_address(data_segment_start); @@ -575,12 +541,21 @@ section_address_map_[data_section_] = data_segment_start; section_address_map_[bss_section_] = bss_segment_start; + std::function<void()> cleanup = []() {}; + if (symbol_resolver == nullptr) { + symbol_resolver = + new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); + cleanup = [symbol_resolver]() { delete symbol_resolver; }; + } // Pass in the relocation vector to the second pass of parsing, but ignore // the values, since we are creating an executable file, and all the symbols // are resolved. 
std::vector<RelocationInfo> relo_vector; - auto status = ParsePassTwo(relo_vector); - if (!status.ok()) return status; + auto status = ParsePassTwo(relo_vector, symbol_resolver); + if (!status.ok()) { + cleanup(); + return status; + } // Add sections to the segments. First segment gets the text section. The // second segment gets the data and bss sections. @@ -595,12 +570,16 @@ bss_section_->get_addr_align()); } - auto res = SimpleTextToInt<uint64_t>(entry_point, symbol_resolver_); - if (!res.ok()) return res.status(); + auto res = SimpleTextToInt<uint64_t>(entry_point, symbol_resolver); + if (!res.ok()) { + cleanup(); + return res.status(); + } uint64_t entry_point_value = res.value(); symbol_accessor_->arrange_local_symbols(); writer_.set_entry(entry_point_value); + cleanup(); return absl::OkStatus(); } @@ -648,7 +627,8 @@ symtab_->set_info(last_local + 1); } -absl::Status SimpleAssembler::CreateRelocatable() { +absl::Status SimpleAssembler::CreateRelocatable( + ResolverInterface *symbol_resolver) { writer_.set_type(ET_REL); // Reset the section address map to zero since we are creating a relocatable // file. @@ -679,10 +659,19 @@ UpdateSymtabHeaderInfo<ELFIO::Elf32_Sym>(); } + std::function<void()> cleanup = []() {}; + if (symbol_resolver == nullptr) { + symbol_resolver = + new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); + cleanup = [symbol_resolver]() { delete symbol_resolver; }; + } // Parse the source again, collect relocations. std::vector<RelocationInfo> relo_vector; - auto status = ParsePassTwo(relo_vector); - if (!status.ok()) return status; + auto status = ParsePassTwo(relo_vector, symbol_resolver); + if (!status.ok()) { + cleanup(); + return status; + } // Handle relocations if there are any. 
if (!relo_vector.empty()) { @@ -694,10 +683,12 @@ } for (auto const &[section_index, relo_vec] : relo_map) { if (section_index == 0) { + cleanup(); return absl::InternalError( "Relocation entry with section index 0 not supported"); } if (!section_index_map_.contains(section_index)) { + cleanup(); return absl::InternalError( absl::StrCat("Section index not found: ", section_index)); } @@ -722,22 +713,23 @@ status = AddRelocationEntries<ELFIO::Elf32_Rela>( relo_vec, symbol_indices_, rela_section); } else { + cleanup(); return absl::InternalError( absl::StrCat("Unsupported ELF file class: ", elf_file_class_)); } - if (!status.ok()) return status; + if (!status.ok()) { + cleanup(); + return status; + } } } + cleanup(); return absl::OkStatus(); } absl::Status SimpleAssembler::ParsePassTwo( - std::vector<RelocationInfo> &relo_vector) { - // For the second pass, we need a symbol resolver that uses the symbol - // table and the symbol indices. - symbol_resolver_ = - new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); - + std::vector<RelocationInfo> &relo_vector, + ResolverInterface *symbol_resolver) { // Now fill in the sections. Parse each of the lines saved in the first // pass. for (auto const &line : lines_) { @@ -747,10 +739,10 @@ auto relo_size = relo_vector.size(); auto address = section_address_map_[section]; if (line[0] == '.') { - auto status = ParseAsmDirective(line, address, symbol_resolver_, + auto status = ParseAsmDirective(line, address, symbol_resolver, byte_vector, relo_vector); } else { - auto status = ParseAsmStatement(line, address, symbol_resolver_, + auto status = ParseAsmStatement(line, address, symbol_resolver, byte_vector, relo_vector); } if (!status.ok()) return status; @@ -763,6 +755,9 @@ if (byte_vector.empty()) continue; // Add data to the section, but first make sure it's not bss. 
if (section != bss_section_) { + if (section == nullptr) { + return absl::InternalError("Data is added to a null section"); + } section->append_data(reinterpret_cast<const char *>(byte_vector.data()), byte_vector.size()); } @@ -861,7 +856,7 @@ auto values = res.value(); size = values.size() * sizeof(int64_t); ConvertToBytes<int64_t>(values, byte_values); - } else if (match == "section") { + } else if (match == "sect") { // .section <name>,<type> // TODO(torerik): Implement. return absl::UnimplementedError("Section directive not implemented"); @@ -958,12 +953,12 @@ std::vector<uint8_t> &byte_values, std::vector<RelocationInfo> &relocations) { // Call the target specific assembler to encode the statement. - auto status = opcode_assembler_if_->Encode( + auto result = opcode_assembler_if_->Encode( address, line, absl::bind_front(&SimpleAssembler::AddSymbolToCurrentSection, this), resolver, byte_values, relocations); - if (!status.ok()) return status; - section_address_map_[current_section_] += byte_values.size(); + if (!result.ok()) return result.status(); + section_address_map_[current_section_] += result.value(); return absl::OkStatus(); }
diff --git a/mpact/sim/util/asm/simple_assembler.h b/mpact/sim/util/asm/simple_assembler.h index e711fc7..1f705c5 100644 --- a/mpact/sim/util/asm/simple_assembler.h +++ b/mpact/sim/util/asm/simple_assembler.h
@@ -74,7 +74,8 @@ virtual ~SimpleAssembler(); // Parse the input stream as assembly. - absl::Status Parse(std::istream &is); + absl::Status Parse(std::istream &is, + ResolverInterface *zero_resolver = nullptr); // Add the symbol to the symbol table for the current section. See ELFIO // documentation for details of the meaning of the parameters. absl::Status AddSymbolToCurrentSection(const std::string &name, @@ -90,15 +91,30 @@ // The text segment will be laid out starting at base address, followed by // the data segment. absl::Status CreateExecutable(uint64_t base_address, - const std::string &entry_point); - absl::Status CreateExecutable(uint64_t base_address, uint64_t entry_point); + const std::string &entry_point, + ResolverInterface *symbol_resolver = nullptr); + absl::Status CreateExecutable(uint64_t base_address, uint64_t entry_point, + ResolverInterface *symbol_resolver = nullptr); // Create a relocatable ELF file. - absl::Status CreateRelocatable(); + absl::Status CreateRelocatable(ResolverInterface *symbol_resolver = nullptr); // Write the ELF file to the given output stream. absl::Status Write(std::ostream &os); // Access the ELF writer. ELFIO::elfio &writer() { return writer_; } + // Add a symbol reference to the symbol table if it is not already defined. + void SimpleAddSymbol(absl::string_view name); + + // Getters and setters. + absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices() { + return symbol_indices_; + } + ELFIO::section *symtab() { return symtab_; } + unsigned data_address_unit() { return data_address_unit_; } + void set_data_address_unit(unsigned data_address_unit) { + data_address_unit_ = data_address_unit; + } + private: // Helper function to update the symbol table entries. template <typename SymbolType> @@ -110,7 +126,8 @@ template <typename SymbolType> void UpdateSymtabHeaderInfo(); // Perform second pass of parsing. 
- absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector); + absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector, + ResolverInterface *symbol_resolver); // Parse and process an assembly directive. absl::Status ParseAsmDirective(absl::string_view line, uint64_t address, ResolverInterface *resolver, @@ -125,8 +142,6 @@ absl::Status AddSymbol(const std::string &name, ELFIO::Elf64_Addr value, ELFIO::Elf_Xword size, uint8_t type, uint8_t binding, uint8_t other, ELFIO::section *section); - // Add a symbol reference to the symbol table if it is not already defined. - void SimpleAddSymbol(absl::string_view name); // Append the data to the current section. absl::Status AppendData(const char *data, size_t size); @@ -157,9 +172,6 @@ // Map that tracks the current address of each section. absl::flat_hash_map<ELFIO::section *, uint64_t> section_address_map_; - // Current symbol resolver (looks up symbols in the symbol table and returns - // their values). - ResolverInterface *symbol_resolver_ = nullptr; std::vector<std::string> lines_; // Section pointers. ELFIO::section *text_section_ = nullptr; @@ -175,6 +187,8 @@ absl::flat_hash_map<std::string, ELFIO::Elf_Word> symbol_indices_; // Set of undefined symbols. absl::flat_hash_set<std::string> undefined_symbols_; + // Data address unit - by default 1 for byte addressable. + unsigned data_address_unit_ = 1; }; } // namespace assembler
diff --git a/mpact/sim/util/asm/test/BUILD b/mpact/sim/util/asm/test/BUILD index 3c5e77a..692c7fe 100644 --- a/mpact/sim/util/asm/test/BUILD +++ b/mpact/sim/util/asm/test/BUILD
@@ -74,9 +74,6 @@ "//mpact/sim/util/asm", "@com_google_absl//absl/base:no_destructor", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/log", - "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -89,7 +86,6 @@ size = "small", srcs = ["riscv64x_asm_test.cc"], deps = [ - ":riscv64x_bin_fmt", ":riscv64x_encoder", ":riscv64x_isa", "//mpact/sim/util/asm", @@ -97,10 +93,8 @@ "@com_github_serge1_elfio//:elfio", "@com_google_absl//absl/base:no_destructor", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main",
diff --git a/mpact/sim/util/asm/test/riscv64x_asm_test.cc b/mpact/sim/util/asm/test/riscv64x_asm_test.cc index 91d9d93..3091c09 100644 --- a/mpact/sim/util/asm/test/riscv64x_asm_test.cc +++ b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
@@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <cstddef> #include <cstdint> #include <sstream> #include <string> @@ -54,10 +55,11 @@ RiscV64XAssembler(Riscv64xSlotMatcher* matcher) : label_re_("^(\\S+)\\s*:"), matcher_(matcher) {}; ~RiscV64XAssembler() override = default; - absl::Status Encode(uint64_t address, absl::string_view text, - AddSymbolCallback add_symbol_callback, - ResolverInterface* resolver, std::vector<uint8_t>& bytes, - std::vector<RelocationInfo>& relocations) override { + absl::StatusOr<size_t> Encode( + uint64_t address, absl::string_view text, + AddSymbolCallback add_symbol_callback, ResolverInterface* resolver, + std::vector<uint8_t>& bytes, + std::vector<RelocationInfo>& relocations) override { // First check to see if there is a label, if so, add it to the symbol table // with the current address. std::string label; @@ -78,7 +80,7 @@ for (int i = 0; i < size / 8; ++i) { bytes.push_back(u.b[i]); } - return absl::OkStatus(); + return bytes.size(); } private:
diff --git a/mpact/sim/util/program_loader/elf_program_loader.cc b/mpact/sim/util/program_loader/elf_program_loader.cc index 2ee8544..8371a9c 100644 --- a/mpact/sim/util/program_loader/elf_program_loader.cc +++ b/mpact/sim/util/program_loader/elf_program_loader.cc
@@ -18,8 +18,6 @@ #include <cstdint> #include <cstring> -#include <fstream> -#include <ios> #include <string> #include <utility>
diff --git a/mpact/sim/util/renode/renode_cs/MpactCPU.cs b/mpact/sim/util/renode/renode_cs/MpactCPU.cs index 8e44d8b..4e46507 100644 --- a/mpact/sim/util/renode/renode_cs/MpactCPU.cs +++ b/mpact/sim/util/renode/renode_cs/MpactCPU.cs
@@ -805,3 +805,4 @@ } // namespace Antmicro.Renode.Peripherals.MpactCPU +