Updated assembler to support architectures where the increment in address in the text section is different from the number of bytes added. Updated the decoder generators to better handle slots where instructions have been "deleted" from one or more base slots it inherits from. Fixed minor typos and oversights. PiperOrigin-RevId: 744051094 Change-Id: I1bfa248a885dd30eb9625cd2c2d367c5bda27444
diff --git a/mpact/sim/decoder/InstructionSet.g4 b/mpact/sim/decoder/InstructionSet.g4 index b8e71e5..1c5951e 100644 --- a/mpact/sim/decoder/InstructionSet.g4 +++ b/mpact/sim/decoder/InstructionSet.g4
@@ -142,11 +142,19 @@ : INCLUDE STRING_LITERAL ; -// A bundle_declaration has a name and specifies the set of bundles and/or -// slots contained within. At least one slot or bundle has to be specified. - +// A bundle_declaration has a name and specifies a semantic function, which is +// responsible for dispatching the instructions for the bundles and/or slots. +// It also specifies the set of bundles and/or slots +// it contains. At least one slot or bundle has to be specified. bundle_declaration - : BUNDLE bundle_name=IDENT '{' bundle_list? slot_list? '}' + : BUNDLE bundle_name=IDENT '{' bundle_parts* '}' + ; + +bundle_parts + : include_file_list + | bundle_list + | slot_list + | semfunc_spec ';' ; // A bundle list is a non-empty list of bundle identifiers
diff --git a/mpact/sim/decoder/bin_format_visitor.cc b/mpact/sim/decoder/bin_format_visitor.cc index 39ba2dc..97351f3 100644 --- a/mpact/sim/decoder/bin_format_visitor.cc +++ b/mpact/sim/decoder/bin_format_visitor.cc
@@ -62,35 +62,25 @@ template <typename T> static inline T ExtractBits(const uint8_t *data, int data_size, int msb, int width) { - if (width == 0) return 0; - - int byte_low = data_size - ((msb - width) >> 3) - 1; - int byte_high = data_size - (msb >> 3) - 1; - int high_bit = msb & 0x7; - - // If it is only from one byte, extract and return. - if (byte_low == byte_high) { - uint8_t mask = (1 << (high_bit + 1)) - 1; - return (mask & data[byte_high]) >> (high_bit - width + 1); - } - - // Extract from the high order byte. T val = 0; - uint8_t mask = 0xff >> (7 - high_bit); - val = (mask & data[byte_high++]); - int remainder = width - (1 + high_bit); - while (remainder >= 8) { - val = (val << 8) | data[byte_high++]; - remainder -= 8; - } + if (width == 0) return val; - // Extract any remaining bits from the high end of the last byte. - if (remainder > 0) { - val <<= remainder; - int shift = 8 - remainder; - uint8_t mask = 0xff << shift; - val |= (data[byte_high] & mask) >> shift; + int lsb = msb - width + 1; + int byte_low = data_size - (lsb >> 3) - 1; + + + int blsb = lsb & 0x7; + int bits_left = width; + int bits_extracted = 0; + while (bits_left > 0) { + int bwidth = std::min(8 - blsb, bits_left); + uint8_t bmask = ((1 << bwidth) - 1) << blsb; + val |= ((data[byte_low] & bmask) >> blsb) << bits_extracted; + blsb = 0; + bits_left -= bwidth; + bits_extracted += bwidth; + byte_low--; } return val; } @@ -99,6 +89,38 @@ )foo"; +constexpr char kTemplatedInsertBits[] = R"foo( +namespace internal { + +// This function inserts a bitfield width bits wide into the byte vector, +// starting at bit_index bits from the end of data. The lsb has index 0. The +// byte vector is data_size bytes long. There is no error checking that T +// can hold width bits. 
+template <typename T> +static inline void InsertBits(uint8_t *data, int data_size, int msb, int width, + T val) { + if (width == 0) return; + + int lsb = msb - width + 1; + int byte_low = data_size - (lsb >> 3) - 1; + int blsb = lsb & 0x7; + while (width > 0) { + int bwidth = std::min(8 - blsb, width); + T bmask = (1 << bwidth) - 1; + uint8_t bval = (val & bmask); + bmask <<= blsb; + bval <<= blsb; + val >>= bwidth; + data[byte_low] = (data[byte_low] & ~bmask) | (bval & bmask); + blsb = 0; + width -= bwidth; + byte_low--; + } +} + +} // namespace internal +)foo"; + BinFormatVisitor::BinFormatVisitor() { constraint_string_to_type_.emplace("==", ConstraintType::kEq); constraint_string_to_type_.emplace("!=", ConstraintType::kNe); @@ -237,6 +259,7 @@ "#include <cstdint>\n" "\n" "#include \"absl/functional/any_invocable.h\"\n" + "#include \"absl/log/log.h\"\n" "\n\n"); for (auto const &include_file : encoding_info->include_files()) { absl::StrAppend(&h_string, "#include ", include_file, "\n"); @@ -327,14 +350,15 @@ "#include <iostream>\n" "#include <cstdint>\n\n" "#include \"absl/base/no_destructor.h\"\n" - "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"absl/container/flat_hash_map.h\"\n" + "#include \"absl/log/log.h\"\n\n" "#include \"", enum_h_name, "\"\n"); absl::StrAppend(&cc_string, "#include \"", dot_h_name, "\"\n\n" "#include <cstdint>\n\n" "#include \"absl/base/no_destructor.h\"\n" - "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"absl/container/flat_hash_map.h\"\n" "#include \"", enum_h_name, "\"\n"); for (auto &name_space : encoding_info->decoder()->namespaces()) { @@ -342,7 +366,8 @@ absl::StrAppend(&cc_string, name_space_str); absl::StrAppend(&h_string, name_space_str); } - absl::StrAppend(&h_string, "\n"); + // Write out the templated extractor function used by the other methods. + absl::StrAppend(&h_string, "\n", kTemplatedInsertBits, "\n"); absl::StrAppend(&cc_string, "\n"); return std::tie(h_string, cc_string); }
diff --git a/mpact/sim/decoder/bundle.h b/mpact/sim/decoder/bundle.h index ae58095..315cd42 100644 --- a/mpact/sim/decoder/bundle.h +++ b/mpact/sim/decoder/bundle.h
@@ -66,6 +66,10 @@ InstructionSet *instruction_set() const { return instruction_set_; } bool is_marked() const { return is_marked_; } void set_is_marked(bool value) { is_marked_ = value; } + std::string semfunc_code_string() const { return semfunc_code_string_; } + void set_semfunc_code_string(std::string code_string) { + semfunc_code_string_ = std::move(code_string); + } private: BundleDeclCtx *ctx_; @@ -76,6 +80,8 @@ std::string name_; // Name in PascalCase. std::string pascal_name_; + // Semantic function code string. + std::string semfunc_code_string_; // The slots contained within this bundle, including instance indices. std::vector<std::pair<std::string, const std::vector<int>>> slot_uses_; // The bundles contained within this bundle.
diff --git a/mpact/sim/decoder/encoding_group.cc b/mpact/sim/decoder/encoding_group.cc index 30d203f..f722d84 100644 --- a/mpact/sim/decoder/encoding_group.cc +++ b/mpact/sim/decoder/encoding_group.cc
@@ -159,7 +159,7 @@ encoding_group->AddEncoding(enc); } // Avoid useless groups and infinite recursion by deleting any groups that - // are empty and where the all the encodings ended up in the same subgroup. + // are empty and where all the encodings ended up in the same subgroup. if (encoding_group->encoding_vec().empty()) { delete encoding_group; continue; @@ -439,17 +439,20 @@ // For each instruction in the encoding vec, generate the if statement // to see if the instruction is matched. absl::flat_hash_set<std::string> extracted; - // For equal constraints, some can be ignored because those bits are wholly - // considered by the parent groups or the discriminator. + int count = 0; + // For equal constraints, some can be ignored because those bits are + // wholly considered by the parent groups or the discriminator. for (auto *encoding : encoding_vec_) { for (auto *constraint : encoding->equal_constraints()) { ProcessConstraint(extracted, constraint, definitions_ptr); } - EmitEncodingIfStatement(/*indent*/ 0, encoding, opcode_enum, extracted, - definitions_ptr); + count += EmitEncodingIfStatement(/*indent*/ 0, encoding, opcode_enum, + extracted, definitions_ptr); } - absl::StrAppend(definitions_ptr, " return std::make_pair(", opcode_enum, - "::kNone, FormatEnum::kNone);\n"); + if (count > 0) { + absl::StrAppend(definitions_ptr, " return std::make_pair(", opcode_enum, + "::kNone, FormatEnum::kNone);\n"); + } } void EncodingGroup::ProcessConstraint(
diff --git a/mpact/sim/decoder/format.cc b/mpact/sim/decoder/format.cc index c602995..5e8de59 100644 --- a/mpact/sim/decoder/format.cc +++ b/mpact/sim/decoder/format.cc
@@ -143,19 +143,27 @@ } // Return the string containing the integer type used to contain the current -// format. If it is greater than 64 bits, will use a byte array (int8_t *). +// format. If it is greater than 128 bits, will use a byte array (int8_t *). +// If it is 65 to 128 bits, will use absl::[u]int128. +std::string Format::GetUIntType(int bitwidth) const { + if (bitwidth > 128) return "uint8_t *"; + if (bitwidth > 64) return "absl::uint128"; + return absl::StrCat("uint", GetIntTypeBitWidth(bitwidth), "_t"); +} + std::string Format::GetIntType(int bitwidth) const { - if (bitwidth > 64) return "int8_t *"; + if (bitwidth > 128) return "int8_t *"; + if (bitwidth > 64) return "absl::int128"; return absl::StrCat("int", GetIntTypeBitWidth(bitwidth), "_t"); } -// Return the int type byte width (1, 2, 4, 8) or (-1 if it's bigger), of the -// integer type that would fit this format. +// Return the int type byte width (1, 2, 4, 8, 16) or (-1 if it's bigger), of +// the integer type that would fit this format. int Format::GetIntTypeBitWidth(int bitwidth) const { auto shift = absl::bit_width(static_cast<unsigned>(bitwidth)) - 1; if (absl::popcount(static_cast<unsigned>(bitwidth)) > 1) shift++; shift = std::max(shift, 3); - if (shift > 6) return -1; + if (shift > 7) return -1; return 1 << shift; } @@ -208,7 +216,8 @@ } field_or_format->set_high(declared_width_ - computed_width_ - 1); computed_width_ += format->declared_width() * field_or_format->size(); - extractors_.insert(std::make_pair(format->name(), field_or_format)); + extractors_.insert( + std::make_pair(field_or_format->format_alias(), field_or_format)); } if (computed_width_ != declared_width_) { return absl::InternalError(absl::StrCat( @@ -350,17 +359,17 @@ std::string h_output; int return_width = GetIntTypeBitWidth(field->width); std::string result_type_name = - absl::StrCat(field->is_signed ? 
"" : "u", GetIntType(return_width)); - std::string argument_type_name = - absl::StrCat("u", GetIntType(computed_width_)); - std::string signature = - absl::StrCat(result_type_name, " Extract", ToPascalCase(field->name), "(", - argument_type_name, " value)"); + field->is_signed ? GetIntType(return_width) : GetUIntType(return_width); + std::string argument_type_name = GetUIntType(computed_width_); + std::string signature = absl::StrCat( + result_type_name, " Extract", ToPascalCase(field->name), "(", + computed_width_ > 128 ? "const " : "", argument_type_name, " value)"); absl::StrAppend(&h_output, "inline ", signature, " {\n"); // Generate extraction function. For fields it's a simple shift and mask if - // the source format width <= 64 bits. + // the source format width <= 64 bits. Slightly more involved with format + // <= 128 bits. For larger formats use the templated extract helper function. std::string expr; if (declared_width_ <= 64) { uint64_t mask = (1ULL << field->width) - 1; @@ -369,21 +378,50 @@ } else { expr = absl::StrCat(" (value >> ", field->low, ") & 0x", absl::Hex(mask)); } + } else if (declared_width_ <= 128) { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + field->width, ") - 1;\n"); + if (field->low == 0) { + expr = absl::StrCat("value & mask"); + } else { + expr = absl::StrCat(" (value >> ", field->low, ") & mask"); + } } else { - // For format width > 64 bits, use the templated extract helper function. + // For format width > 128 bits, use the templated extract helper function. int byte_size = (declared_width_ + 7) / 8; expr = absl::StrCat("internal::ExtractBits<", result_type_name, ">(value, ", byte_size, ", ", field->high, ", ", field->width, ")"); } // Add sign-extension if the field is signed. 
+ std::string sign_extension; if (field->is_signed) { int shift = return_width - field->width; - absl::StrAppend(&h_output, " ", result_type_name, " result = (", expr, - ") << ", shift, ";\n result = result >> ", shift, ";\n", - " return result;\n}\n\n"); + sign_extension = + absl::StrCat(" ", result_type_name, " result = (", expr, ") << ", + shift, ";\n result = result >> ", shift, ";\n"); + expr = "result"; + } + if (declared_width_ <= 64) { + absl::StrAppend(&h_output, sign_extension, " return ", expr, ";\n}\n\n"); + } else if ((declared_width_ <= 128) && (return_width <= 64)) { + absl::StrAppend(&h_output, sign_extension, " return absl::Uint128Low64(", + expr, ");\n}\n\n"); } else { - absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + absl::StrAppend(&h_output, sign_extension, " return ", expr, ";\n}\n\n"); + } + // If the parent format size is not a power of two, also create an extractor + // that takes a uint8_t * parameter. + if ((declared_width_ <= 128) && + (absl::popcount(static_cast<unsigned>(declared_width_)) > 1)) { + absl::StrAppend(&h_output, "inline ", result_type_name, " Extract", + ToPascalCase(field->name), "(const uint8_t *value) {\n"); + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " return internal::ExtractBits<", + result_type_name, ">(value, ", byte_size, ", ", field->high, + ", ", field->width, ");\n}\n\n"); } return h_output; } @@ -393,9 +431,16 @@ // it into the right place in the instruction word. 
std::string Format::GenerateFieldInserter(const Field *field) const { std::string h_output; - absl::StrAppend(&h_output, "static inline uint64_t Insert", - ToPascalCase(field->name), - "(uint64_t value, uint64_t inst_word) {\n"); + std::string field_type_name; + std::string inst_word_type_name = GetUIntType(computed_width_); + if (declared_width_ <= 128) { + field_type_name = inst_word_type_name; + } else { + field_type_name = GetUIntType(field->width); + } + absl::StrAppend(&h_output, "static inline ", inst_word_type_name, " Insert", + ToPascalCase(field->name), "(", field_type_name, " value, ", + inst_word_type_name, " inst_word) {\n"); if (declared_width_ <= 64) { uint64_t mask = ((1ULL << field->width) - 1) << field->low; std::string shift; @@ -404,15 +449,36 @@ } absl::StrAppend(&h_output, " inst_word = (inst_word & ~0x", absl::Hex(mask), "ULL)", " | ((value", shift, ") & 0x", - absl::Hex(mask), "ULL);\n"); + absl::Hex(mask), + "ULL);\n" + " return inst_word;\n" + "}\n"); + } else if (declared_width_ <= 128) { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + field->width, ") - 1;\n"); + if (field->low != 0) { + absl::StrAppend(&h_output, " mask = mask << ", field->low, ";\n"); + } + absl::StrAppend(&h_output, + " inst_word = (inst_word & ~mask) | (value & mask);\n" + " return inst_word;\n" + "}\n"); + } else if (field->width <= 128) { + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", byte_size, + ", ", field->high, ", ", field->width, + ", value);\n" + " return inst_word;\n" + "}\n"); } else { absl::StrAppend( &h_output, - " #error Support for formats > 64 bits not implemented - yet."); + " LOG(FATAL) << \" Support for fields > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); } - absl::StrAppend(&h_output, - " return inst_word;\n" - "}\n"); return h_output; } @@ -421,46 +487,78 @@ // insert its components into the right places in the instruction 
word. std::string Format::GenerateOverlayInserter(Overlay *overlay) const { std::string h_output; - absl::StrAppend(&h_output, "static inline uint64_t Insert", - ToPascalCase(overlay->name()), - "(uint64_t value, uint64_t inst_word) {\n"); + std::string result_type_name = GetUIntType(computed_width_); + std::string overlay_type_name; + if (computed_width_ <= 128) { + overlay_type_name = result_type_name; + } else { + overlay_type_name = GetUIntType(overlay->declared_width()); + } + absl::StrAppend(&h_output, "static inline ", result_type_name, " Insert", + ToPascalCase(overlay->name()), "(", overlay_type_name, + " value, ", result_type_name, " inst_word) {\n"); // Mark error if either the overlay or the format is > 64 bits. - if (overlay->declared_width() > 64) { - absl::StrAppend(&h_output, - " #error Support for overlays > 64 bits not implemented - " - "yet.\n}\n"); + if (overlay->declared_width() > 128) { + absl::StrAppend( + &h_output, + " LOG(FATAL) << \" Support for overlays > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); return h_output; } - if (computed_width_ > 64) { - absl::StrAppend(&h_output, - " #error Support for formats > 64 bits not implemented - " - "yet.\n}\n"); - return h_output; + bool use_mask_variable = false; + int remaining = overlay->declared_width(); + int byte_size = (declared_width_ + 7) / 8; + if (declared_width_ <= 128) { + absl::StrAppend(&h_output, " ", result_type_name, " tmp;\n"); + // Track the leftmost bit in the overlay. + if (declared_width_ > 64) { + absl::StrAppend(&h_output, " absl::uint128 mask;\n"); + use_mask_variable = true; + } + } else { + absl::StrAppend(&h_output, " ", overlay_type_name, " tmp;\n"); + if (overlay->declared_width() > 64) { + absl::StrAppend(&h_output, " absl::uint128 mask;\n"); + use_mask_variable = true; + } } - absl::StrAppend(&h_output, " uint64_t tmp;\n"); - // Track the leftmost bit in the overlay. 
- int left = overlay->declared_width(); for (auto &bits_or_field : overlay->component_vec()) { int width = bits_or_field->width(); // Ignore the bit fields in the overlay. if (bits_or_field->high() < 0) { - left -= width; + remaining -= width; continue; } - uint64_t mask = ((1ULL << width) - 1); std::string shift; - if (left - width > 0) { - shift = absl::StrCat(" >> ", left - width); + if (remaining - width > 0) { + shift = absl::StrCat(" >> ", remaining - width); } - // Extract the bits from the overlay value for the current component. - absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", - absl::Hex(mask), "ULL;\n"); + if (use_mask_variable) { + absl::StrAppend(&h_output, + " mask = 1;\n" + " mask = (mask << ", + width, ") - 1;\n"); + absl::StrAppend(&h_output, " tmp = (value ", shift, ") & mask;\n"); + } else { + uint64_t mask = ((1ULL << width) - 1); + // Extract the bits from the overlay value for the current component. + absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", + absl::Hex(mask), "ULL;\n"); + } shift.clear(); if (bits_or_field->low() != 0) { shift = absl::StrCat(" << ", bits_or_field->low()); } - absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, ");\n"); - left -= width; + if (declared_width_ <= 128) { + absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, ");\n"); + } else { + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", + byte_size, ", ", bits_or_field->high(), ", ", width, + ", tmp);\n"); + } + + remaining -= width; } absl::StrAppend(&h_output, " return inst_word;\n}\n"); return h_output; @@ -472,27 +570,116 @@ std::string Format::GenerateFormatInserter(std::string_view format_alias, const Format *format, int high, int size) const { + if (size > 1) { + return GenerateReplicatedFormatInserter(format_alias, format, high, size); + } + return GenerateSingleFormatInserter(format_alias, format, high); +} + +std::string Format::GenerateReplicatedFormatInserter( + std::string_view format_alias, const 
Format *format, int high, + int size) const { std::string h_output; - std::string target_type_name = absl::StrCat("u", GetIntType(computed_width_)); - absl::StrAppend(&h_output, "static inline uint64_tInsert", - ToPascalCase(format_alias), - "(uint64_t value, uint64_t inst_word) {\n"); - if (declared_width_ > 64) { - absl::StrAppend(&h_output, - " #error Support for formats > 64 bits not implemented - " - "yet.\n}\n"); + std::string target_type_name = GetUIntType(declared_width_); + std::string format_type_name; + + if (declared_width_ <= 128) { + format_type_name = target_type_name; + } else { + format_type_name = GetUIntType(format->declared_width()); + } + absl::StrAppend(&h_output, "static inline ", target_type_name, " Insert", + ToPascalCase(format_alias), "(", "int index, ", + format_type_name, " value, ", target_type_name, + " inst_word) {\n"); + if (format->declared_width() > 128) { + absl::StrAppend( + &h_output, + " LOG(FATAL) << \" Support for formats > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); return h_output; } int width = format->declared_width(); int low = high - width + 1; - uint64_t mask = (1ULL << width) << low; + if (declared_width_ <= 64) { + uint64_t mask = (1ULL << width) - 1; + absl::StrAppend(&h_output, " int low = ", low, " - (index * ", width, + ");\n" + " return (inst_word & (~0x", + absl::Hex(mask), "ULL << low))", " | ((value << low) & (0x", + absl::Hex(mask), "ULL << low));\n}\n"); + } else if (declared_width_ <= 128) { + absl::StrAppend( + &h_output, " int low = ", low, " - (index * ", width, + ");\n" + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + width, + ") - 1;\n" + " mask <<= low;\n" + " return (inst_word & ~mask) | (value << low) & mask;\n}\n"); + } else { + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", byte_size, + ", ", high, " - (index * ", width, "), ", width, + ", value);\n" + " return inst_word;\n}\n"); + } + return h_output; +} + 
+std::string Format::GenerateSingleFormatInserter(std::string_view format_alias, + const Format *format, + int high) const { + std::string h_output; + std::string target_type_name = GetUIntType(declared_width_); + std::string format_type_name; + if (declared_width_ <= 128) { + format_type_name = target_type_name; + } else { + format_type_name = GetUIntType(format->declared_width()); + } + + absl::StrAppend(&h_output, "static inline ", target_type_name, " Insert", + ToPascalCase(format_alias), "(", format_type_name, " value, ", + target_type_name, " inst_word) {\n"); + if (format->declared_width() > 128) { + absl::StrAppend( + &h_output, + " LOG(FATAL) << \" Support for formats > 128 bits not implemented - " + "yet.\";\n" + " return 0;\n}\n"); + return h_output; + } + int width = format->declared_width(); + int low = high - width + 1; std::string shift; if (low != 0) { shift = absl::StrCat(" << ", low); } - absl::StrAppend(&h_output, " return (inst_word & (~0x", absl::Hex(mask), - "ULL))", " | ((value ", shift, ") & 0x", absl::Hex(mask), - "ULL);\n}\n"); + if (declared_width_ <= 64) { + uint64_t mask = ((1ULL << width) - 1) << low; + absl::StrAppend(&h_output, " return (inst_word & (~0x", absl::Hex(mask), + "ULL))", " | ((value ", shift, ") & 0x", absl::Hex(mask), + "ULL);\n}\n"); + } else if (declared_width_ <= 128) { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + width, ") - 1;\n"); + if (low > 0) { + absl::StrAppend(&h_output, " mask = mask << ", low, ";\n"); + } + absl::StrAppend(&h_output, " return (inst_word & ~mask) | (value ", shift, + ") & mask;\n}\n"); + } else { + int byte_size = (declared_width_ + 7) / 8; + absl::StrAppend(&h_output, " internal::InsertBits(inst_word, ", byte_size, + ", ", high, ", ", width, + ", value);\n" + " return inst_word;\n}\n"); + } return h_output; } @@ -504,23 +691,23 @@ std::string h_output; // For each format generate am extractor. 
int width = format->declared_width(); // An extraction can only be for 64 bits or less. - if (width > 64) { + if (width > 128) { encoding_info_->error_listener()->semanticError( nullptr, absl::StrCat("Cannot generate a format extractor for format '", - format->name(), "': format is wider than 64 bits")); + format->name(), "': format is wider than 128 bits")); return ""; } - std::string return_type = absl::StrCat("u", GetIntType(width)); + std::string return_type = GetUIntType(width); std::string signature = absl::StrCat("inline ", return_type, " Extract", ToPascalCase(format_alias), "("); - if (declared_width_ <= 64) { - // If the source format is <= 64 bits, then use an int type. - std::string arg_type = absl::StrCat("u", GetIntType(declared_width_)); + if (declared_width_ <= 128) { + // If the source format is <= 128 bits, then use an int type. + std::string arg_type = GetUIntType(declared_width_); absl::StrAppend(&signature, arg_type, " value"); } else { // Otherwise use a pointer to uint8_t type. - absl::StrAppend(&signature, "uint8_t *value"); + absl::StrAppend(&signature, "const uint8_t *value"); } // If the format has multiple instances add an index parameter. if (size > 1) { @@ -530,18 +717,33 @@ // Now start the body. absl::StrAppend(&h_output, signature, " {\n"); std::string expr; - if (declared_width_ <= 64) { - // If the source format can be stored in a uint64_t or smaller. - uint64_t mask = (1ULL << width) - 1; + if (declared_width_ <= 128) { + // If the source format can be stored in a uint128 or smaller. 
int low = high - width + 1; - int shift_amount = GetIntTypeBitWidth(declared_width_) - low; std::string shift; if (size > 1) { - shift = absl::StrCat("(", shift_amount, " - (index - 1) * ", width, ")"); + shift = absl::StrCat("(", low, " + (index - 1) * ", width, ")"); } else { - shift = absl::StrCat(shift_amount); + shift = absl::StrCat(low); } - expr = absl::StrCat("(value >> ", shift, ") & 0x", absl::Hex(mask), ";\n"); + if (declared_width_ <= 64) { + uint64_t mask = (1ULL << width) - 1; + expr = + absl::StrCat("(value >> ", shift, ") & 0x", absl::Hex(mask), ";\n"); + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + } else { + absl::StrAppend(&h_output, + " absl::uint128 mask = 1;\n" + " mask = (mask << ", + width, ") - 1;\n"); + expr = absl::StrCat("(value >> ", shift, ") & mask"); + if (width <= 64) { + absl::StrAppend(&h_output, " return absl::Uint128Low64(", expr, + ");\n}\n\n"); + } else { + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + } + } } else { // If the source format is stored in uint8_t[]. int byte_size = (declared_width_ + 7) / 8; @@ -551,8 +753,23 @@ absl::StrAppend(&expr, " - (index * ", width, ")"); } absl::StrAppend(&expr, ", ", width, ")"); + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); } - absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + // If the parent format size is not a power of two, also create an extractor + // that takes a uint8_t * parameter. 
+ if ((declared_width_ <= 128) && + (absl::popcount(static_cast<unsigned>(declared_width_)) > 1)) { + absl::StrAppend(&h_output, "inline ", return_type, " Extract", + ToPascalCase(format_alias), "(const uint8_t *value) {\n"); + int byte_size = (declared_width_ + 7) / 8; + expr = absl::StrCat("internal::ExtractBits<", return_type, ">(value, ", + byte_size, ", ", high); + if (size > 1) { + absl::StrAppend(&expr, " - (index * ", width, ")"); + } + absl::StrAppend(&expr, ", ", width, ")"); + absl::StrAppend(&h_output, " return ", expr, ";\n}\n\n"); + } return h_output; } @@ -560,9 +777,11 @@ std::string Format::GenerateOverlayExtractor(Overlay *overlay) const { std::string h_output; - std::string return_type = absl::StrCat(overlay->is_signed() ? "" : "u", - GetIntType(overlay->declared_width())); - std::string arg_type = absl::StrCat("u", GetIntType(declared_width_)); + std::string return_type = overlay->is_signed() + ? GetIntType(overlay->declared_width()) + : GetUIntType(overlay->declared_width()); + + std::string arg_type = GetUIntType(declared_width_); std::string signature = absl::StrCat("inline ", return_type, " Extract", ToPascalCase(overlay->name()), "(", arg_type, " value)"); @@ -587,9 +806,15 @@ " result = result >> ", shift, ";\n"); } - absl::StrAppend(&h_output, - " return result;\n" - "}\n\n"); + if ((declared_width_ > 64) && (overlay->declared_width() <= 64)) { + absl::StrAppend(&h_output, + " return UInt128Low64(result();\n" + "}\n\n"); + } else { + absl::StrAppend(&h_output, + " return result;\n" + "}\n\n"); + } return h_output; }
diff --git a/mpact/sim/decoder/format.h b/mpact/sim/decoder/format.h index 1ebfbbe..c986510 100644 --- a/mpact/sim/decoder/format.h +++ b/mpact/sim/decoder/format.h
@@ -15,6 +15,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <map> #include <string> #include <tuple> #include <vector> @@ -171,9 +172,16 @@ std::string GenerateFormatInserter(std::string_view format_alias, const Format *format, int high, int size) const; + std::string GenerateReplicatedFormatInserter(std::string_view format_alias, + const Format *format, int high, + int size) const; + std::string GenerateSingleFormatInserter(std::string_view format_alias, + const Format *format, + int high) const; std::string GenerateOverlayInserter(Overlay *overlay) const; // Return string representation of the int type that contains bitwidth bits. std::string GetIntType(int bitwidth) const; + std::string GetUIntType(int bitwidth) const; int GetIntTypeBitWidth(int bitwidth) const; std::string name_;
diff --git a/mpact/sim/decoder/instruction_set.cc b/mpact/sim/decoder/instruction_set.cc index 93c61d8..a6d91cf 100644 --- a/mpact/sim/decoder/instruction_set.cc +++ b/mpact/sim/decoder/instruction_set.cc
@@ -343,6 +343,9 @@ Indent(absl::StrCat(" ", pascal_name(), "InstructionSet(")), factory_class_name, " *factory);\n" + " virtual ~", + pascal_name(), + "InstructionSet();\n" " Instruction *Decode(uint64_t address, ", encoding_type, " *encoding);\n" @@ -387,6 +390,10 @@ ToPascalCase(slot_name), "Slot(arch_state_);\n"); } absl::StrAppend(&output, "}\n"); + // Destructor. + absl::StrAppend(&output, class_name, "::~", class_name, "() {\n"); + absl::StrAppend(&output, " // empty for now.\n"); + absl::StrAppend(&output, "}\n"); // Generate the top level decode function body. absl::StrAppend(&output, "Instruction *", class_name, "::Decode(uint64_t address, ", encoding_type, @@ -887,7 +894,9 @@ position, ") {\n" " return absl::InvalidArgumentError(\n" - " absl::StrCat(\"Invalid number of operands (\", " + " absl::StrCat(\"", + opcode->pascal_name(), + ": Invalid number of operands (\", " "num_args, \") - expected ", position, "\"));\n"
diff --git a/mpact/sim/decoder/instruction_set_visitor.cc b/mpact/sim/decoder/instruction_set_visitor.cc index dae84eb..406c6dd 100644 --- a/mpact/sim/decoder/instruction_set_visitor.cc +++ b/mpact/sim/decoder/instruction_set_visitor.cc
@@ -191,7 +191,7 @@ instruction_set->namespaces()); dec_dot_h_file.close(); // Decoder .cc file. - dec_dot_cc_file << GenerateCcFileProlog(dec_dot_h_name, + dec_dot_cc_file << GenerateCcFileProlog(dec_dot_h_name, /*use_includes=*/true, instruction_set->namespaces()); dec_dot_cc_file << instruction_set->GenerateClassDefinitions( dec_dot_h_name, encoding_type_name); @@ -201,7 +201,7 @@ // Enum files. enum_h_file << GenerateSimpleHdrProlog(ToHeaderGuard(enum_h_name), instruction_set->namespaces()); - enum_cc_file << GenerateCcFileProlog(enum_h_name, + enum_cc_file << GenerateCcFileProlog(enum_h_name, /*use_includes=*/false, instruction_set->namespaces()); auto [h_output, cc_output] = instruction_set->GenerateEnums(enum_h_name); enum_h_file << h_output; @@ -575,10 +575,70 @@ Bundle *bundle = new Bundle(ctx->bundle_name->getText(), instruction_set, ctx); instruction_set->AddBundle(bundle); - context_file_map_[ctx->bundle_list()] = context_file_map_.at(ctx); - VisitBundleList(ctx->bundle_list(), bundle); - context_file_map_[ctx->slot_list()] = context_file_map_.at(ctx); - VisitSlotList(ctx->slot_list(), bundle); + int num_slot_lists = 0; + int num_bundle_lists = 0; + int num_include_file_lists = 0; + int num_semfunc_specs = 0; + for (auto *part : ctx->bundle_parts()) { + if (part->slot_list() != nullptr) { + if (num_slot_lists > 0) { + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, + "Multiple slot lists in bundle"); + return; + } + context_file_map_[part->slot_list()] = context_file_map_.at(ctx); + VisitSlotList(part->slot_list(), bundle); + num_slot_lists++; + continue; + } + if (part->bundle_list() != nullptr) { + if (num_bundle_lists > 0) { + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, + "Multiple bundle lists in bundle"); + return; + } + context_file_map_[part->bundle_list()] = context_file_map_.at(ctx); + VisitBundleList(part->bundle_list(), bundle); + num_bundle_lists++; + continue; 
+ } + if (part->include_file_list() != nullptr) { + if (num_include_file_lists > 0) { + error_listener()->semanticError( + file_names_[context_file_map_.at(ctx)], part->start, + "Multiple include file lists in bundle"); + return; + } + for (auto *include_file : part->include_file_list()->include_file()) { + // Insert the string - the call will always succeed, but the insertion + // does not happen if it already exists. + include_files_.insert(include_file->STRING_LITERAL()->getText()); + } + num_include_file_lists++; + continue; + } + if (part->semfunc_spec() != nullptr) { + if (num_semfunc_specs > 0) { + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, + "Multiple semfunc specs in bundle"); + return; + } + std::string string_literal = + part->semfunc_spec()->STRING_LITERAL(0)->getText(); + // Strip double quotes. + std::string code_string = + string_literal.substr(1, string_literal.length() - 2); + bundle->set_semfunc_code_string(code_string); + num_semfunc_specs++; + continue; + } + error_listener()->semanticError(file_names_[context_file_map_.at(ctx)], + part->start, "Unhandled bundle part type"); + return; + } } void InstructionSetVisitor::VisitSlotDeclaration( @@ -2235,7 +2295,7 @@ " virtual ResourceOperandInterface * " "GetComplexResourceOperand", "(SlotEnum slot, int entry, OpcodeEnum opcode, ComplexResourceEnum " - "resource_op, int begin, int end) { return {}; }\n"); + "resource_op, int begin, int end) { return nullptr; }\n"); absl::StrAppend( &output, " virtual std::vector<ResourceOperandInterface *> " @@ -2319,6 +2379,7 @@ "#include <string>\n" "#include <vector>\n" "\n" + "#include \"absl/container/flat_hash_map.h\"\n" "#include \"absl/status/status.h\"\n" "#include \"absl/status/statusor.h\"\n" "#include \"absl/strings/string_view.h\"\n" @@ -2370,7 +2431,7 @@ } std::string InstructionSetVisitor::GenerateCcFileProlog( - absl::string_view hdr_file_name, + absl::string_view hdr_file_name, bool use_includes, const 
std::vector<std::string> &namespaces) { std::string output; // Include files. @@ -2378,9 +2439,10 @@ absl::StrAppend(&output, "\n#include <array>\n\n" "#include \"absl/strings/str_format.h\"\n\n"); - - for (auto &include_file : include_files_) { - absl::StrAppend(&output, "#include ", include_file, "\n"); + if (use_includes) { + for (auto &include_file : include_files_) { + absl::StrAppend(&output, "#include ", include_file, "\n"); + } } absl::StrAppend(&output, "\n"); // Namespaces.
diff --git a/mpact/sim/decoder/instruction_set_visitor.h b/mpact/sim/decoder/instruction_set_visitor.h index e0f19b7..a07edc5 100644 --- a/mpact/sim/decoder/instruction_set_visitor.h +++ b/mpact/sim/decoder/instruction_set_visitor.h
@@ -187,6 +187,7 @@ std::string GenerateHdrFileEpilog(absl::string_view guard_name, const std::vector<std::string> &namespaces); std::string GenerateCcFileProlog(absl::string_view hdr_file_name, + bool use_includes, const std::vector<std::string> &namespaces); std::string GenerateNamespaceEpilog( const std::vector<std::string> &namespaces);
diff --git a/mpact/sim/decoder/mpact_sim_isa.bzl b/mpact/sim/decoder/mpact_sim_isa.bzl index 5a86d18..db01738 100644 --- a/mpact/sim/decoder/mpact_sim_isa.bzl +++ b/mpact/sim/decoder/mpact_sim_isa.bzl
@@ -198,6 +198,8 @@ lib_deps.append("@com_google_absl//absl/container:flat_hash_map") if "@com_google_absl//absl/functional:any_invocable" not in deps: lib_deps.append("@com_google_absl//absl/functional:any_invocable") + if "@com_google_absl//absl/log" not in deps: + lib_deps.append("@com_google_absl//absl/log") if "@com_google_absl//absl/strings:str_format" not in deps: lib_deps.append("@com_google_absl//absl/strings:str_format") if "@com_google_mpact-sim//mpact/sim/generic:arch_state" not in deps:
diff --git a/mpact/sim/decoder/slot.cc b/mpact/sim/decoder/slot.cc index 135539f..06b48a8 100644 --- a/mpact/sim/decoder/slot.cc +++ b/mpact/sim/decoder/slot.cc
@@ -488,7 +488,8 @@ encoder, " *encoder_;\n" " std::vector<RE2 *> regex_vec_;\n" - " RE2::Set regex_set_;\n"); + " RE2::Set regex_set_;\n" + " absl::flat_hash_map<int, int> index_to_opcode_map_;\n"); absl::StrAppend(&cc_output, class_name, "::", class_name, "(", instruction_set_->pascal_name(), "EncoderInterfaceBase *encoder) :\n" @@ -514,12 +515,16 @@ for (auto const &[name, inst_ptr] : instruction_map_) { auto [regex, opnd_locators] = GenerateRegEx(inst_ptr, formats); max_args = std::max(max_args, opnd_locators.size()); + std::string opcode_name = + absl::StrCat("OpcodeEnum::k", ToPascalCase(inst_ptr->opcode()->name())); absl::StrAppend(&cc_output, " regex_vec_.push_back(new RE2(", regex, "));\n" " index = regex_set_.Add(", regex, ", &error);\n" - " if (index == -1) return absl::InternalError(error);\n"); + " if (index == -1) return absl::InternalError(error);\n" + " index_to_opcode_map_.insert({index, static_cast<int>(", + opcode_name, ")", "});\n"); } absl::StrAppend(&h_output, " std::string args[", max_args, "];\n" @@ -576,12 +581,13 @@ for (auto index : matches) { std::vector<std::string> values; if (!Extract(text, index, values)) continue; + int opcode_index = index_to_opcode_map_.at(index); )", - " auto result = encode_fcns[index](encoder_, SlotEnum::k", + " auto result = encode_fcns[opcode_index](encoder_, SlotEnum::k", pascal_name(), ", entry, \n" " " - "static_cast<OpcodeEnum>(index), address, values, resolver, " + "static_cast<OpcodeEnum>(opcode_index), address, values, resolver, " "relocations);\n", R"( if (!result.status().ok()) { @@ -1235,14 +1241,15 @@ absl::StrCat(pascal_name(), "SlotSetOperandsNull"), encoding_type, default_instruction_->opcode())); absl::StrAppend( - &output, " {OperandSetter{", pascal_name(), + &output, " {static_cast<int>(OpcodeEnum::kNone), {OperandSetter{", + pascal_name(), "SlotSetOperandsNull},\n" " ", GenerateDisassemblySetter(default_instruction_), ",\n", " ", GenerateResourceSetter(default_instruction_, encoding_type), 
",\n", " ", GenerateAttributeSetter(default_instruction_), ",\n", " SemFuncSetter{", default_instruction_->semfunc_code_string(), "}, ", - default_instruction_->opcode()->instruction_size(), "},\n"); + default_instruction_->opcode()->instruction_size(), "}},\n"); for (auto const &[unused, inst_ptr] : instruction_map_) { auto *instruction = inst_ptr; std::string opcode_name = instruction->opcode()->pascal_name(); @@ -1281,12 +1288,13 @@ } sep = ", "; } - absl::StrAppend(&output, " {OperandSetter{", operands_str, "},\n", - " ", GenerateDisassemblySetter(instruction), ",\n", - " ", GenerateResourceSetter(instruction, encoding_type), - ",\n", " ", GenerateAttributeSetter(instruction), ",\n", + absl::StrAppend(&output, " {static_cast<int>(OpcodeEnum::k", opcode_name, + "), {OperandSetter{", operands_str, "},\n", " ", + GenerateDisassemblySetter(instruction), ",\n", " ", + GenerateResourceSetter(instruction, encoding_type), ",\n", + " ", GenerateAttributeSetter(instruction), ",\n", " SemFuncSetter{", code_str, "}, ", - instruction->opcode()->instruction_size(), "},\n"); + instruction->opcode()->instruction_size(), "}},\n"); } return output; } @@ -1305,18 +1313,19 @@ class_name, "(ArchState *arch_state);\n"); // Emit Decode function generated that decodes the slot and creates and // initializes an instruction object, as well as private data members. 
- absl::StrAppend(&output, " Instruction *Decode(uint64_t address, ", - encoding_type, "* isa_encoding, SlotEnum, int entry);\n", - "\n" - " private:\n" - " ArchState *arch_state_;\n" - " std::array<InstructionInfo, ", - instruction_map_.size() + 1, "> instruction_info_", ";\n", - " static constexpr SlotEnum slot_ = SlotEnum::k", - pascal_name(), - ";\n" - "};\n" - "\n"); + absl::StrAppend( + &output, " Instruction *Decode(uint64_t address, ", encoding_type, + "* isa_encoding, SlotEnum, int entry);\n", + "\n" + " private:\n" + " ArchState *arch_state_;\n" + " absl::flat_hash_map<int, InstructionInfo> instruction_info_;\n", + //" std::array<InstructionInfo, ", + // instruction_map_.size() + 1, "> instruction_info_", ";\n", + " static constexpr SlotEnum slot_ = SlotEnum::k", pascal_name(), + ";\n" + "};\n" + "\n"); return output; }
diff --git a/mpact/sim/generic/decoder_interface.h b/mpact/sim/generic/decoder_interface.h index f555f26..86173eb 100644 --- a/mpact/sim/generic/decoder_interface.h +++ b/mpact/sim/generic/decoder_interface.h
@@ -27,7 +27,7 @@ class DecoderInterface { public: // Return a decoded instruction for the given address. If there are errors - // in the instruciton decoding, the decoder should still produce an + // in the instruction decoding, the decoder should still produce an // instruction that can be executed, but its semantic action function should // set an error condition in the simulation when executed. virtual Instruction *DecodeInstruction(uint64_t address) = 0;
diff --git a/mpact/sim/generic/fifo.h b/mpact/sim/generic/fifo.h index f5e841b..35cd9a8 100644 --- a/mpact/sim/generic/fifo.h +++ b/mpact/sim/generic/fifo.h
@@ -335,7 +335,7 @@ std::string op_name_; }; -// This is a parial specialization of the Source operand class. This is used +// This is a partial specialization of the Source operand class. This is used // when the element type stored in the data buffer is not an integral type. This // is primarily for when the fifo element type really doesn't model a register // value per se, but a more complex structure such as a dma descriptor. In this
diff --git a/mpact/sim/generic/instruction.h b/mpact/sim/generic/instruction.h index 60ab5d8..af4e7eb 100644 --- a/mpact/sim/generic/instruction.h +++ b/mpact/sim/generic/instruction.h
@@ -176,7 +176,7 @@ disasm_string_ = std::move(disasm); } - std::string AsString() const; + virtual std::string AsString() const; // Setter and getter for the integer attributes. absl::Span<const int> Attributes() const { return attributes_; }
diff --git a/mpact/sim/generic/instruction_helpers.h b/mpact/sim/generic/instruction_helpers.h index 47ba835..b3d3b31 100644 --- a/mpact/sim/generic/instruction_helpers.h +++ b/mpact/sim/generic/instruction_helpers.h
@@ -34,7 +34,7 @@ // destination operand. This version supports different types for the result and // each of the two source operands. template <typename Result, typename Argument1, typename Argument2> -inline void BinaryOp(Instruction *instruction, +inline void BinaryOp(const Instruction *instruction, std::function<Result(Argument1, Argument2)> operation) { Argument1 lhs = generic::GetInstructionSource<Argument1>(instruction, 0); Argument2 rhs = generic::GetInstructionSource<Argument2>(instruction, 1); @@ -49,7 +49,7 @@ // destination operand. This version supports different types for the result // and the operands, but the two source operands must have the same type. template <typename Result, typename Argument> -inline void BinaryOp(Instruction *instruction, +inline void BinaryOp(const Instruction *instruction, std::function<Result(Argument, Argument)> operation) { Argument lhs = generic::GetInstructionSource<Argument>(instruction, 0); Argument rhs = generic::GetInstructionSource<Argument>(instruction, 1); @@ -64,7 +64,7 @@ // destination operand. This version requires both result and source operands // to have the same type. template <typename Result> -inline void BinaryOp(Instruction *instruction, +inline void BinaryOp(const Instruction *instruction, std::function<Result(Result, Result)> operation) { Result lhs = generic::GetInstructionSource<Result>(instruction, 0); Result rhs = generic::GetInstructionSource<Result>(instruction, 1); @@ -79,7 +79,7 @@ // destination operand. This version supports the result and argument having // different types. template <typename Result, typename Argument> -inline void UnaryOp(Instruction *instruction, +inline void UnaryOp(const Instruction *instruction, std::function<Result(Argument)> operation) { Argument lhs = generic::GetInstructionSource<Argument>(instruction, 0); Result dest_value = operation(lhs); @@ -93,7 +93,7 @@ // destination operand. This version requires that the result and argument have // the same type. 
template <typename Result> -inline void UnaryOp(Instruction *instruction, +inline void UnaryOp(const Instruction *instruction, std::function<Result(Result)> operation) { Result lhs = generic::GetInstructionSource<Result>(instruction, 0); Result dest_value = operation(lhs); @@ -107,7 +107,7 @@ template <typename Result, typename Argument1, typename Argument2, typename Argument3> inline void TernaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument1, Argument2, Argument3)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -131,7 +131,7 @@ // the arguments have to all have the same type. template <typename Result, typename Argument> inline void TernaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument, Argument, Argument)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -151,7 +151,7 @@ // requires the result and arguments to have the same type. template <typename Result> inline void TernaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Result, Result, Result)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -171,7 +171,7 @@ // allows for different types for the result and each argument. template <typename Result, typename Argument1, typename Argument2> inline void BinaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument1, Argument2)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -191,7 +191,7 @@ // the arguments have to have the same type. 
template <typename Result, typename Argument> inline void BinaryVectorOp( - Instruction *instruction, + const Instruction *instruction, std::function<Result(Argument, Argument)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -209,7 +209,7 @@ // two operand vector instruction semantic functions. This version // requires the result and arguments to have the same type. template <typename Result> -inline void BinaryVectorOp(Instruction *instruction, +inline void BinaryVectorOp(const Instruction *instruction, std::function<Result(Result, Result)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -227,7 +227,7 @@ // single operand vector instruction semantic functions. This version // allows the result and argument to have different types. template <typename Result, typename Argument> -inline void UnaryVectorOp(Instruction *instruction, +inline void UnaryVectorOp(const Instruction *instruction, std::function<Result(Argument)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer(); @@ -244,7 +244,7 @@ // single operand vector instruction semantic functions. This version // requires the result and argument to have the same type. template <typename Result> -inline void UnaryVectorOp(Instruction *instruction, +inline void UnaryVectorOp(const Instruction *instruction, std::function<Result(Result)> operation) { auto *dst = instruction->Destination(0); auto *db = dst->AllocateDataBuffer();
diff --git a/mpact/sim/generic/operand_interface.h b/mpact/sim/generic/operand_interface.h index 60714f8..e39adc5 100644 --- a/mpact/sim/generic/operand_interface.h +++ b/mpact/sim/generic/operand_interface.h
@@ -32,7 +32,7 @@ namespace sim { namespace generic { -// The predicte operand interface is intended primarily as the interface to +// The predicate operand interface is intended primarily as the interface to // read the value of instruction predicates. It is separated from source // predicates to avoid mixing it in with the source operands needed for modeling // the instruction semantics.
diff --git a/mpact/sim/util/asm/BUILD b/mpact/sim/util/asm/BUILD index 1620657..1ba2ac2 100644 --- a/mpact/sim/util/asm/BUILD +++ b/mpact/sim/util/asm/BUILD
@@ -21,17 +21,19 @@ cc_library( name = "asm", + srcs = ["resolver.cc"], hdrs = [ "opcode_assembler_interface.h", + "resolver.h", "resolver_interface.h", ], deps = [ "@com_github_serge1_elfio//:elfio", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", - "@com_google_absl//absl/strings:str_format", ], ) @@ -44,7 +46,6 @@ "@com_github_serge1_elfio//:elfio", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/log", "@com_google_absl//absl/status",
diff --git a/mpact/sim/util/asm/opcode_assembler_interface.h b/mpact/sim/util/asm/opcode_assembler_interface.h index 80782d1..1663787 100644 --- a/mpact/sim/util/asm/opcode_assembler_interface.h +++ b/mpact/sim/util/asm/opcode_assembler_interface.h
@@ -15,12 +15,14 @@ #ifndef MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ #define MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ +#include <cstddef> #include <cstdint> #include <string> #include <vector> #include "absl/functional/any_invocable.h" #include "absl/status/status.h" +#include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "elfio/elf_types.hpp" #include "mpact/sim/util/asm/resolver_interface.h" @@ -59,12 +61,13 @@ // Takes the current address, the text for the assembly instruction (including // any label definitions), and a symbol resolver interface.Return ok status if // the text is successfully encoded into the bytes vector. Symbols for any - // labels are added using the callback function interface. - virtual absl::Status Encode(uint64_t address, absl::string_view text, - AddSymbolCallback add_symbol_callback, - ResolverInterface *resolver, - std::vector<uint8_t> &bytes, - std::vector<RelocationInfo> &relocations) = 0; + // labels are added using the callback function interface. The method returns + // the increment to the address after the instruction is encoded. + virtual absl::StatusOr<size_t> Encode( + uint64_t address, absl::string_view text, + AddSymbolCallback add_symbol_callback, ResolverInterface *resolver, + std::vector<uint8_t> &bytes, + std::vector<RelocationInfo> &relocations) = 0; }; } // namespace assembler
diff --git a/mpact/sim/util/asm/resolver.cc b/mpact/sim/util/asm/resolver.cc new file mode 100644 index 0000000..f8ec781 --- /dev/null +++ b/mpact/sim/util/asm/resolver.cc
@@ -0,0 +1,52 @@ +#include "mpact/sim/util/asm/resolver.h" + +#include <cstdint> +#include <string> + +#include "absl/container/flat_hash_map.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "elfio/elf_types.hpp" +#include "elfio/elfio_section.hpp" + +namespace mpact::sim::util::assembler { + +absl::StatusOr<uint64_t> ZeroResolver::Resolve(absl::string_view text) { + // Any symbol name should be added to the symbol table as an undefined + // symbol if it is not already there. When the symbol is defined, the + // symbol table will be updated. In the case of generating an executable + // ELF file, any unresolved symbols will result in an error. When generating + // an object file, any unresolved symbols will remain in the symbol table + // and must be handled by the linker. + add_symbol_fcn_(text); + // Return 0 for any symbol name. + return 0; +} + +SymbolResolver::SymbolResolver( + int elf_file_class, ELFIO::section *symtab, + const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices) + : elf_file_class_(elf_file_class), + symtab_(symtab), + symbol_indices_(symbol_indices) {} + +absl::StatusOr<uint64_t> SymbolResolver::Resolve(absl::string_view text) { + auto iter = symbol_indices_.find(text); + if (iter == symbol_indices_.end()) { + return absl::InvalidArgumentError( + absl::StrCat("SymbolResolver: Symbol '", text, "' not found")); + } + auto index = iter->second; + if (elf_file_class_ == ELFCLASS64) { + auto *sym = reinterpret_cast<const ELFIO::Elf64_Sym *>(symtab_->get_data()); + return sym[index].st_value; + } else if (elf_file_class_ == ELFCLASS32) { + auto *sym = reinterpret_cast<const ELFIO::Elf32_Sym *>(symtab_->get_data()); + return sym[index].st_value; + } + return absl::InternalError("Unsupported ELF file class"); +} + +} // namespace mpact::sim::util::assembler
diff --git a/mpact/sim/util/asm/resolver.h b/mpact/sim/util/asm/resolver.h new file mode 100644 index 0000000..3adb3ff --- /dev/null +++ b/mpact/sim/util/asm/resolver.h
@@ -0,0 +1,67 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_RESOLVER_H_ +#define MPACT_SIM_UTIL_ASM_RESOLVER_H_ + +#include <cstdint> +#include <string> + +#include "absl/container/flat_hash_map.h" +#include "absl/functional/any_invocable.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "elfio/elf_types.hpp" +#include "elfio/elfio.hpp" // IWYU pragma: keep +#include "elfio/elfio_section.hpp" +#include "mpact/sim/util/asm/resolver_interface.h" + +namespace mpact::sim::util::assembler { + +// A symbol resolver that always returns 0 for any symbol name. This is used +// for the first pass of parsing the assembly code, when we are just creating +// the symbols and computing the sizes of the sections. +class ZeroResolver : public ResolverInterface { + public: + // Constructor takes a callback function that will be called for each symbol + // name encountered so that it can be added to the symbol table. + template <typename T> + ZeroResolver(T add_symbol_fcn) : add_symbol_fcn_(add_symbol_fcn) {} + absl::StatusOr<uint64_t> Resolve(absl::string_view text) override; + + private: + absl::AnyInvocable<void(absl::string_view)> add_symbol_fcn_; +}; + +// A symbol resolver that uses the symbol table and the symbol indices to +// resolve symbol names to values. 
+class SymbolResolver : public ResolverInterface { + public: + SymbolResolver( + int elf_file_class, ELFIO::section *symtab, + const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices); + absl::StatusOr<uint64_t> Resolve(absl::string_view text) override; + + private: + // Elf file class. + int elf_file_class_ = 0; + // The symbol table ELF section. + ELFIO::section *symtab_; + // Map from symbol name to symbol index in the symbol table. + const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices_; +}; + +} // namespace mpact::sim::util::assembler + +#endif // MPACT_SIM_UTIL_ASM_RESOLVER_H_
diff --git a/mpact/sim/util/asm/simple_assembler.cc b/mpact/sim/util/asm/simple_assembler.cc index 1622c79..d0b4d54 100644 --- a/mpact/sim/util/asm/simple_assembler.cc +++ b/mpact/sim/util/asm/simple_assembler.cc
@@ -17,15 +17,14 @@ #include <cstddef> #include <cstdint> #include <cstring> +#include <functional> #include <istream> #include <ostream> #include <string> -#include <utility> #include <vector> #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" -#include "absl/functional/any_invocable.h" #include "absl/functional/bind_front.h" #include "absl/log/log.h" #include "absl/status/status.h" @@ -34,11 +33,13 @@ #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "elfio/elf_types.hpp" +#include "elfio/elfio.hpp" // IWYU pragma: keep #include "elfio/elfio_section.hpp" #include "elfio/elfio_segment.hpp" #include "elfio/elfio_strings.hpp" #include "elfio/elfio_symbols.hpp" #include "mpact/sim/util/asm/opcode_assembler_interface.h" +#include "mpact/sim/util/asm/resolver.h" #include "mpact/sim/util/asm/resolver_interface.h" #include "re2/re2.h" @@ -47,69 +48,6 @@ namespace util { namespace assembler { -// A symbol resolver that always returns 0 for any symbol name. This is used -// for the first pass of parsing the assembly code, when we are just creating -// the symbols and computing the sizes of the sections. -class ZeroResolver : public ResolverInterface { - public: - // Constructor takes a callback function that will be called for each symbol - // name encountered so that it can be added to the symbol table. - template <typename T> - ZeroResolver(T add_symbol_fcn) : add_symbol_fcn_(add_symbol_fcn) {} - absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { - // Any symbol name should be added to the symbol table as an undefined - // symbol if it is not already there. When the symbol is defined, the - // symbol table will be updated. In the case of generating an executable - // ELF file, any unresolved symbols will result in an error. When generating - // an object file, any unresolved symbols will remain in the symbol table - // and must be handled by the linker. 
- add_symbol_fcn_(text); - // Return 0 for any symbol name. - return 0; - } - - private: - absl::AnyInvocable<void(absl::string_view)> add_symbol_fcn_; -}; - -// A symbol resolver that uses the symbol table and the symbol indices to -// resolve symbol names to values. -class SymbolResolver : public ResolverInterface { - public: - SymbolResolver( - int elf_file_class, ELFIO::section *symtab, - const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices) - : elf_file_class_(elf_file_class), - symtab_(symtab), - symbol_indices_(symbol_indices) {} - absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { - auto iter = symbol_indices_.find(text); - if (iter == symbol_indices_.end()) { - return absl::InvalidArgumentError( - absl::StrCat("SymbolResolver: Symbol '", text, "' not found")); - } - auto index = iter->second; - if (elf_file_class_ == ELFCLASS64) { - auto *sym = - reinterpret_cast<const ELFIO::Elf64_Sym *>(symtab_->get_data()); - return sym[index].st_value; - } else if (elf_file_class_ == ELFCLASS32) { - auto *sym = - reinterpret_cast<const ELFIO::Elf32_Sym *>(symtab_->get_data()); - return sym[index].st_value; - } - return absl::InternalError("Unsupported ELF file class"); - } - - private: - // Elf file class. - int elf_file_class_ = 0; - // The symbol table ELF section. - ELFIO::section *symtab_; - // Map from symbol name to symbol index in the symbol table. - const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices_; -}; - // Helper functions for parsing the assembly code. namespace { @@ -354,15 +292,23 @@ } SimpleAssembler::~SimpleAssembler() { - delete symbol_resolver_; delete symbol_accessor_; + symbol_accessor_ = nullptr; delete string_accessor_; + string_accessor_ = nullptr; } -absl::Status SimpleAssembler::Parse(std::istream &is) { +absl::Status SimpleAssembler::Parse(std::istream &is, + ResolverInterface *zero_resolver) { // A trivial symbol resolver that always returns 0. 
- ZeroResolver zero_resolver( - absl::bind_front(&SimpleAssembler::SimpleAddSymbol, this)); + bool own_zero_resolver = false; + std::function<void()> cleanup = []() {}; + if (zero_resolver == nullptr) { + zero_resolver = new ZeroResolver( + absl::bind_front(&SimpleAssembler::SimpleAddSymbol, this)); + own_zero_resolver = true; + cleanup = [zero_resolver]() { delete zero_resolver; }; + } // First pass of parsing the input stream. This will add symbols to the symbol // table and compute the sizes of all instructions and the sections. The // section_address_map_ will keep track of the current location within each @@ -409,10 +355,10 @@ // Pass the full line into the parse functions, they are responsible // for handling the labels in pass one. if (statement[0] == '.') { - status = ParseAsmDirective(line, address, &zero_resolver, byte_vector, + status = ParseAsmDirective(line, address, zero_resolver, byte_vector, relo_vector); } else { - status = ParseAsmStatement(line, address, &zero_resolver, byte_vector, + status = ParseAsmStatement(line, address, zero_resolver, byte_vector, relo_vector); } if (!status.ok()) return status; @@ -428,15 +374,20 @@ continue; } // Parse failure. + cleanup(); return absl::AbortedError(absl::StrCat("Parse failure: '", line, "'")); } - if (!is.eof()) return absl::InternalError("Input stream entered bad state"); + if (!is.eof()) { + cleanup(); + return absl::InternalError("Input stream entered bad state"); + } // Add undefined symbols to the symbol table. 
for (auto const &symbol : undefined_symbols_) { auto status = AddSymbol(symbol, 0, 0, STT_NOTYPE, 0, 0, nullptr); if (!status.ok()) { + cleanup(); return absl::InternalError(absl::StrCat( "Failed to add undefined symbol '", symbol, "': ", status.message())); } @@ -446,12 +397,15 @@ if (bss_section_ != nullptr) { bss_section_->set_size(section_address_map_[bss_section_]); } + cleanup(); return absl::OkStatus(); } -absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, - uint64_t entry_point) { - return CreateExecutable(base_address, absl::StrCat(entry_point)); +absl::Status SimpleAssembler::CreateExecutable( + uint64_t base_address, uint64_t entry_point, + ResolverInterface *symbol_resolver) { + return CreateExecutable(base_address, absl::StrCat(entry_point), + symbol_resolver); } // Helper function to update the symbol table entries for an executable file. @@ -501,8 +455,10 @@ delete[] symbols; } -absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, - const std::string &entry_point) { +absl::Status SimpleAssembler::CreateExecutable( + uint64_t base_address, const std::string &entry_point, + ResolverInterface *symbol_resolver) { + LOG(INFO) << "CreateExecutable"; if (!undefined_symbols_.empty()) { std::string message; absl::StrAppend( @@ -513,6 +469,7 @@ } return absl::InvalidArgumentError(message); } + LOG(INFO) << "set type to ET_EXEC"; writer_.set_type(ET_EXEC); // Section sizes are now known. So let's compute the layout and update all // the symbol values/addresses before the next pass. 
@@ -525,7 +482,10 @@ uint64_t text_segment_start = 0; if (text_section_ != nullptr) { text_segment_start = base_address & ~4095ULL; - ELFIO::segment *text_segment = writer_.segments.add(); + text_segment = writer_.segments.add(); + if (text_segment == nullptr) { + return absl::InternalError("Failed to create elf segment for text"); + } text_segment->set_type(PT_LOAD); text_segment->set_virtual_address(text_segment_start); text_segment->set_physical_address(text_segment_start); @@ -542,6 +502,9 @@ ~4095ULL; ELFIO::segment *data_segment = writer_.segments.add(); + if (data_segment == nullptr) { + return absl::InternalError("Failed to create elf segment for data"); + } data_segment->set_type(PT_LOAD); data_segment->set_virtual_address(data_segment_start); data_segment->set_physical_address(data_segment_start); @@ -575,12 +538,21 @@ section_address_map_[data_section_] = data_segment_start; section_address_map_[bss_section_] = bss_segment_start; + std::function<void()> cleanup = []() {}; + if (symbol_resolver == nullptr) { + symbol_resolver = + new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); + cleanup = [symbol_resolver]() { delete symbol_resolver; }; + } // Pass in the relocation vector to the second pass of parsing, but ignore // the values, since we are creating an executable file, and all the symbols // are resolved. std::vector<RelocationInfo> relo_vector; - auto status = ParsePassTwo(relo_vector); - if (!status.ok()) return status; + auto status = ParsePassTwo(relo_vector, symbol_resolver); + if (!status.ok()) { + cleanup(); + return status; + } // Add sections to the segments. First segment gets the text section. The // second segment gets the data and bss sections. 
@@ -595,12 +567,16 @@ bss_section_->get_addr_align()); } - auto res = SimpleTextToInt<uint64_t>(entry_point, symbol_resolver_); - if (!res.ok()) return res.status(); + auto res = SimpleTextToInt<uint64_t>(entry_point, symbol_resolver); + if (!res.ok()) { + cleanup(); + return res.status(); + } uint64_t entry_point_value = res.value(); symbol_accessor_->arrange_local_symbols(); writer_.set_entry(entry_point_value); + cleanup(); return absl::OkStatus(); } @@ -648,7 +624,8 @@ symtab_->set_info(last_local + 1); } -absl::Status SimpleAssembler::CreateRelocatable() { +absl::Status SimpleAssembler::CreateRelocatable( + ResolverInterface *symbol_resolver) { writer_.set_type(ET_REL); // Reset the section address map to zero since we are creating a relocatable // file. @@ -679,10 +656,19 @@ UpdateSymtabHeaderInfo<ELFIO::Elf32_Sym>(); } + std::function<void()> cleanup = []() {}; + if (symbol_resolver == nullptr) { + symbol_resolver = + new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); + cleanup = [symbol_resolver]() { delete symbol_resolver; }; + } // Parse the source again, collect relocations. std::vector<RelocationInfo> relo_vector; - auto status = ParsePassTwo(relo_vector); - if (!status.ok()) return status; + auto status = ParsePassTwo(relo_vector, symbol_resolver); + if (!status.ok()) { + cleanup(); + return status; + } // Handle relocations if there are any. 
if (!relo_vector.empty()) { @@ -694,10 +680,12 @@ } for (auto const &[section_index, relo_vec] : relo_map) { if (section_index == 0) { + cleanup(); return absl::InternalError( "Relocation entry with section index 0 not supported"); } if (!section_index_map_.contains(section_index)) { + cleanup(); return absl::InternalError( absl::StrCat("Section index not found: ", section_index)); } @@ -722,22 +710,23 @@ status = AddRelocationEntries<ELFIO::Elf32_Rela>( relo_vec, symbol_indices_, rela_section); } else { + cleanup(); return absl::InternalError( absl::StrCat("Unsupported ELF file class: ", elf_file_class_)); } - if (!status.ok()) return status; + if (!status.ok()) { + cleanup(); + return status; + } } } + cleanup(); return absl::OkStatus(); } absl::Status SimpleAssembler::ParsePassTwo( - std::vector<RelocationInfo> &relo_vector) { - // For the second pass, we need a symbol resolver that uses the symbol - // table and the symbol indices. - symbol_resolver_ = - new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); - + std::vector<RelocationInfo> &relo_vector, + ResolverInterface *symbol_resolver) { // Now fill in the sections. Parse each of the lines saved in the first // pass. for (auto const &line : lines_) { @@ -747,10 +736,10 @@ auto relo_size = relo_vector.size(); auto address = section_address_map_[section]; if (line[0] == '.') { - auto status = ParseAsmDirective(line, address, symbol_resolver_, + auto status = ParseAsmDirective(line, address, symbol_resolver, byte_vector, relo_vector); } else { - auto status = ParseAsmStatement(line, address, symbol_resolver_, + auto status = ParseAsmStatement(line, address, symbol_resolver, byte_vector, relo_vector); } if (!status.ok()) return status; @@ -763,6 +752,9 @@ if (byte_vector.empty()) continue; // Add data to the section, but first make sure it's not bss. 
if (section != bss_section_) { + if (section == nullptr) { + return absl::InternalError("Data is added to a null section"); + } section->append_data(reinterpret_cast<const char *>(byte_vector.data()), byte_vector.size()); } @@ -861,7 +853,7 @@ auto values = res.value(); size = values.size() * sizeof(int64_t); ConvertToBytes<int64_t>(values, byte_values); - } else if (match == "section") { + } else if (match == "sect") { // .section <name>,<type> // TODO(torerik): Implement. return absl::UnimplementedError("Section directive not implemented"); @@ -958,12 +950,12 @@ std::vector<uint8_t> &byte_values, std::vector<RelocationInfo> &relocations) { // Call the target specific assembler to encode the statement. - auto status = opcode_assembler_if_->Encode( + auto result = opcode_assembler_if_->Encode( address, line, absl::bind_front(&SimpleAssembler::AddSymbolToCurrentSection, this), resolver, byte_values, relocations); - if (!status.ok()) return status; - section_address_map_[current_section_] += byte_values.size(); + if (!result.ok()) return result.status(); + section_address_map_[current_section_] += result.value(); return absl::OkStatus(); }
diff --git a/mpact/sim/util/asm/simple_assembler.h b/mpact/sim/util/asm/simple_assembler.h index e711fc7..982b5bf 100644 --- a/mpact/sim/util/asm/simple_assembler.h +++ b/mpact/sim/util/asm/simple_assembler.h
@@ -74,7 +74,8 @@ virtual ~SimpleAssembler(); // Parse the input stream as assembly. - absl::Status Parse(std::istream &is); + absl::Status Parse(std::istream &is, + ResolverInterface *zero_resolver = nullptr); // Add the symbol to the symbol table for the current section. See ELFIO // documentation for details of the meaning of the parameters. absl::Status AddSymbolToCurrentSection(const std::string &name, @@ -90,15 +91,26 @@ // The text segment will be laid out starting at base address, followed by // the data segment. absl::Status CreateExecutable(uint64_t base_address, - const std::string &entry_point); - absl::Status CreateExecutable(uint64_t base_address, uint64_t entry_point); + const std::string &entry_point, + ResolverInterface *symbol_resolver = nullptr); + absl::Status CreateExecutable(uint64_t base_address, uint64_t entry_point, + ResolverInterface *symbol_resolver = nullptr); // Create a relocatable ELF file. - absl::Status CreateRelocatable(); + absl::Status CreateRelocatable(ResolverInterface *symbol_resolver = nullptr); // Write the ELF file to the given output stream. absl::Status Write(std::ostream &os); // Access the ELF writer. ELFIO::elfio &writer() { return writer_; } + // Add a symbol reference to the symbol table if it is not already defined. + void SimpleAddSymbol(absl::string_view name); + + // Getters. + absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices() { + return symbol_indices_; + } + ELFIO::section *symtab() { return symtab_; } + private: // Helper function to update the symbol table entries. template <typename SymbolType> @@ -110,7 +122,8 @@ template <typename SymbolType> void UpdateSymtabHeaderInfo(); // Perform second pass of parsing. - absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector); + absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector, + ResolverInterface *symbol_resolver); // Parse and process an assembly directive. 
absl::Status ParseAsmDirective(absl::string_view line, uint64_t address, ResolverInterface *resolver, @@ -125,8 +138,6 @@ absl::Status AddSymbol(const std::string &name, ELFIO::Elf64_Addr value, ELFIO::Elf_Xword size, uint8_t type, uint8_t binding, uint8_t other, ELFIO::section *section); - // Add a symbol reference to the symbol table if it is not already defined. - void SimpleAddSymbol(absl::string_view name); // Append the data to the current section. absl::Status AppendData(const char *data, size_t size); @@ -157,9 +168,6 @@ // Map that tracks the current address of each section. absl::flat_hash_map<ELFIO::section *, uint64_t> section_address_map_; - // Current symbol resolver (looks up symbols in the symbol table and returns - // their values). - ResolverInterface *symbol_resolver_ = nullptr; std::vector<std::string> lines_; // Section pointers. ELFIO::section *text_section_ = nullptr;
diff --git a/mpact/sim/util/asm/test/BUILD b/mpact/sim/util/asm/test/BUILD index 3c5e77a..692c7fe 100644 --- a/mpact/sim/util/asm/test/BUILD +++ b/mpact/sim/util/asm/test/BUILD
@@ -74,9 +74,6 @@ "//mpact/sim/util/asm", "@com_google_absl//absl/base:no_destructor", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/functional:any_invocable", - "@com_google_absl//absl/log", - "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -89,7 +86,6 @@ size = "small", srcs = ["riscv64x_asm_test.cc"], deps = [ - ":riscv64x_bin_fmt", ":riscv64x_encoder", ":riscv64x_isa", "//mpact/sim/util/asm", @@ -97,10 +93,8 @@ "@com_github_serge1_elfio//:elfio", "@com_google_absl//absl/base:no_destructor", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main",
diff --git a/mpact/sim/util/asm/test/riscv64x_asm_test.cc b/mpact/sim/util/asm/test/riscv64x_asm_test.cc index 91d9d93..3091c09 100644 --- a/mpact/sim/util/asm/test/riscv64x_asm_test.cc +++ b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
@@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <cstddef> #include <cstdint> #include <sstream> #include <string> @@ -54,10 +55,11 @@ RiscV64XAssembler(Riscv64xSlotMatcher* matcher) : label_re_("^(\\S+)\\s*:"), matcher_(matcher) {}; ~RiscV64XAssembler() override = default; - absl::Status Encode(uint64_t address, absl::string_view text, - AddSymbolCallback add_symbol_callback, - ResolverInterface* resolver, std::vector<uint8_t>& bytes, - std::vector<RelocationInfo>& relocations) override { + absl::StatusOr<size_t> Encode( + uint64_t address, absl::string_view text, + AddSymbolCallback add_symbol_callback, ResolverInterface* resolver, + std::vector<uint8_t>& bytes, + std::vector<RelocationInfo>& relocations) override { // First check to see if there is a label, if so, add it to the symbol table // with the current address. std::string label; @@ -78,7 +80,7 @@ for (int i = 0; i < size / 8; ++i) { bytes.push_back(u.b[i]); } - return absl::OkStatus(); + return bytes.size(); } private:
diff --git a/mpact/sim/util/program_loader/elf_program_loader.cc b/mpact/sim/util/program_loader/elf_program_loader.cc index 2ee8544..8371a9c 100644 --- a/mpact/sim/util/program_loader/elf_program_loader.cc +++ b/mpact/sim/util/program_loader/elf_program_loader.cc
@@ -18,8 +18,6 @@ #include <cstdint> #include <cstring> -#include <fstream> -#include <ios> #include <string> #include <utility>