Merge pull request #8 from akihikodaki:main PiperOrigin-RevId: 718094652 Change-Id: I8b6a719a9620893f82534c861a9784520f5ce53e
diff --git a/mpact/sim/decoder/BUILD b/mpact/sim/decoder/BUILD index d84909f..75511a5 100644 --- a/mpact/sim/decoder/BUILD +++ b/mpact/sim/decoder/BUILD
@@ -120,6 +120,7 @@ "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log", "@com_google_absl//absl/memory", "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/status",
diff --git a/mpact/sim/decoder/InstructionSet.g4 b/mpact/sim/decoder/InstructionSet.g4 index f26e41e..b8e71e5 100644 --- a/mpact/sim/decoder/InstructionSet.g4 +++ b/mpact/sim/decoder/InstructionSet.g4
@@ -320,10 +320,15 @@ ; source_operand - : source=IDENT + : operand | '[' array_source=IDENT ']' ; +operand + : op_attribute=OP_ATTRIBUTE '(' op_name=IDENT ')' + | op_name=IDENT + ; + // Destination operands may include a latency. dest_list @@ -331,7 +336,7 @@ ; dest_operand - : (dest=IDENT | '[' array_dest=IDENT ']') + : (operand | '[' array_dest=IDENT ']') ( '(' (expression | wildcard='*' ) ')' )? ; @@ -515,6 +520,8 @@ SLOTS : 'slots'; TEMPLATE : 'template'; +OP_ATTRIBUTE : '%reloc'; + // Other tokens. STRING_LITERAL : UNTERMINATED_STRING_LITERAL '"'; fragment UNTERMINATED_STRING_LITERAL : '"' (~["\\\r\n] | '\\' (. | EOF))*;
diff --git a/mpact/sim/decoder/bin_encoding_info.h b/mpact/sim/decoder/bin_encoding_info.h index 0709a89..918198c 100644 --- a/mpact/sim/decoder/bin_encoding_info.h +++ b/mpact/sim/decoder/bin_encoding_info.h
@@ -75,6 +75,7 @@ return include_files_; } BinDecoder *decoder() const { return decoder_; } + std::string opcode_enum() const { return opcode_enum_; } private: std::string opcode_enum_;
diff --git a/mpact/sim/decoder/bin_format_visitor.cc b/mpact/sim/decoder/bin_format_visitor.cc index ccd01d6..39ba2dc 100644 --- a/mpact/sim/decoder/bin_format_visitor.cc +++ b/mpact/sim/decoder/bin_format_visitor.cc
@@ -22,6 +22,7 @@ #include <list> #include <memory> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -175,24 +176,44 @@ ProcessSpecializations(encoding_info.get()); // Create output streams for .h and .cc files. - std::string dot_h_name = absl::StrCat(prefix, "_bin_decoder.h"); - std::string dot_cc_name = absl::StrCat(prefix, "_bin_decoder.cc"); - std::ofstream dot_h_file(absl::StrCat(directory, "/", dot_h_name)); - std::ofstream dot_cc_file(absl::StrCat(directory, "/", dot_cc_name)); + std::string dec_dot_h_name = absl::StrCat(prefix, "_bin_decoder.h"); + std::string dec_dot_cc_name = absl::StrCat(prefix, "_bin_decoder.cc"); + std::string enc_dot_h_name = absl::StrCat(prefix, "_bin_encoder.h"); + std::string enc_dot_cc_name = absl::StrCat(prefix, "_bin_encoder.cc"); + std::string enum_dot_h_name = absl::StrCat(prefix, "_enums.h"); + std::ofstream dec_dot_h_file(absl::StrCat(directory, "/", dec_dot_h_name)); + std::ofstream dec_dot_cc_file(absl::StrCat(directory, "/", dec_dot_cc_name)); + std::ofstream enc_dot_h_file(absl::StrCat(directory, "/", enc_dot_h_name)); + std::ofstream enc_dot_cc_file(absl::StrCat(directory, "/", enc_dot_cc_name)); - auto [h_output, cc_output] = EmitFilePrefix(dot_h_name, encoding_info.get()); - dot_h_file << h_output; - dot_cc_file << cc_output; + auto [dec_h_output, dec_cc_output] = + EmitDecoderFilePrefix(dec_dot_h_name, encoding_info.get()); + dec_dot_h_file << dec_h_output; + dec_dot_cc_file << dec_cc_output; + auto [enc_h_output, enc_cc_output] = EmitEncoderFilePrefix( + enc_dot_h_name, enum_dot_h_name, encoding_info.get()); + enc_dot_h_file << enc_h_output; + enc_dot_cc_file << enc_cc_output; // Output file prefix is the input file name. 
- auto [h_output2, cc_output2] = EmitCode(encoding_info.get()); - dot_h_file << h_output2; - dot_cc_file << cc_output2; - auto [h_output3, cc_output3] = - EmitFileSuffix(dot_h_name, encoding_info.get()); - dot_h_file << h_output3; - dot_cc_file << cc_output3; - dot_h_file.close(); - dot_cc_file.close(); + auto [dec_h_output2, dec_cc_output2] = EmitDecoderCode(encoding_info.get()); + dec_dot_h_file << dec_h_output2; + dec_dot_cc_file << dec_cc_output2; + auto [dec_h_output3, dec_cc_output3] = + EmitFileSuffix(dec_dot_h_name, encoding_info.get()); + dec_dot_h_file << dec_h_output3; + dec_dot_cc_file << dec_cc_output3; + auto [enc_h_output2, enc_cc_output2] = EmitEncoderCode(encoding_info.get()); + enc_dot_h_file << enc_h_output2; + enc_dot_cc_file << enc_cc_output2; + auto [enc_h_output3, enc_cc_output3] = + EmitFileSuffix(enc_dot_h_name, encoding_info.get()); + enc_dot_h_file << enc_h_output3; + enc_dot_cc_file << enc_cc_output3; + + dec_dot_h_file.close(); + dec_dot_cc_file.close(); + enc_dot_h_file.close(); + enc_dot_cc_file.close(); return absl::OkStatus(); } @@ -200,8 +221,8 @@ encoding->decoder()->CheckEncodings(); } -BinFormatVisitor::StringPair BinFormatVisitor::EmitFilePrefix( - const std::string &dot_h_name, BinEncodingInfo *encoding_info) { +BinFormatVisitor::StringPair BinFormatVisitor::EmitDecoderFilePrefix( + const std::string &dot_h_name, BinEncodingInfo *encoding_info) const { std::string h_string; std::string cc_string; @@ -262,7 +283,7 @@ return {h_string, cc_string}; } -BinFormatVisitor::StringPair BinFormatVisitor::EmitCode( +BinFormatVisitor::StringPair BinFormatVisitor::EmitDecoderCode( BinEncodingInfo *encoding) { std::string h_string; std::string cc_string; @@ -276,11 +297,10 @@ absl::StrAppend(&extractor_class, classes); } absl::StrAppend(&h_string, extractor_class, "};\n\n"); - absl::flat_hash_set<std::string> groups; auto *decoder = encoding->decoder(); // Generate the code for decoders. 
for (auto *group : decoder->instruction_group_vec()) { - auto [h_decoder, cc_decoder] = group->EmitCode(); + auto [h_decoder, cc_decoder] = group->EmitDecoderCode(); absl::StrAppend(&h_string, h_decoder); absl::StrAppend(&cc_string, cc_decoder); // Write out some summary information about the instruction encodings. @@ -291,6 +311,79 @@ return {h_string, cc_string}; } +std::tuple<std::string, std::string> BinFormatVisitor::EmitEncoderFilePrefix( + const std::string &dot_h_name, const std::string &enum_h_name, + BinEncodingInfo *encoding_info) const { + std::string h_string; + std::string cc_string; + + std::string guard_name = ToHeaderGuard(dot_h_name); + absl::StrAppend(&h_string, "#ifndef ", guard_name, + "\n" + "#define ", + guard_name, + "\n" + "\n" + "#include <iostream>\n" + "#include <cstdint>\n\n" + "#include \"absl/base/no_destructor.h\"\n" + "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"", + enum_h_name, "\"\n"); + absl::StrAppend(&cc_string, "#include \"", dot_h_name, + "\"\n\n" + "#include <cstdint>\n\n" + "#include \"absl/base/no_destructor.h\"\n" + "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"", + enum_h_name, "\"\n"); + for (auto &name_space : encoding_info->decoder()->namespaces()) { + auto name_space_str = absl::StrCat("namespace ", name_space, " {\n"); + absl::StrAppend(&cc_string, name_space_str); + absl::StrAppend(&h_string, name_space_str); + } + absl::StrAppend(&h_string, "\n"); + absl::StrAppend(&cc_string, "\n"); + return std::tie(h_string, cc_string); +} + +// Emits the per-format inserter structs and the kOpcodeEncodings opcode table. +std::tuple<std::string, std::string> BinFormatVisitor::EmitEncoderCode( + BinEncodingInfo *encoding) { + std::string h_string; + std::string cc_string; + // Write out the inline functions for bitfield and overlay encoding. 
+ absl::StrAppend(&h_string, "struct Encoder {\n\n"); + for (auto &[unused, format_ptr] : encoding->format_map()) { + auto functions = format_ptr->GenerateInserters(); + absl::StrAppend(&h_string, functions); + } + absl::StrAppend(&h_string, "}; // struct Encoder\n\n"); + auto *decoder = encoding->decoder(); + // Collect the (value, width) encoding of every opcode in every group. + absl::btree_map<std::string, std::tuple<uint64_t, int>> encodings; + for (auto *group : decoder->instruction_group_vec()) { + group->GetInstructionEncodings(encodings); + } + std::string opcode_enum = encoding->opcode_enum(); + absl::StrAppend(&h_string, "extern absl::NoDestructor<absl::flat_hash_map<", + opcode_enum, + ", std::tuple<uint64_t, int>>> kOpcodeEncodings;\n"); + absl::StrAppend(&cc_string, "absl::NoDestructor<absl::flat_hash_map<", + opcode_enum, + ", std::tuple<uint64_t, int>>> kOpcodeEncodings({\n"); + absl::StrAppend(&cc_string, " {", opcode_enum, "::kNone, {0x0ULL, 0}},\n"); + for (auto &[name, pair] : encodings) { + auto [value, width] = pair; + std::string enum_name = + absl::StrCat(opcode_enum, "::k", ToPascalCase(name)); + absl::StrAppend(&cc_string, " {", enum_name, ", {0x", absl::Hex(value), + "ULL, ", width, "}},\n"); + } + absl::StrAppend(&cc_string, "});\n"); + return std::tie(h_string, cc_string); +} + // Parse the range and convert to a BitRange. BitRange BinFormatVisitor::GetBitIndexRange(BitIndexRangeCtx *ctx) { int start = ConvertToInt(ctx->number(0));
diff --git a/mpact/sim/decoder/bin_format_visitor.h b/mpact/sim/decoder/bin_format_visitor.h index f10455a..a5f15fc 100644 --- a/mpact/sim/decoder/bin_format_visitor.h +++ b/mpact/sim/decoder/bin_format_visitor.h
@@ -20,6 +20,7 @@ #include <list> #include <memory> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -93,9 +94,17 @@ void PerformEncodingChecks(BinEncodingInfo *encoding); // Called to generate and emit code for the decoder according to the parsed // input file. - StringPair EmitCode(BinEncodingInfo *encoding); - StringPair EmitFilePrefix(const std::string &dot_h_name, - BinEncodingInfo *encoding_info); + StringPair EmitDecoderCode(BinEncodingInfo *encoding); + StringPair EmitDecoderFilePrefix(const std::string &dot_h_name, + BinEncodingInfo *encoding_info) const; + // Called to generate and emit code for the encoder according to the parsed + // input file. + std::tuple<std::string, std::string> EmitEncoderCode( + BinEncodingInfo *encoding); + std::tuple<std::string, std::string> EmitEncoderFilePrefix( + const std::string &dot_h_name, const std::string &enum_h_name, + BinEncodingInfo *encoding_info) const; + // Generate the file suffixes (namespace closing etc.) StringPair EmitFileSuffix(const std::string &dot_h_name, BinEncodingInfo *encoding_info); // Utility methods to parse certain nodes.
diff --git a/mpact/sim/decoder/bundle.h b/mpact/sim/decoder/bundle.h index a149ebc..ae58095 100644 --- a/mpact/sim/decoder/bundle.h +++ b/mpact/sim/decoder/bundle.h
@@ -15,15 +15,12 @@ #ifndef LMPACT_SIM_DECODER_BUNDLE_H_ #define LMPACT_SIM_DECODER_BUNDLE_H_ -#include <iostream> #include <string> #include <utility> #include <vector> -#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "mpact/sim/decoder/instruction_set_contexts.h" -#include "mpact/sim/decoder/opcode.h" #include "mpact/sim/decoder/slot.h" namespace mpact {
diff --git a/mpact/sim/decoder/format.cc b/mpact/sim/decoder/format.cc index 3997dae..c602995 100644 --- a/mpact/sim/decoder/format.cc +++ b/mpact/sim/decoder/format.cc
@@ -282,7 +282,7 @@ // e.g., two fields were named the same in different formats but referred to // different bits. void Format::PropagateExtractorsDown() { - // Remove the extractor entries with nullptrs and any extractors that + // Remove the extractor entries with null ptrs and any extractors that // have been promoted. auto e_iter = extractors_.begin(); while (e_iter != extractors_.end()) { @@ -299,7 +299,7 @@ continue; } } - // Remove the overlay extractor entries with nullptrs. + // Remove the overlay extractor entries with null ptrs. auto o_iter = overlay_extractors_.begin(); while (o_iter != overlay_extractors_.end()) { auto cur = o_iter++; @@ -388,6 +388,114 @@ return h_output; } +// This method generates the C++ code for field inserters for the current +// format. That is, the generated code will take the value of a field and insert +// it into the right place in the instruction word, clearing the old bits first. +std::string Format::GenerateFieldInserter(const Field *field) const { + std::string h_output; + absl::StrAppend(&h_output, "static inline uint64_t Insert", + ToPascalCase(field->name), + "(uint64_t value, uint64_t inst_word) {\n"); + if (declared_width_ <= 64) { + uint64_t mask = ((1ULL << field->width) - 1) << field->low; + std::string shift; + if (field->low != 0) { + shift = absl::StrCat(" << ", field->low); + } + absl::StrAppend(&h_output, " inst_word = (inst_word & ~0x", + absl::Hex(mask), "ULL)", " | ((value", shift, ") & 0x", + absl::Hex(mask), "ULL);\n"); + } else { + absl::StrAppend( + &h_output, + " #error Support for formats > 64 bits not implemented - yet.\n"); + } + absl::StrAppend(&h_output, + " return inst_word;\n" + "}\n"); + return h_output; +} + +// This method generates the C++ code for overlay inserters for the current +// format. That is, the generated code will take the value of an overlay and +// insert its components into the right places in the instruction word. 
+std::string Format::GenerateOverlayInserter(Overlay *overlay) const { + std::string h_output; + absl::StrAppend(&h_output, "static inline uint64_t Insert", + ToPascalCase(overlay->name()), + "(uint64_t value, uint64_t inst_word) {\n"); + // Emit an #error and stop if either the overlay or the format is > 64 bits. + if (overlay->declared_width() > 64) { + absl::StrAppend(&h_output, + " #error Support for overlays > 64 bits not implemented - " + "yet.\n}\n"); + return h_output; + } + if (computed_width_ > 64) { + absl::StrAppend(&h_output, + " #error Support for formats > 64 bits not implemented - " + "yet.\n}\n"); + return h_output; + } + absl::StrAppend(&h_output, " uint64_t tmp;\n"); + // Overlay bits not yet consumed; components are taken from the top bit down. + int left = overlay->declared_width(); + for (auto &bits_or_field : overlay->component_vec()) { + int width = bits_or_field->width(); + // A component with a negative high() is skipped, but its width still counts. + if (bits_or_field->high() < 0) { + left -= width; + continue; + } + uint64_t mask = ((1ULL << width) - 1); + std::string shift; + if (left - width > 0) { + shift = absl::StrCat(" >> ", left - width); + } + // Extract the component's bits into tmp; they are or-ed in at low() below. + absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", + absl::Hex(mask), "ULL;\n"); + shift.clear(); + if (bits_or_field->low() != 0) { + shift = absl::StrCat(" << ", bits_or_field->low()); + } + absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, ");\n"); + left -= width; + } + absl::StrAppend(&h_output, " return inst_word;\n}\n"); + return h_output; +} + +// This method generates the C++ code for format inserters for the current +// format. That is, the generated code will take the value of a format and +// insert it into the right place in the instruction word. 
+std::string Format::GenerateFormatInserter(std::string_view format_alias, + const Format *format, int high, + int size) const { + std::string h_output; + // The emitted inserter always takes and returns a uint64_t instruction word. + absl::StrAppend(&h_output, "static inline uint64_t Insert", + ToPascalCase(format_alias), + "(uint64_t value, uint64_t inst_word) {\n"); + if (declared_width_ > 64) { + absl::StrAppend(&h_output, + " #error Support for formats > 64 bits not implemented - " + "yet.\n}\n"); + return h_output; + } + int width = format->declared_width(); + int low = high - width + 1; + uint64_t mask = ((1ULL << width) - 1) << low; + std::string shift; + if (low != 0) { + shift = absl::StrCat(" << ", low); + } + absl::StrAppend(&h_output, " return (inst_word & (~0x", absl::Hex(mask), + "ULL))", " | ((value ", shift, ") & 0x", absl::Hex(mask), + "ULL);\n}\n"); + return h_output; +} + // This method generates the format extractors for the current format (for when // a format contains other formats). std::string Format::GenerateFormatExtractor(absl::string_view format_alias, @@ -485,8 +593,37 @@ return h_output; } +// Top level function called to generate all the inserters for this format. +std::string Format::GenerateInserters() const { + std::string h_output; + // Nothing is emitted for a format that has no field, format or overlay entries. + if (extractors_.empty() && overlay_extractors_.empty()) { + return h_output; + } + absl::StrAppend(&h_output, "struct ", ToPascalCase(name()), " {\n\n"); + // First fields and formats. + for (auto &[unused, field_or_format_ptr] : extractors_) { + if (field_or_format_ptr->is_field()) { + auto inserter = GenerateFieldInserter(field_or_format_ptr->field()); + absl::StrAppend(&h_output, inserter); + } else { + auto inserter = GenerateFormatInserter( + field_or_format_ptr->format_alias(), field_or_format_ptr->format(), + field_or_format_ptr->high(), field_or_format_ptr->size()); + absl::StrAppend(&h_output, inserter); + } + } + // Next the overlays. 
+ for (auto &[unused, overlay_ptr] : overlay_extractors_) { + auto inserter = GenerateOverlayInserter(overlay_ptr); + absl::StrAppend(&h_output, inserter); + } + absl::StrAppend(&h_output, "}; // struct ", ToPascalCase(name()), "\n\n"); + return h_output; +} + // Top level function called to generate all the extractors for this format. -std::tuple<std::string, std::string> Format::GenerateExtractors() { +std::tuple<std::string, std::string> Format::GenerateExtractors() const { std::string class_output; std::string h_output; if (extractors_.empty() && overlay_extractors_.empty()) {
diff --git a/mpact/sim/decoder/format.h b/mpact/sim/decoder/format.h index 76cc38f..1ebfbbe 100644 --- a/mpact/sim/decoder/format.h +++ b/mpact/sim/decoder/format.h
@@ -15,7 +15,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <map> #include <string> #include <tuple> #include <vector> @@ -140,7 +139,9 @@ void PropagateExtractorsUp(); void PropagateExtractorsDown(); // Generates definitions of the field and overlay extractors in the format. - std::tuple<std::string, std::string> GenerateExtractors(); + std::tuple<std::string, std::string> GenerateExtractors() const; + // Generates definitions of the field and overlay inserters in the format. + std::string GenerateInserters() const; // True if the current format is a descendent of format. bool IsDerivedFrom(const Format *format); @@ -165,6 +166,12 @@ const Format *format, int high, int size) const; std::string GenerateOverlayExtractor(Overlay *overlay) const; + // Inserters. + std::string GenerateFieldInserter(const Field *field) const; + std::string GenerateFormatInserter(std::string_view format_alias, + const Format *format, int high, + int size) const; + std::string GenerateOverlayInserter(Overlay *overlay) const; // Return string representation of the int type that contains bitwidth bits. std::string GetIntType(int bitwidth) const; int GetIntTypeBitWidth(int bitwidth) const;
diff --git a/mpact/sim/decoder/instruction_group.cc b/mpact/sim/decoder/instruction_group.cc index 4b4e394..527c0f4 100644 --- a/mpact/sim/decoder/instruction_group.cc +++ b/mpact/sim/decoder/instruction_group.cc
@@ -21,6 +21,7 @@ #include <tuple> #include <utility> +#include "absl/container/btree_map.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "antlr4-runtime/Token.h" @@ -28,6 +29,7 @@ #include "mpact/sim/decoder/decoder_error_listener.h" #include "mpact/sim/decoder/encoding_group.h" #include "mpact/sim/decoder/extract.h" +#include "mpact/sim/decoder/format_name.h" #include "mpact/sim/decoder/instruction_encoding.h" namespace mpact { @@ -35,6 +37,8 @@ namespace decoder { namespace bin_format { +using ::mpact::sim::machine_description::instruction_set::ToPascalCase; + InstructionGroup::InstructionGroup(std::string name, int width, std::string format_name, std::string opcode_enum, @@ -174,68 +178,77 @@ } // Emit the code in the form of two strings that are returned in a tuple. -std::tuple<std::string, std::string> InstructionGroup::EmitCode() { +std::tuple<std::string, std::string> InstructionGroup::EmitDecoderCode() { std::string h_string; std::string cc_string; + if (encoding_group_vec_.empty()) return std::make_tuple(h_string, cc_string); + // First sort the encoding group vector according to the value of the // discriminator bits. std::sort(encoding_group_vec_.begin(), encoding_group_vec_.end(), &InstructionGroupLess); - if (!encoding_group_vec_.empty()) { - std::string initializers; - // The signature for the top level decode function for this instruction - // group. - std::string signature = - absl::StrCat(opcode_enum_, " Decode", this->name(), "(", - format_->uint_type_name(), " inst_word)"); - std::string w_format_signature = absl::StrCat( - "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), - "WithFormat(", format_->uint_type_name(), " inst_word)"); - // First part of the definition of the top level decoder function. 
- std::string top_level_decoder = absl::StrCat(signature, " {\n"); - std::string w_format_top_level_decoder = - absl::StrCat(w_format_signature, " {\n"); - std::string declarations = - absl::StrCat("std::pair<", opcode_enum_, ", FormatEnum> Decode", - this->name(), "None(", format_->uint_type_name(), ");\n"); - std::string definitions = absl::StrCat( - "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), - "None(", format_->uint_type_name(), ") {\n return std::make_pair(", - opcode_enum_, "::kNone, FormatEnum::kNone);\n}\n\n"); - for (size_t i = 0; i < encoding_group_vec_.size(); i++) { - auto *grp = encoding_group_vec_[i]; - std::string name = absl::StrCat(this->name(), "_", absl::Hex(i)); - grp->EmitInitializers(name, &initializers, opcode_enum_); - grp->EmitDecoders(name, &declarations, &definitions, opcode_enum_); - absl::StrAppend(&top_level_decoder, " auto opcode = Decode", name, - "(inst_word).first;\n"); - absl::StrAppend(&w_format_top_level_decoder, - " auto opcode_format = Decode", name, "(inst_word);\n"); - if (encoding_group_vec_.size() > 1) { - absl::StrAppend(&top_level_decoder, " if (opcode != ", opcode_enum_, - "::kNone) return opcode;\n"); - absl::StrAppend(&w_format_top_level_decoder, - " if (opcode_format.first != ", opcode_enum_, - "::kNone) return opcode_format;\n"); - } - } - // Last part of the definition of the top level decoder function. - absl::StrAppend(&top_level_decoder, - " return opcode;\n" - "}\n"); + std::string initializers; + // The signature for the top level decode function for this instruction + // group. + std::string signature = + absl::StrCat(opcode_enum_, " Decode", this->name(), "(", + format_->uint_type_name(), " inst_word)"); + std::string w_format_signature = absl::StrCat( + "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), + "WithFormat(", format_->uint_type_name(), " inst_word)"); + // First part of the definition of the top level decoder function. 
+ std::string top_level_decoder = absl::StrCat(signature, " {\n"); + std::string w_format_top_level_decoder = + absl::StrCat(w_format_signature, " {\n"); + std::string declarations = + absl::StrCat("std::pair<", opcode_enum_, ", FormatEnum> Decode", + this->name(), "None(", format_->uint_type_name(), ");\n"); + std::string definitions = absl::StrCat( + "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), "None(", + format_->uint_type_name(), ") {\n return std::make_pair(", opcode_enum_, + "::kNone, FormatEnum::kNone);\n}\n\n"); + for (size_t i = 0; i < encoding_group_vec_.size(); i++) { + auto *grp = encoding_group_vec_[i]; + std::string name = absl::StrCat(this->name(), "_", absl::Hex(i)); + grp->EmitInitializers(name, &initializers, opcode_enum_); + grp->EmitDecoders(name, &declarations, &definitions, opcode_enum_); + absl::StrAppend(&top_level_decoder, " auto opcode = Decode", name, + "(inst_word).first;\n"); absl::StrAppend(&w_format_top_level_decoder, - " return opcode_format;\n" - "}\n"); - // String the different strings together in order and return. - absl::StrAppend(&cc_string, declarations, initializers, definitions, - top_level_decoder, w_format_top_level_decoder); - absl::StrAppend(&h_string, signature, ";\n", w_format_signature, ";\n"); + " auto opcode_format = Decode", name, "(inst_word);\n"); + if (encoding_group_vec_.size() > 1) { + absl::StrAppend(&top_level_decoder, " if (opcode != ", opcode_enum_, + "::kNone) return opcode;\n"); + absl::StrAppend(&w_format_top_level_decoder, + " if (opcode_format.first != ", opcode_enum_, + "::kNone) return opcode_format;\n"); + } } + // Last part of the definition of the top level decoder function. + absl::StrAppend(&top_level_decoder, + " return opcode;\n" + "}\n"); + absl::StrAppend(&w_format_top_level_decoder, + " return opcode_format;\n" + "}\n"); + // String the different strings together in order and return. 
+ absl::StrAppend(&cc_string, declarations, initializers, definitions, + top_level_decoder, w_format_top_level_decoder); + absl::StrAppend(&h_string, signature, ";\n", w_format_signature, ";\n"); return std::make_tuple(h_string, cc_string); } +// Adds each encoding's (value, group width) to encodings, keyed by PascalCase name. +void InstructionGroup::GetInstructionEncodings( + absl::btree_map<std::string, std::tuple<uint64_t, int>> &encodings) { + for (auto *enc : encoding_vec_) { + encodings.insert(std::make_pair(ToPascalCase(enc->name()), + std::make_tuple(enc->GetValue(), width()))); + } +} + // Write out instruction group information. std::string InstructionGroup::WriteGroup() { std::string output;
diff --git a/mpact/sim/decoder/instruction_group.h b/mpact/sim/decoder/instruction_group.h index 40459e2..f235daa 100644 --- a/mpact/sim/decoder/instruction_group.h +++ b/mpact/sim/decoder/instruction_group.h
@@ -56,7 +56,10 @@ // Check encodings for duplicates etc. void CheckEncodings(); // Generate and emit code for decoding this instruction group. - std::tuple<std::string, std::string> EmitCode(); + std::tuple<std::string, std::string> EmitDecoderCode(); + // Collect the encodings for these instructions. + void GetInstructionEncodings( + absl::btree_map<std::string, std::tuple<uint64_t, int>> &encodings); // Return a string containing information about this instruction group and // how it has been partitioned across encoding groups. std::string WriteGroup();
diff --git a/mpact/sim/decoder/instruction_set.cc b/mpact/sim/decoder/instruction_set.cc index 924a639..93c61d8 100644 --- a/mpact/sim/decoder/instruction_set.cc +++ b/mpact/sim/decoder/instruction_set.cc
@@ -16,6 +16,7 @@ #include <memory> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -456,7 +457,7 @@ } InstructionSet::StringPair InstructionSet::GenerateEnums( - absl::string_view file_name) const { + absl::string_view file_name) { std::string h_output; std::string cc_output; @@ -516,6 +517,7 @@ int pred_count = 0; absl::StrAppend(&h_output, " kNone = ", pred_count++, ",\n"); for (auto const &pred_name : predicate_operands) { + pred_op_map_.insert({pred_name, pred_count}); absl::StrAppend(&h_output, " k", pred_name, " = ", pred_count++, ",\n"); } absl::StrAppend(&h_output, " kPastMaxValue = ", pred_count, @@ -526,6 +528,7 @@ int src_count = 0; absl::StrAppend(&h_output, " kNone = ", src_count++, ",\n"); for (auto const &source_name : source_operands) { + source_op_map_.insert({source_name, src_count}); absl::StrAppend(&h_output, " k", source_name, " = ", src_count++, ",\n"); } absl::StrAppend(&h_output, " kPastMaxValue = ", src_count, @@ -536,6 +539,7 @@ int list_src_count = 0; absl::StrAppend(&h_output, " kNone = ", list_src_count++, ",\n"); for (auto const &source_name : list_source_operands) { + list_source_op_map_.insert({source_name, list_src_count}); absl::StrAppend(&h_output, " k", source_name, " = ", list_src_count++, ",\n"); } @@ -547,6 +551,7 @@ int dst_count = 0; absl::StrAppend(&h_output, " kNone = ", dst_count++, ",\n"); for (auto const &dest_name : dest_operands) { + dest_op_map_.insert({dest_name, dst_count}); absl::StrAppend(&h_output, " k", dest_name, " = ", dst_count++, ",\n"); } absl::StrAppend(&h_output, " kPastMaxValue = ", dst_count, @@ -557,6 +562,7 @@ int list_dst_count = 0; absl::StrAppend(&h_output, " kNone = ", list_dst_count++, ",\n"); for (auto const &dest_name : list_dest_operands) { + list_dest_op_map_.insert({dest_name, list_dst_count}); absl::StrAppend(&h_output, " k", dest_name, " = ", list_dst_count++, ",\n"); } @@ -671,6 +677,238 @@ return {h_output, cc_output}; } +// Returns the C++ statements that encode operand op_name, which the generated +// encoder reads from operands[position] and ors into the encoding being built. +std::string 
InstructionSet::GenerateOperandEncoder( + int position, absl::string_view op_name, const OperandLocator &locator, + const Opcode *opcode) const { + std::string output; + switch (locator.type) { + case OperandLocator::kPredicate: { + std::string pred_op = + absl::StrCat("PredOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Predicate operand ", op_name, "\n"); + absl::StrAppend( + &output, " result = encoder->GetPredOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + pred_op, ", resolver);\n"); + break; + } + case OperandLocator::kSource: { + std::string source_op = + absl::StrCat("SourceOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Source operand ", op_name, "\n"); + if (locator.is_reloc) { + absl::StrAppend(&output, + " if (auto status = encoder->AppendSrcOpRelocation(\n" + " address, operands[", + position, "], slot, entry, opcode, ", source_op, ", ", + locator.instance, + ", resolver, relocations);\n" + " !status.ok()) return status;\n"); + } + absl::StrAppend(&output, + " result = encoder->GetSrcOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + source_op, ", ", locator.instance, ", resolver);\n"); + break; + } + case OperandLocator::kSourceArray: { + std::string list_source_op = + absl::StrCat("ListSourceOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Source array operand ", op_name, "\n"); + absl::StrAppend( + &output, + " result = encoder->GetListSrcOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + list_source_op, ", ", locator.instance, ", resolver);\n"); + break; + } + case OperandLocator::kDestination: { + std::string dest_op = + absl::StrCat("DestOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Destination operand ", op_name, "\n"); + if (locator.is_reloc) { + absl::StrAppend(&output, + " if (auto status = encoder->AppendDestOpRelocation(\n" + " address, operands[", + position, "], slot, 
entry, opcode, ", dest_op, ", ", + locator.instance, + ", resolver, relocations);\n" + " !status.ok()) return status;\n"); + } + absl::StrAppend( + &output, " result = encoder->GetDestOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + dest_op, ", ", locator.instance, ", resolver);\n"); + break; + } + case OperandLocator::kDestinationArray: { + std::string list_dest_op = + absl::StrCat("ListDestOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Destination array operand ", op_name, + "\n"); + absl::StrAppend( + &output, + " result = encoder->GetListDestOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + list_dest_op, ", ", locator.instance, ", resolver);\n"); + break; + } + default: + absl::StrAppend(&output, " #error Unknown operand type ", op_name, "\n"); + break; + } + absl::StrAppend(&output, + " if (!result.ok()) return result.status();\n" + " encoding |= result.value();\n"); + return output; +} + +std::tuple<std::string, std::string> InstructionSet::GenerateEncClasses( + absl::string_view file_name, absl::string_view opcode_file_name, + absl::string_view encoder_type) const { + std::string h_output; + std::string cc_output; + std::string encoder = absl::StrCat(pascal_name(), "EncoderInterfaceBase"); + // Generate the bin encoder base class. + absl::StrAppend(&h_output, + "using ::mpact::sim::util::assembler::RelocationInfo;\n" + "using ::mpact::sim::util::assembler::ResolverInterface;\n" + "\n" + "class ", + encoder, + " {\n" + " public:\n" + " virtual ~", + encoder, + "() = default;\n" + R"( + // Returns the opcode encoding and size (in bits) of the opcode. 
+ virtual absl::StatusOr<std::tuple<uint64_t, int>> GetOpcodeEncoding( + SlotEnum slot, int entry, OpcodeEnum opcode, ResolverInterface *resolver) = 0; + virtual absl::StatusOr<uint64_t> GetSrcOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + SourceOpEnum source_op, int source_num, ResolverInterface *resolver) = 0; + virtual absl::Status AppendSrcOpRelocation(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + SourceOpEnum source_op, int source_num, ResolverInterface *resolver, + std::vector<RelocationInfo> &relocations) = 0; + virtual absl::StatusOr<uint64_t> GetDestOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + DestOpEnum dest_op, int dest_num, ResolverInterface *resolver) = 0; + virtual absl::Status AppendDestOpRelocation(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + DestOpEnum dest_op, int dest_num, ResolverInterface *resolver, + std::vector<RelocationInfo> &relocations) = 0; + virtual absl::StatusOr<uint64_t> GetListSrcOpEncoding( uint64_t address, + absl::string_view text,SlotEnum slot, int entry, OpcodeEnum opcode, + ListSourceOpEnum source_op, int source_num, ResolverInterface *resolver) = 0; + virtual absl::StatusOr<uint64_t> GetListDestOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + ListDestOpEnum dest_op, int dest_num, ResolverInterface *resolver) = 0; + virtual absl::StatusOr<uint64_t> GetPredOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + PredOpEnum pred_op, ResolverInterface *resolver) = 0; +}; + +)"); + + absl::StrAppend(&cc_output, + "using ::mpact::sim::util::assembler::ResolverInterface;\n" + "\n" + "namespace {\n\n" + "absl::StatusOr<std::tuple<uint64_t, int>> EncodeNone(", + encoder, + "*, SlotEnum, int, OpcodeEnum, uint64_t, const " + 
"std::vector<std::string> &, ResolverInterface *, " + "std::vector<RelocationInfo> &) {\n" + " return absl::NotFoundError(\"No such opcode\");\n" + "}\n\n"); + std::string array; + absl::StrAppend( + &array, + "using EncodeFcn = absl::StatusOr<std::tuple<uint64_t, int>> (*)(", + encoder, + "*, SlotEnum, int, OpcodeEnum, uint64_t, const " + "std::vector<std::string> " + "&, ResolverInterface *, std::vector<RelocationInfo> &);\n" + "EncodeFcn encode_fcns[] = {\n" + " EncodeNone,\n"); + for (auto &[name, inst_ptr] : instruction_map_) { + std::string prefix; + std::string suffix; + auto *opcode = inst_ptr->opcode(); + absl::StrAppend(&array, " Encode", opcode->pascal_name(), ",\n"); + absl::StrAppend(&prefix, "absl::StatusOr<std::tuple<uint64_t, int>> Encode", + opcode->pascal_name(), "(\n ", encoder, + " *encoder, SlotEnum slot, int entry, OpcodeEnum opcode,\n" + " uint64_t address, const " + "std::vector<std::string> &operands,\n" + " ResolverInterface *resolver, " + "std::vector<RelocationInfo> &relocations) " + "{\n"); + absl::StrAppend(&suffix, + " auto res_opcode = encoder->GetOpcodeEncoding(slot, " + "entry, opcode, resolver);\n" + " if (!res_opcode.ok()) return res_opcode.status();\n" + " auto [encoding, bit_size] = res_opcode.value();\n" + " absl::StatusOr<uint64_t> result;\n"); + int position = 0; + for (auto const *disasm_format : inst_ptr->disasm_format_vec()) { + for (auto const *format_info : disasm_format->format_info_vec) { + if (format_info->op_name.empty()) continue; + auto iter = opcode->op_locator_map().find(format_info->op_name); + if (iter == opcode->op_locator_map().end()) { + absl::StrAppend(&suffix, " #error ", format_info->op_name, + " not found in instruction opcodes\n"); + continue; + } + const auto &locator = iter->second; + absl::StrAppend(&suffix, + GenerateOperandEncoder(position++, format_info->op_name, + locator, opcode)); + } + } + absl::StrAppend(&suffix, + " return std::make_tuple(encoding, bit_size);\n" + "}\n\n"); + 
absl::StrAppend(&cc_output, prefix, + " auto num_args = operands.size();\n" + " if (num_args != ", + position, + ") {\n" + " return absl::InvalidArgumentError(\n" + " absl::StrCat(\"Invalid number of operands (\", " + "num_args, \") - expected ", + position, + "\"));\n" + " }\n", + suffix); + } + absl::StrAppend(&array, "};\n\n"); + absl::StrAppend(&cc_output, array, "\n} // namespace\n\n"); + + // Generate the regex matchers for each referenced slot. + for (auto *slot : slot_order_) { + if (!slot->is_referenced()) continue; + auto [h_slot, cc_slot] = slot->GenerateAsmRegexMatcher(); + absl::StrAppend(&h_output, h_slot); + absl::StrAppend(&cc_output, cc_slot); + } + return {h_output, cc_output}; +} + } // namespace instruction_set } // namespace machine_description } // namespace sim
diff --git a/mpact/sim/decoder/instruction_set.h b/mpact/sim/decoder/instruction_set.h index e767325..0db705c 100644 --- a/mpact/sim/decoder/instruction_set.h +++ b/mpact/sim/decoder/instruction_set.h
@@ -17,13 +17,16 @@ #include <memory> #include <string> +#include <tuple> #include <vector> +#include "absl/container/btree_map.h" #include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "mpact/sim/decoder/bundle.h" +#include "mpact/sim/decoder/instruction.h" #include "mpact/sim/decoder/opcode.h" #include "mpact/sim/decoder/resource.h" #include "mpact/sim/decoder/slot.h" @@ -70,12 +73,20 @@ absl::string_view encoding_type) const; std::string GenerateClassDefinitions(absl::string_view include_file, absl::string_view encoding_type) const; + std::tuple<std::string, std::string> GenerateEncClasses( + absl::string_view file_name, absl::string_view opcode_file_name, + absl::string_view encoder_type) const; // This method is static, as it considers all the instruction sets that were // defined. - StringPair GenerateEnums(absl::string_view file_name) const; + StringPair GenerateEnums(absl::string_view file_name); static void AddAttributeName(const std::string &name); + void AddInstruction(Instruction *inst) { + if (instruction_map_.contains(inst->opcode()->name())) return; + instruction_map_.emplace(inst->opcode()->name(), inst); + } + // Getters and setters. std::vector<std::string> &namespaces() { return namespaces_; } const std::string &name() const { return name_; } @@ -93,7 +104,25 @@ } absl::flat_hash_map<std::string, Slot *> &slot_map() { return slot_map_; } + // Maps from operand names to enum values. 
+ absl::flat_hash_map<std::string, int> &pred_op_map() { return pred_op_map_; } + absl::flat_hash_map<std::string, int> &source_op_map() { + return source_op_map_; + } + absl::flat_hash_map<std::string, int> &list_source_op_map() { + return list_source_op_map_; + } + absl::flat_hash_map<std::string, int> &dest_op_map() { return dest_op_map_; } + absl::flat_hash_map<std::string, int> &list_dest_op_map() { + return list_dest_op_map_; + } + + std::string GenerateEncodingFunctions() const; + private: + std::string GenerateOperandEncoder(int position, absl::string_view op_name, + const OperandLocator &locator, + const Opcode *opcode) const; // Add bundle and slot to list of classes that need to be generated. void AddToBundleOrder(Bundle *); void AddToSlotOrder(Slot *); @@ -107,11 +136,19 @@ // Name in PascalCase. std::string pascal_name_; Bundle *bundle_ = nullptr; + // Map from instruction name to pointer. + absl::btree_map<std::string, Instruction *> instruction_map_; // Maps from names to bundle/slot pointers. absl::flat_hash_map<std::string, Bundle *> bundle_map_; absl::flat_hash_map<std::string, Slot *> slot_map_; // Attribute name list - shared across all the isas. static absl::btree_set<std::string> *attribute_names_; + // Maps from operand names to enum values. + absl::flat_hash_map<std::string, int> pred_op_map_; + absl::flat_hash_map<std::string, int> source_op_map_; + absl::flat_hash_map<std::string, int> list_source_op_map_; + absl::flat_hash_map<std::string, int> dest_op_map_; + absl::flat_hash_map<std::string, int> list_dest_op_map_; }; } // namespace instruction_set
diff --git a/mpact/sim/decoder/instruction_set_visitor.cc b/mpact/sim/decoder/instruction_set_visitor.cc index e9f7f37..dae84eb 100644 --- a/mpact/sim/decoder/instruction_set_visitor.cc +++ b/mpact/sim/decoder/instruction_set_visitor.cc
@@ -23,6 +23,7 @@ #include <memory> #include <optional> #include <string> +#include <tuple> #include <utility> #include <variant> #include <vector> @@ -166,43 +167,69 @@ absl::StrCat(ToPascalCase(isa_name), "EncodingBase"); // Create output streams for .h and .cc files. - std::string dot_h_name = absl::StrCat(isa_prefix, "_decoder.h"); - std::string dot_cc_name = absl::StrCat(isa_prefix, "_decoder.cc"); + std::string dec_dot_h_name = absl::StrCat(isa_prefix, "_decoder.h"); + std::string dec_dot_cc_name = absl::StrCat(isa_prefix, "_decoder.cc"); + std::string enc_dot_h_name = absl::StrCat(isa_prefix, "_encoder.h"); + std::string enc_dot_cc_name = absl::StrCat(isa_prefix, "_encoder.cc"); std::string enum_h_name = absl::StrCat(isa_prefix, "_enums.h"); std::string enum_cc_name = absl::StrCat(isa_prefix, "_enums.cc"); - std::ofstream dot_h_file(absl::StrCat(directory, "/", dot_h_name)); - std::ofstream dot_cc_file(absl::StrCat(directory, "/", dot_cc_name)); + std::ofstream dec_dot_h_file(absl::StrCat(directory, "/", dec_dot_h_name)); + std::ofstream dec_dot_cc_file(absl::StrCat(directory, "/", dec_dot_cc_name)); + std::ofstream enc_dot_h_file(absl::StrCat(directory, "/", enc_dot_h_name)); + std::ofstream enc_dot_cc_file(absl::StrCat(directory, "/", enc_dot_cc_name)); std::ofstream enum_h_file(absl::StrCat(directory, "/", enum_h_name)); std::ofstream enum_cc_file(absl::StrCat(directory, "/", enum_cc_name)); // Generate the code, close the files and return. 
- std::string guard_name = ToHeaderGuard(dot_h_name); - dot_h_file << GenerateHdrFileProlog(dot_h_name, enum_h_name, guard_name, - encoding_type_name, - instruction_set->namespaces()); - dot_h_file << instruction_set->GenerateClassDeclarations( - dot_h_name, enum_h_name, encoding_type_name); - dot_h_file << GenerateHdrFileEpilog(guard_name, - instruction_set->namespaces()); - dot_cc_file << GenerateCcFileProlog(dot_h_name, - instruction_set->namespaces()); - dot_cc_file << instruction_set->GenerateClassDefinitions(dot_h_name, - encoding_type_name); - dot_cc_file << GenerateNamespaceEpilog(instruction_set->namespaces()); - enum_cc_file << GenerateCcFileProlog(enum_h_name, - instruction_set->namespaces()); + std::string guard_name = ToHeaderGuard(dec_dot_h_name); + // Decoder .h file. + dec_dot_h_file << GenerateHdrFileProlog(dec_dot_h_name, enum_h_name, + guard_name, encoding_type_name, + instruction_set->namespaces()); + dec_dot_h_file << instruction_set->GenerateClassDeclarations( + dec_dot_h_name, enum_h_name, encoding_type_name); + dec_dot_h_file << GenerateHdrFileEpilog(guard_name, + instruction_set->namespaces()); + dec_dot_h_file.close(); + // Decoder .cc file. + dec_dot_cc_file << GenerateCcFileProlog(dec_dot_h_name, + instruction_set->namespaces()); + dec_dot_cc_file << instruction_set->GenerateClassDefinitions( + dec_dot_h_name, encoding_type_name); + dec_dot_cc_file << GenerateNamespaceEpilog(instruction_set->namespaces()); + dec_dot_cc_file.close(); + + // Enum files. 
enum_h_file << GenerateSimpleHdrProlog(ToHeaderGuard(enum_h_name), instruction_set->namespaces()); + enum_cc_file << GenerateCcFileProlog(enum_h_name, + instruction_set->namespaces()); auto [h_output, cc_output] = instruction_set->GenerateEnums(enum_h_name); enum_h_file << h_output; enum_cc_file << cc_output; enum_h_file << GenerateHdrFileEpilog(ToHeaderGuard(enum_h_name), instruction_set->namespaces()); enum_cc_file << GenerateNamespaceEpilog(instruction_set->namespaces()); - dot_h_file.close(); - dot_cc_file.close(); enum_h_file.close(); enum_cc_file.close(); + // Encoder files + guard_name = ToHeaderGuard(enc_dot_h_name); + auto [enc_dot_h_prolog, enc_dot_cc_prolog] = + GenerateEncFilePrologs(enc_dot_h_name, guard_name, enum_h_name, + encoding_type_name, instruction_set->namespaces()); + enc_dot_h_file << enc_dot_h_prolog; + enc_dot_cc_file << enc_dot_cc_prolog; + auto [h_enc, cc_enc] = instruction_set->GenerateEncClasses( + enc_dot_h_name, enum_h_name, encoding_type_name); + enc_dot_h_file << h_enc; + enc_dot_cc_file << cc_enc; + enc_dot_h_file << GenerateHdrFileEpilog(guard_name, + instruction_set->namespaces()); + enc_dot_cc_file << GenerateNamespaceEpilog( + instruction_set->namespaces()); // Enum .h and .cc files. + enc_dot_h_file.close(); + enc_dot_cc_file.close(); + return absl::OkStatus(); } @@ -1515,6 +1542,7 @@ Opcode *top = result.value(); auto inst = new Instruction(top, slot); + slot->instruction_set()->AddInstruction(inst); // Get the size of the instruction if specified, otherwise use default size. 
if (opcode_ctx->size_spec() != nullptr) { @@ -1580,24 +1608,37 @@ if (ctx->pred != nullptr) { std::string name = ctx->pred->getText(); child->opcode()->set_predicate_op_name(name); - parent->opcode()->op_locator_map().insert( - std::make_pair(name, OperandLocator(op_spec_number, 'p', 0))); + parent->opcode()->op_locator_map().insert(std::make_pair( + name, OperandLocator(op_spec_number, 'p', /*is_reloc=*/false, + /*instance=*/0))); } if (ctx->source != nullptr) { int instance = 0; for (auto *source_op : ctx->source->source_operand()) { std::string name; - bool is_array; - if (source_op->source != nullptr) { - name = source_op->source->getText(); - is_array = false; + bool is_array = false; + bool is_reloc = false; + if (source_op->operand() != nullptr) { + name = source_op->operand()->op_name->getText(); + if (source_op->operand()->op_attribute != nullptr) { + auto attr = source_op->operand()->op_attribute->getText(); + if (attr == "%reloc") { + is_reloc = true; + } else { + error_listener()->semanticError( + file_names_[context_file_map_.at(slot->ctx())], + source_op->operand()->op_attribute, + absl::StrCat("Invalid operand attribute '", attr, "'")); + } + } } else { name = source_op->array_source->getText(); is_array = true; } - child->opcode()->AppendSourceOp(name, is_array); - parent->opcode()->op_locator_map().insert( - std::make_pair(name, OperandLocator(op_spec_number, 's', instance))); + child->opcode()->AppendSourceOp(name, is_array, is_reloc); + parent->opcode()->op_locator_map().insert(std::make_pair( + name, OperandLocator(op_spec_number, is_array ? 
't' : 's', is_reloc, + instance))); instance++; } } @@ -1605,10 +1646,21 @@ int instance = 0; for (auto *dest_op : ctx->dest_list()->dest_operand()) { std::string ident; - bool is_array; - if (dest_op->dest != nullptr) { - ident = dest_op->dest->getText(); - is_array = false; + bool is_array = false; + bool is_reloc = false; + if (dest_op->operand() != nullptr) { + ident = dest_op->operand()->op_name->getText(); + if (dest_op->operand()->op_attribute != nullptr) { + auto attr = dest_op->operand()->op_attribute->getText(); + if (attr == "%reloc") { + is_reloc = true; + } else { + error_listener()->semanticError( + file_names_[context_file_map_.at(slot->ctx())], + dest_op->operand()->op_attribute, + absl::StrCat("Invalid operand attribute '", attr, "'")); + } + } } else { ident = dest_op->array_dest->getText(); is_array = true; @@ -1620,18 +1672,20 @@ context_file_map_.insert( {dest_op->expression(), context_file_map_.at(slot->ctx())}); child->opcode()->AppendDestOp( - ident, is_array, + ident, is_array, is_reloc, VisitExpression(dest_op->expression(), slot, child)); } else if (dest_op->wildcard != nullptr) { - child->opcode()->AppendDestOp(ident, is_array); + child->opcode()->AppendDestOp(ident, is_array, is_reloc); } else if (slot->default_latency() != nullptr) { - child->opcode()->AppendDestOp(ident, is_array, + child->opcode()->AppendDestOp(ident, is_array, is_reloc, slot->default_latency()->DeepCopy()); } else { - child->opcode()->AppendDestOp(ident, is_array, new TemplateConstant(1)); + child->opcode()->AppendDestOp(ident, is_array, is_reloc, + new TemplateConstant(1)); } - parent->opcode()->op_locator_map().insert( - std::make_pair(ident, OperandLocator(op_spec_number, 'd', instance))); + parent->opcode()->op_locator_map().insert(std::make_pair( + ident, OperandLocator(op_spec_number, is_array ? 
'e' : 'd', is_reloc, + instance))); instance++; } } @@ -2248,6 +2302,65 @@ return output; } +std::tuple<std::string, std::string> +InstructionSetVisitor::GenerateEncFilePrologs( + absl::string_view file_name, absl::string_view guard_name, + absl::string_view opcode_file_name, absl::string_view encoding_type_name, + const std::vector<std::string> &namespaces) { + std::string h_output; + std::string cc_output; + absl::StrAppend(&h_output, "#ifndef ", guard_name, + "\n" + "#define ", + guard_name, + "\n" + "\n" + "#include <array>\n" + "#include <string>\n" + "#include <vector>\n" + "\n" + "#include \"absl/status/status.h\"\n" + "#include \"absl/status/statusor.h\"\n" + "#include \"absl/strings/string_view.h\"\n" + "#include " + "\"mpact/sim/util/asm/opcode_assembler_interface.h\"\n" + "#include \"mpact/sim/util/asm/resolver_interface.h\"\n" + "#include \"re2/re2.h\"\n" + "#include \"re2/set.h\"\n" + "#include \"", + opcode_file_name, + "\"\n" + "\n"); + absl::StrAppend(&cc_output, "#include \"", file_name, + "\"\n" + "\n" + "#include <array>\n" + "#include <string>\n" + "#include <vector>\n" + "\n" + "#include \"absl/status/status.h\"\n" + "#include \"absl/status/statusor.h\"\n" + "#include \"absl/strings/str_cat.h\"\n" + "#include \"absl/strings/string_view.h\"\n" + "#include " + "\"mpact/sim/util/asm/opcode_assembler_interface.h\"\n" + "#include \"mpact/sim/util/asm/resolver_interface.h\"\n" + "#include \"re2/re2.h\"\n" + "#include \"re2/set.h\"\n" + "#include \"", + opcode_file_name, + "\"\n" + "\n"); + + for (const auto &namespace_name : namespaces) { + absl::StrAppend(&h_output, "namespace ", namespace_name, " {\n"); + absl::StrAppend(&cc_output, "namespace ", namespace_name, " {\n"); + } + absl::StrAppend(&h_output, "\n"); + absl::StrAppend(&cc_output, "\n"); + return {h_output, cc_output}; +} + std::string InstructionSetVisitor::GenerateHdrFileEpilog( absl::string_view guard_name, const std::vector<std::string> &namespaces) { std::string output;
diff --git a/mpact/sim/decoder/instruction_set_visitor.h b/mpact/sim/decoder/instruction_set_visitor.h index 5889a21..e0f19b7 100644 --- a/mpact/sim/decoder/instruction_set_visitor.h +++ b/mpact/sim/decoder/instruction_set_visitor.h
@@ -21,6 +21,7 @@ #include <memory> #include <optional> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -179,6 +180,10 @@ absl::string_view guard_name, absl::string_view encoding_base_name, const std::vector<std::string> &namespaces); + std::tuple<std::string, std::string> GenerateEncFilePrologs( + absl::string_view file_name, absl::string_view guard_name, + absl::string_view opcode_file_name, absl::string_view encoding_type_name, + const std::vector<std::string> &namespaces); std::string GenerateHdrFileEpilog(absl::string_view guard_name, const std::vector<std::string> &namespaces); std::string GenerateCcFileProlog(absl::string_view hdr_file_name,
diff --git a/mpact/sim/decoder/mpact_sim_isa.bzl b/mpact/sim/decoder/mpact_sim_isa.bzl index e8dacc6..5a86d18 100644 --- a/mpact/sim/decoder/mpact_sim_isa.bzl +++ b/mpact/sim/decoder/mpact_sim_isa.bzl
@@ -51,7 +51,7 @@ data = data, ) -def mpact_isa_decoder(name, includes, src = "", srcs = [], deps = [], isa_name = "", prefix = ""): +def mpact_isa_decoder(name, includes, src = "", srcs = [], deps = [], isa_name = "", prefix = "", testonly = False): """Generates the C++ source corresponding to an MPACT Isa decoder definition. Args: @@ -86,6 +86,8 @@ "%s_decoder.cc" % base_file_prefix, "%s_enums.h" % base_file_prefix, "%s_enums.cc" % base_file_prefix, + "%s_encoder.h" % base_file_prefix, + "%s_encoder.cc" % base_file_prefix, ] # The command to generate the files. @@ -103,6 +105,7 @@ cmd = command, heuristic_label_expansion = 0, tools = ["@com_google_mpact-sim//mpact/sim/decoder:decoder_gen"], + testonly = testonly, ) # The rule for the lib that is built from the generated sources. @@ -115,14 +118,25 @@ lib_deps.append("@com_google_mpact-sim//mpact/sim/generic:arch_state") if "@com_google_mpact-sim//mpact/sim/generic:instruction" not in deps: lib_deps.append("@com_google_mpact-sim//mpact/sim/generic:instruction") + if "@com_google_mpact-sim//mpact/sim/util/asm" not in deps: + lib_deps.append("@com_google_mpact-sim//mpact/sim/util/asm") + if "@com_googlesource_code_re2//:re2" not in deps: + lib_deps.append("@com_googlesource_code_re2//:re2") + if "@com_google_absl//absl/status" not in deps: + lib_deps.append("@com_google_absl//absl/status") + if "@com_google_absl//absl/status:statusor" not in deps: + lib_deps.append("@com_google_absl//absl/status:statusor") + if "@com_google_absl//absl/strings" not in deps: + lib_deps.append("@com_google_absl//absl/strings") native.cc_library( name = name, srcs = [f for f in out_files if f.endswith(".cc")], hdrs = [f for f in out_files if f.endswith(".h")], deps = lib_deps + deps, + testonly = testonly, ) -def mpact_bin_fmt_decoder(name, includes, src = "", srcs = [], deps = [], decoder_name = "", prefix = ""): +def mpact_bin_fmt_decoder(name, includes, src = "", srcs = [], deps = [], decoder_name = "", prefix = "", testonly = 
False): """Generates the C++ source corresponding to an MPACT Bin Format decoder definition. Args: @@ -154,6 +168,8 @@ out_files = [ "%s_bin_decoder.h" % base_file_prefix, "%s_bin_decoder.cc" % base_file_prefix, + "%s_bin_encoder.h" % base_file_prefix, + "%s_bin_encoder.cc" % base_file_prefix, ] # The command to generate the files. @@ -171,10 +187,13 @@ cmd = command, heuristic_label_expansion = 0, tools = ["@com_google_mpact-sim//mpact/sim/decoder:bin_format_gen"], + testonly = testonly, ) # The rule for the lib that is built from the generated sources. lib_deps = [] + if "@com_google_absl//absl/base:no_destructor" not in deps: + lib_deps.append("@com_google_absl//absl/base:no_destructor") if "@com_google_absl//absl/container:flat_hash_map" not in deps: lib_deps.append("@com_google_absl//absl/container:flat_hash_map") if "@com_google_absl//absl/functional:any_invocable" not in deps: @@ -190,9 +209,10 @@ srcs = [f for f in out_files if f.endswith(".cc")], hdrs = [f for f in out_files if f.endswith(".h")], deps = lib_deps + deps, + testonly = testonly, ) -def mpact_proto_fmt_decoder(name, includes, src = "", srcs = [], proto_files = [], deps = [], decoder_name = "", prefix = ""): +def mpact_proto_fmt_decoder(name, includes, src = "", srcs = [], proto_files = [], deps = [], decoder_name = "", prefix = "", testonly = False): """Generates the C++ source corresponding to an MPACT Bin Format decoder definition. Args: @@ -254,6 +274,7 @@ "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/strings:str_format", ] + deps, + testonly = testonly, ) # Strip any path component from text. Return only the string that follows the last "/".
diff --git a/mpact/sim/decoder/opcode.cc b/mpact/sim/decoder/opcode.cc index aaed476..a4a3d30 100644 --- a/mpact/sim/decoder/opcode.cc +++ b/mpact/sim/decoder/opcode.cc
@@ -44,19 +44,22 @@ dest_op_map_.clear(); } -void Opcode::AppendSourceOp(absl::string_view op_name, bool is_array) { - source_op_vec_.emplace_back(std::string(op_name), is_array); +void Opcode::AppendSourceOp(absl::string_view op_name, bool is_array, + bool is_reloc) { + source_op_vec_.emplace_back(std::string(op_name), is_array, is_reloc); } -void Opcode::AppendDestOp(absl::string_view op_name, bool is_array) { - auto *op = new DestinationOperand(std::string(op_name), is_array); +void Opcode::AppendDestOp(absl::string_view op_name, bool is_array, + bool is_reloc) { + auto *op = new DestinationOperand(std::string(op_name), is_array, is_reloc); dest_op_vec_.push_back(op); dest_op_map_.insert(std::make_pair(std::string(op_name), op)); } void Opcode::AppendDestOp(absl::string_view op_name, bool is_array, - TemplateExpression *expression) { - auto *op = new DestinationOperand(std::string(op_name), is_array, expression); + bool is_reloc, TemplateExpression *expression) { + auto *op = new DestinationOperand(std::string(op_name), is_array, is_reloc, + expression); dest_op_vec_.push_back(op); dest_op_map_.insert(std::make_pair(std::string(op_name), op)); } @@ -108,14 +111,15 @@ new_opcode->predicate_op_name_ = opcode->predicate_op_name(); new_opcode->op_locator_map_ = opcode->op_locator_map(); for (auto const &src_op : opcode->source_op_vec()) { - new_opcode->AppendSourceOp(src_op.name, src_op.is_array); + new_opcode->AppendSourceOp(src_op.name, src_op.is_array, src_op.is_reloc); } // Copy destination operands, but evaluate any latencies using the template // instantiation arguments, in case those expressions use them. 
for (auto const *dest_op : opcode->dest_op_vec()) { if (dest_op->expression() == nullptr) { - new_opcode->AppendDestOp(dest_op->name(), dest_op->is_array()); + new_opcode->AppendDestOp(dest_op->name(), dest_op->is_array(), + dest_op->is_reloc()); } else { // For each destination operand that has an expression, evaluate it in the // context of the passed in TemplateInstantiationArgs. This creates a copy @@ -124,7 +128,7 @@ auto result = dest_op->expression()->Evaluate(args); if (result.ok()) { new_opcode->AppendDestOp(dest_op->name(), dest_op->is_array(), - result.value()); + dest_op->is_reloc(), result.value()); } else { delete new_opcode; return absl::InternalError(absl::StrCat(
diff --git a/mpact/sim/decoder/opcode.h b/mpact/sim/decoder/opcode.h index ad327aa..1cacd87 100644 --- a/mpact/sim/decoder/opcode.h +++ b/mpact/sim/decoder/opcode.h
@@ -15,6 +15,8 @@ #ifndef MPACT_SIM_DECODER_OPCODE_H_ #define MPACT_SIM_DECODER_OPCODE_H_ +#include <stdbool.h> + #include <functional> #include <string> #include <utility> @@ -50,31 +52,39 @@ class DestinationOperand { public: // Operand latency is defined by the expression. - DestinationOperand(std::string name, bool is_array, + DestinationOperand(std::string name, bool is_array, bool is_reloc, TemplateExpression *expression) : name_(std::move(name)), pascal_case_name_(ToPascalCase(name_)), expression_(expression), - is_array_(is_array) {} + is_array_(is_array), + is_reloc_(is_reloc) {} // Operand latency is a constant. - DestinationOperand(std::string name, bool is_array, int latency) + DestinationOperand(std::string name, bool is_array, bool is_reloc, + int latency) : name_(std::move(name)), pascal_case_name_(ToPascalCase(name_)), expression_(new TemplateConstant(latency)), - is_array_(is_array) {} + is_array_(is_array), + is_reloc_(is_reloc) {} // This constructor is used when the destination operand latency is specified // as '*' - meaning that it will be computed at the time of decode. 
- explicit DestinationOperand(std::string name, bool is_array) + DestinationOperand(std::string name, bool is_array, bool is_reloc) : name_(std::move(name)), pascal_case_name_(ToPascalCase(name_)), expression_(nullptr), - is_array_(is_array) {} - ~DestinationOperand() { delete expression_; } + is_array_(is_array), + is_reloc_(is_reloc) {} + ~DestinationOperand() { + delete expression_; + expression_ = nullptr; + } const std::string &name() const { return name_; } const std::string &pascal_case_name() const { return pascal_case_name_; } TemplateExpression *expression() const { return expression_; } bool is_array() const { return is_array_; } + bool is_reloc() const { return is_reloc_; } bool HasLatency() const { return expression_ != nullptr; } absl::StatusOr<int> GetLatency() const { if (expression_ == nullptr) return -1; @@ -96,27 +106,39 @@ std::string pascal_case_name_; TemplateExpression *expression_; bool is_array_ = false; + bool is_reloc_ = false; }; struct SourceOperand { std::string name; bool is_array; - SourceOperand(std::string name_, bool is_array_) - : name(std::move(name_)), is_array(is_array_) {} + bool is_reloc; + SourceOperand(std::string name_, bool is_array_, bool is_reloc_) + : name(std::move(name_)), is_array(is_array_), is_reloc(is_reloc_) {} }; // This struct is used to specify the location of an operand within an // instruction. It specifies which instruction (or child instruction) number. In // this case, 0 is the top level instruction, 1 is the first child instruction -// etc. The type is 'p' for predicate operand, 's' for source operand, and 'd' -// for destination operand. The instance number specifies the entry index in the +// etc. The type is 'p' for predicate operand, 's' for source operand, 't' for +// source array operand, 'd' for destination operand, and 'e' for destination +// array operand. The instance number specifies the entry index in the // source or destination operand vector. 
struct OperandLocator { + static constexpr char kPredicate = 'p'; + static constexpr char kSource = 's'; + static constexpr char kSourceArray = 't'; + static constexpr char kDestination = 'd'; + static constexpr char kDestinationArray = 'e'; int op_spec_number; char type; + bool is_reloc; int instance; - OperandLocator(int op_spec_number_, char type_, int instance_) - : op_spec_number(op_spec_number_), type(type_), instance(instance_) {} + OperandLocator(int op_spec_number_, char type_, bool is_reloc_, int instance_) + : op_spec_number(op_spec_number_), + type(type_), + is_reloc(is_reloc_), + instance(instance_) {} }; struct FormatInfo { @@ -204,10 +226,10 @@ // to get the Predicate, Source and Destination operand interfaces (defined // in .../sim/generic/operand_interfaces.h. The implementation of these // methods will be left to the user of this generator tool. - void AppendSourceOp(absl::string_view op_name, bool is_array); - void AppendDestOp(absl::string_view op_name, bool is_array, + void AppendSourceOp(absl::string_view op_name, bool is_array, bool is_reloc); + void AppendDestOp(absl::string_view op_name, bool is_array, bool is_reloc, TemplateExpression *expression); - void AppendDestOp(absl::string_view op_name, bool is_array); + void AppendDestOp(absl::string_view op_name, bool is_array, bool is_reloc); DestinationOperand *GetDestOp(absl::string_view op_name); // Append child opcode specification. void AppendChild(Opcode *op) { child_ = op; }
diff --git a/mpact/sim/decoder/proto_encoding_group.cc b/mpact/sim/decoder/proto_encoding_group.cc index 93fda57..93be8e8 100644 --- a/mpact/sim/decoder/proto_encoding_group.cc +++ b/mpact/sim/decoder/proto_encoding_group.cc
@@ -155,7 +155,7 @@ field_info->min_value = std::numeric_limits<int64_t>::max(); field_info->max_value = std::numeric_limits<int64_t>::min(); field_info->ctx = qualifed_ident_ctx; - field_map_.insert({name, field_info}); + field_map_.emplace(name, field_info); } else { field_info = iter->second; }
diff --git a/mpact/sim/decoder/proto_encoding_info.cc b/mpact/sim/decoder/proto_encoding_info.cc index bb66269..1ced0e7 100644 --- a/mpact/sim/decoder/proto_encoding_info.cc +++ b/mpact/sim/decoder/proto_encoding_info.cc
@@ -193,7 +193,7 @@ inst_group->message_type()->full_name(), {{".", "::"}}); absl::StrAppend(&type_aliases, "using ", ToPascalCase(inst_group->name()), "MessageType = ", qualified_message_type, ";\n"); - std::string file_name = inst_group->message_type()->file()->name(); + absl::string_view file_name = inst_group->message_type()->file()->name(); // Verify that this is a .proto file. if ((file_name.size() <= 5) &&
diff --git a/mpact/sim/decoder/proto_instruction_encoding.cc b/mpact/sim/decoder/proto_instruction_encoding.cc index 50d4ce3..5b725b9 100644 --- a/mpact/sim/decoder/proto_instruction_encoding.cc +++ b/mpact/sim/decoder/proto_instruction_encoding.cc
@@ -227,7 +227,7 @@ ProtoConstraint *constraint = new ProtoConstraint{ nullptr, field_descriptor, ConstraintType::kHas, nullptr, 0, depends_on}; - has_constraints_.insert({field_descriptor->full_name(), constraint}); + has_constraints_.emplace(field_descriptor->full_name(), constraint); return constraint; }
diff --git a/mpact/sim/decoder/slot.cc b/mpact/sim/decoder/slot.cc index d8ca513..63372ff 100644 --- a/mpact/sim/decoder/slot.cc +++ b/mpact/sim/decoder/slot.cc
@@ -14,12 +14,14 @@
 #include "mpact/sim/decoder/slot.h"
+#include <algorithm>
 #include <cctype>
 #include <cstddef>
 #include <cstdlib>
 #include <map>
 #include <stack>
 #include <string>
+#include <tuple>
 #include <utility>
 #include <variant>
 #include <vector>
@@ -58,9 +60,9 @@
   }
   if (locator.type == 'p') {
     absl::StrAppend(&code, "Predicate()");
-  } else if (locator.type == 's') {
+  } else if (locator.type == 's' || locator.type == 't') {
     absl::StrAppend(&code, "Source(", locator.instance, ")");
-  } else if (locator.type == 'd') {
+  } else if (locator.type == 'd' || locator.type == 'e') {
     absl::StrAppend(&code, "Destination(", locator.instance, ")");
   } else {
     return absl::InternalError(absl::StrCat("Unknown locator type '",
@@ -284,6 +286,312 @@
   return iter->second;
 }
+namespace {
+
+// Converts a literal disassembly-format fragment into regex text.
+//   * An empty fragment yields an empty string.
+//   * A fragment consisting only of ' ' characters yields "\s+".
+//   * Each run of whitespace becomes "\s*" when the last non-space character
+//     seen before it is punctuation, and "\s+" otherwise.
+//   * Regex metacharacters are backslash-escaped; every other character is
+//     copied through unchanged.
+std::string EscapeRegexCharacters(const std::string &str) {
+  // Characters that must be preceded by a backslash in the generated regex.
+  constexpr absl::string_view kRegexMetaChars = ".()[]*+?|{}^$!\\";
+  std::string output;
+  if (str.empty()) return output;
+  if (str.find_last_not_of(' ') == std::string::npos) {
+    return "\\s+";
+  }
+  bool in_space = false;
+  // Last non-space character seen. Starts as NUL (which is not punctuation) so
+  // that leading whitespace is handled deterministically instead of reading an
+  // uninitialized value.
+  char prev = '\0';
+  for (char c : str) {
+    // The <cctype> classifiers require a value representable as unsigned char.
+    if (isspace(static_cast<unsigned char>(c))) {
+      if (!in_space) {
+        absl::StrAppend(
+            &output,
+            ispunct(static_cast<unsigned char>(prev)) ? "\\s*" : "\\s+");
+      }
+      in_space = true;
+      continue;
+    }
+    prev = c;
+    in_space = false;
+    if (kRegexMetaChars.find(c) != absl::string_view::npos) {
+      output.push_back('\\');
+    }
+    output.push_back(c);
+  }
+  return output;
+}
+
+}  // namespace
+
+// Builds the text of a C++ raw string literal (R"(...)") holding the regex
+// that matches the assembly syntax of `inst`, anchored with "^\s*" and
+// "\s*$". Each operand of the disassembly formats becomes a lazy "(\S*?)"
+// capture group, and the returned vector holds the operand locator of each
+// capture group in order.
+// NOTE(review): `formats` is neither read nor written here.
+std::tuple<std::string, std::vector<OperandLocator>> Slot::GenerateRegEx(
+    const Instruction *inst, std::vector<std::string> &formats) const {
+  std::string output = "R\"(";
+  std::string sep = "^\\s*";
+  std::vector<OperandLocator> opnd_locators;
+  // Iterate over the vector of disasm formats. These will end up concatenated
+  // with \s+ separators.
+  for (auto const *disasm_fmt : inst->disasm_format_vec()) {
+    absl::StrAppend(&output, sep);
+    sep = "\\s+";
+    // The fragments are the text part (not part of operands), that occur
+    // between the operand of the format. E.g., the commas in "r1, r2, r3".
+    auto fragment_iter = disasm_fmt->format_fragment_vec.begin();
+    auto fragment_end = disasm_fmt->format_fragment_vec.end();
+    // The formats are the instruction formats, E.g., the register names in
+    // "r1, r2, r3".
+    auto format_iter = disasm_fmt->format_info_vec.begin();
+    auto format_end = disasm_fmt->format_info_vec.end();
+    char prev = '\0';
+    // Iterate over the format fragments.
+    while (fragment_iter != fragment_end) {
+      const auto &fragment = *fragment_iter;
+      if (!fragment.empty()) {
+        auto str = EscapeRegexCharacters(fragment);
+        absl::StrAppend(&output, str);
+        prev = str.back();
+      } else {
+        prev = '\0';
+      }
+      fragment_iter++;
+      if (format_iter != format_end) {
+        // If the trailing part of output is not '\\s*', and prev is
+        // punctuation, but not '.' or '_', add a space separator.
+        auto len = output.size();
+        if (output.substr(len - 3) != "\\s*") {
+          if ((prev != '\0') &&
+              !(isalnum(static_cast<unsigned char>(prev)) || (prev == '_') ||
+                (prev == '.'))) {
+            absl::StrAppend(&output, "\\s*");
+          }
+        }
+        std::string op_name = (*format_iter)->op_name;
+        absl::StrAppend(&output, "(\\S*?)");
+        opnd_locators.push_back(inst->opcode()->op_locator_map().at(op_name));
+        if ((fragment_iter != fragment_end) && (!(*fragment_iter).empty())) {
+          char c = (*fragment_iter)[0];
+          // If the next fragment starts with a character that is neither
+          // alphanumeric nor an underscore, add a space separator. (The
+          // previous '||' form was true for every character.)
+          if (!isalnum(static_cast<unsigned char>(c)) && (c != '_')) {
+            absl::StrAppend(&output, "\\s*");
+          }
+        }
+        format_iter++;
+      }
+    }
+  }
+  absl::StrAppend(&output, "\\s*$)\"");
+  return {output, opnd_locators};
+}
+
+// Emits the opening of an anonymous namespace followed by the source of an
+// EncodeNone() function that always returns a NotFound status.
+// NOTE(review): `instruction_set` is taken by value and never used, and the
+// emitted namespace is not closed here - confirm both are intended.
+std::string GenerateEncodingFunctions(const std::string &encoder,
+                                      InstructionSet instruction_set) {
+  std::string output;
+  absl::StrAppend(&output, "namespace {\n\n");
+  absl::StrAppend(
+      &output, "absl::StatusOr<std::tuple<uint64_t, int>> EncodeNone(", encoder,
+      "*, SlotEnum, int, OpcodeEnum, uint64_t, const "
+      "std::vector<std::string> &) {\n"
+      " return absl::NotFoundError(\"No such opcode\");\n"
+      "}\n\n");
+  return output;
+}
+// Generate a regex to match the assembly string for the instructions.
+std::tuple<std::string, std::string> Slot::GenerateAsmRegexMatcher() const {
+  std::string h_output;
+  std::string cc_output;
+  std::string class_name = pascal_name() + "SlotMatcher";
+  size_t max_args = 0;
+
+  // Generate the encoder function for each instruction.
+  std::string encoder =
+      absl::StrCat(instruction_set_->pascal_name(), "EncoderInterfaceBase");
+
+  // Generate the matcher class.
+  // Header text: the matcher class declaration with its constructor,
+  // destructor, Initialize(), Encode(), the private Match()/Extract() helpers
+  // and the encoder/regex data members.
+  absl::StrAppend(
+      &h_output,
+      "// Assembly matcher.\n"
+      "class ",
+      class_name,
+      " {\n"
+      " public:\n"
+      " ",
+      class_name, "(", instruction_set_->pascal_name(),
+      "EncoderInterfaceBase *encoder);\n"
+      " ~",
+      class_name,
+      "();\n"
+      " absl::Status Initialize();\n"
+      "absl::StatusOr<std::tuple<uint64_t, int>> "
+      " Encode(uint64_t address, absl::string_view text, int entry, "
+      "ResolverInterface *resolver, std::vector<RelocationInfo> "
+      "&relocations);\n\n"
+      " private:\n"
+      " bool Match(absl::string_view text, std::vector<int> &matches);\n"
+      " bool Extract(absl::string_view text, int index, "
+      "std::vector<std::string> &values);\n"
+      " ",
+      encoder,
+      " *encoder_;\n"
+      " std::vector<RE2 *> regex_vec_;\n"
+      " RE2::Set regex_set_;\n");
+  // Source text: constructor, destructor (deletes the RE2::Arg objects and the
+  // RE2 instances) and the start of Initialize(), which registers "^$" as the
+  // first regex.
+  absl::StrAppend(&cc_output, class_name, "::", class_name, "(",
+                  instruction_set_->pascal_name(),
+                  "EncoderInterfaceBase *encoder) :\n"
+                  " encoder_(encoder),\n"
+                  " regex_set_(RE2::Options(), RE2::ANCHOR_BOTH) {}\n"
+                  "\n",
+                  class_name, "::~", class_name,
+                  "() {\n"
+                  " for (int i = 0; i < re2_args.size(); ++i) {\n"
+                  " delete re2_args[i];\n"
+                  " }\n"
+                  " for (auto *regex : regex_vec_) delete regex;\n"
+                  " regex_vec_.clear();\n"
+                  "}\n\n"
+                  "absl::Status ",
+                  class_name,
+                  "::Initialize() {\n"
+                  " std::string error;\n"
+                  " int index = regex_set_.Add(\"^$\", &error);\n"
+                  " if (index == -1) return absl::InternalError(error);\n"
+                  " regex_vec_.push_back(new RE2(\"^$\"));\n");
+  // Emit one RE2 object and one RE2::Set entry per instruction, tracking the
+  // largest number of operand captures so the args arrays can be sized.
+  std::vector<std::string> formats;
+  for (auto const &[name, inst_ptr] : instruction_map_) {
+    auto [regex, opnd_locators] = GenerateRegEx(inst_ptr, formats);
+    max_args = std::max(max_args, opnd_locators.size());
+    absl::StrAppend(&cc_output, " regex_vec_.push_back(new RE2(", regex,
+                    "));\n"
+                    " index = regex_set_.Add(",
+                    regex,
+                    ", &error);\n"
+                    " if (index == -1) return absl::InternalError(error);\n");
+  }
+  absl::StrAppend(&h_output, " std::string args[", max_args,
+                  "];\n"
+                  " std::array<RE2::Arg*, ",
+                  max_args, "> re2_args = {");
+  // max_args is a size_t, so the index uses the same type.
+  for (size_t i = 0; i < max_args; ++i) absl::StrAppend(&h_output, "nullptr, ");
+  absl::StrAppend(&h_output, " };\n");
+  // Construct the RE2::Arg objects.
+  absl::StrAppend(&cc_output,
+                  " auto ok = regex_set_.Compile();\n"
+                  " if (!ok) return absl::InternalError(\"Failed to compile "
+                  "regex set\");\n"
+                  " for (int i = 0; i < ",
+                  max_args,
+                  "; ++i) {\n"
+                  " re2_args[i] = new RE2::Arg(&args[i]);\n"
+                  " }\n");
+  // Emit Match() (set membership), Extract() (re-match with the individual
+  // regex to pull out the operand strings) and the start of Encode().
+  absl::StrAppend(
+      &cc_output,
+      " return absl::OkStatus();\n"
+      "}\n\n"
+      "bool ",
+      class_name,
+      "::Match(absl::string_view text, std::vector<int> &matches) {\n"
+      " return regex_set_.Match(text, &matches);\n"
+      "}\n\n"
+      "bool ",
+      class_name,
+      "::Extract(absl::string_view text, int index, "
+      "std::vector<std::string> &values) {\n"
+      " auto &regex = regex_vec_.at(index);\n"
+      " int arg_count = regex->NumberOfCapturingGroups();\n"
+      " if (!regex_vec_.at(index)->FullMatchN(text, *regex, "
+      "re2_args.data(), "
+      "arg_count))\n"
+      " return false;\n"
+      " for (int i = 0; i < arg_count; ++i) {\n"
+      " values.push_back(args[i]);\n"
+      " }\n"
+      " return true;\n"
+      "}\n\n"
+      "absl::StatusOr<std::tuple<uint64_t, int>> ",
+      pascal_name(),
+      "SlotMatcher::Encode(\n"
+      R"(
+  uint64_t address, absl::string_view text, int entry, ResolverInterface *resolver,
+  std::vector<RelocationInfo> &relocations) {
+  std::vector<int> matches;
+  std::string error_message = absl::StrCat("Failed to encode '", text, "':");
+  if (!Match(text, matches) || (matches.size() == 0)) {
+    return absl::NotFoundError(error_message);
+  }
+  std::vector<std::tuple<uint64_t, int>> encodings;
+  for (auto index : matches) {
+    std::vector<std::string> values;
+    if (!Extract(text, index, values)) continue;
+)",
+      " auto result = encode_fcns[index](encoder_, SlotEnum::k",
+      pascal_name(),
+      ", entry, \n"
+      " "
+      "static_cast<OpcodeEnum>(index), address, values, resolver, "
+      "relocations);\n",
+      R"(
+    if (!result.status().ok()) {
+      absl::StrAppend(&error_message, "\n ", result.status().message());
+      continue;
+
}
+    encodings.push_back(result.value());
+  }
+  if (encodings.empty()) return absl::NotFoundError(error_message);
+  if (encodings.size() > 1) {
+    return absl::NotFoundError(
+        absl::StrCat("Failed to encode '", text, "': ambiguous"));
+  }
+  return encodings[0];
+}
+
+)");
+  absl::StrAppend(&h_output, "};\n\n");
+  return {h_output, cc_output};
+}
+
 // Generate a function that will set the disassembly string for the given
 // instruction.
 std::string Slot::GenerateDisasmSetterFcn(absl::string_view name,
@@ -328,8 +636,8 @@
       in_strcat.push(true);
     }
     // Generate the strings from the format fragments and the format info.
+    std::string next_sep;
     for (auto const &frag : disasm_fmt->format_fragment_vec) {
-      std::string next_sep;
       if (!frag.empty()) {
         absl::StrAppend(&output, inner_sep, indent_string(indent), "\"", frag,
                         "\"");
@@ -341,9 +649,11 @@
           if (!format_info->is_formatted) {
             absl::StrAppend(&output, "\n#error Missing locator information");
           } else {
-            absl::StrAppend(&output, next_sep, "absl::StrFormat(\"",
-                            format_info->number_format, "\", ",
-                            ExpandExpression(*format_info, ""), ")");
+            absl::StrAppend(
+                &output, next_sep, "absl::StrFormat(\"",
+                format_info->number_format.back() == 'x' ? "0x" : "",
+                format_info->number_format, "\", ",
+                ExpandExpression(*format_info, ""), ")");
           }
         } else {
           auto key = format_info->op_name;
@@ -362,13 +672,15 @@
           if (!format_info->is_formatted) {
             absl::StrAppend(&output, next_sep, result.value(), "->AsString()");
           } else {
-            absl::StrAppend(&output, next_sep, "absl::StrFormat(\"",
-                            format_info->number_format, "\", ",
-                            ExpandExpression(*format_info, result.value()),
-                            ")");
+            absl::StrAppend(
+                &output, next_sep, "absl::StrFormat(\"",
+                format_info->number_format.back() == 'x' ? "0x" : "",
+                format_info->number_format, "\", ",
+                ExpandExpression(*format_info, result.value()), ")");
           }
         }
       }
+      next_sep = ", ";
       index++;
       if (inner_sep.empty()) inner_sep = ",\n";
     }
@@ -416,8 +728,58 @@
   return absl::StrCat(iter->second);
 }
-// Generate a string that is a unique identifier from the resources to determine
-// which instructions can share resource setter functions.
+// Generate the assembler function for the given instruction.
+//
+// Returns the C++ source text of a function named
+// <Slot>SlotAssemble<Opcode>. The emitted function:
+//   1. rejects a `values` vector whose size differs from the number of source
+//      plus destination operands of the opcode,
+//   2. fetches the base encoding with enc->GetOpEncoding(),
+//   3. calls enc->SetSrcEncoding() for each source operand and
+//      enc->SetDestEncoding() for each destination operand, returning the
+//      first non-ok status,
+//   4. validates with enc->ValidateEncoding() and returns
+//      std::tie(num_bits, inst_word).
+// `encoder_type` is the type name used for the emitted `enc` parameter.
+std::string Slot::GenerateAssemblerFcn(const Instruction *inst,
+                                       absl::string_view encoder_type) const {
+  std::string output;
+  int num_values = inst->opcode()->source_op_vec().size() +
+                   inst->opcode()->dest_op_vec().size();
+  absl::StrAppend(
+      &output, "absl::StatusOr<std::tuple<int, uint64_t>> ", pascal_name(),
+      "Slot", "Assemble", inst->opcode()->pascal_name(), "(", encoder_type,
+      " *enc, const std::vector<std::string> &values, SlotEnum "
+      "slot, int entry) {\n",
+      " if (values.size() != ", num_values,
+      ")\n"
+      " return absl::InvalidArgumentError(\"Wrong number of values\");\n"
+      " constexpr OpcodeEnum opcode = OpcodeEnum::k",
+      inst->opcode()->pascal_name(),
+      ";\n"
+      "auto [inst_word, num_bits] = enc->GetOpEncoding(opcode, slot, "
+      "entry);\n",
+      " absl::Status status;\n");
+  auto const &source_op_vec = inst->opcode()->source_op_vec();
+  for (int i = 0; i < source_op_vec.size(); ++i) {
+    std::string op_name = ToPascalCase(source_op_vec[i].name);
+    // The emitted check must test `status`, the variable declared above; the
+    // previous text referenced an undeclared `stats`.
+    absl::StrAppend(&output, " status = enc->SetSrcEncoding(values.at(", i,
+                    "), slot, entry,\n"
+                    "SourceOpEnum::k",
+                    op_name, ", ", i,
+                    ", opcode);\n"
+                    " if (!status.ok()) return status;\n");
+  }
+  auto const &dest_op_vec = inst->opcode()->dest_op_vec();
+  for (int i = 0; i < dest_op_vec.size(); ++i) {
+    // Destination strings follow the source strings in `values` (the size
+    // check above requires sources + destinations entries), so offset the
+    // index by the number of sources instead of re-reading the source slots.
+    absl::StrAppend(&output, " status = enc->SetDestEncoding(values.at(",
+                    source_op_vec.size() + i,
+                    "), slot, entry,\n"
+                    "DestOpEnum::k",
+                    dest_op_vec[i]->pascal_case_name(), ", ", i,
+                    ", opcode);\n"
+                    " if (!status.ok()) return status;\n");
+  }
+  absl::StrAppend(
+      &output,
+      " auto ok = enc->ValidateEncoding(opcode, slot, entry, inst_word);\n"
+      " if (!ok) return absl::InvalidArgumentError(\"Invalid "
+      "encoding\");\n");
+  absl::StrAppend(&output,
+                  "return std::tie(num_bits, inst_word);\n"
+                  "}\n\n");
+  return output;
+}
+
+// Generate a string that is a unique identifier from the resources to
+// determine which instructions can share resource setter functions.
 std::string Slot::CreateResourceKey(
     const std::vector<const ResourceReference *> &refs) const {
   std::string key;
@@ -480,8 +842,8 @@
   return key;
 }
-// Generate a resource setter function call for the resource "key" of the given
-// instruction. If a matching one does not exist, call to create such a
+// Generate a resource setter function call for the resource "key" of the
+// given instruction. If a matching one does not exist, call to create such a
 // function.
 std::string Slot::GenerateResourceSetter(const Instruction *inst,
                                          absl::string_view encoding_type) {
@@ -590,8 +952,8 @@
     }
   }
-  // Get all the simple resources that need to be reserved, then all the complex
-  // resources that need to be reserved when issuing this instruction.
+  // Get all the simple resources that need to be reserved, then all the
+  // complex resources that need to be reserved when issuing this instruction.
   complex_refs.clear();
   simple_refs.clear();
   for (auto const *ref : inst->resource_acquire_vec()) {
@@ -701,8 +1063,8 @@
   return output;
 }
-// Generates a string that is a unique identifier from the operands to determine
-// which instructions can share operand getter functions.
+// Generates a string that is a unique identifier from the operands to
+// determine which instructions can share operand getter functions.
 std::string Slot::CreateOperandLookupKey(const Opcode *opcode) const {
   std::string key;
   // Generate identifier for the predicate operand, if the opcode has one.
@@ -850,9 +1212,10 @@ if (instruction_map_.empty()) return output; std::string class_name = pascal_name() + "Slot"; // For each instruction create two lambda functions. One that is used to - // obtain the semantic function object for the instruction, the other a lambda - // that sets the predicate, source and target operands. Both lambdas use calls - // to virtual functions declared in the current class or a base class thereof. + // obtain the semantic function object for the instruction, the other a + // lambda that sets the predicate, source and target operands. Both lambdas + // use calls to virtual functions declared in the current class or a base + // class thereof. std::string signature = absl::StrCat("(Instruction *inst, ", encoding_type, " *enc, OpcodeEnum opcode, SlotEnum slot, int entry)"); @@ -884,8 +1247,8 @@ // Construct operand getter lookup key. std::string key = CreateOperandLookupKey(inst->opcode()); auto iter = operand_setter_name_map_.find(key); - // If the key is not found, create a new getter function, otherwise reuse - // the existing one. + // If the key is not found, create a new getter function, otherwise + // reuse the existing one. if (iter == operand_setter_name_map_.end()) { auto index = operand_setter_name_map_.size(); std::string setter_name =
diff --git a/mpact/sim/decoder/slot.h b/mpact/sim/decoder/slot.h index 79cb6ea..cc1df51 100644 --- a/mpact/sim/decoder/slot.h +++ b/mpact/sim/decoder/slot.h
@@ -17,6 +17,7 @@ #include <limits> #include <string> +#include <tuple> #include <vector> #include "absl/container/btree_map.h" @@ -91,6 +92,19 @@ absl::Status AddTemplateFormal(const std::string &name); TemplateFormal *GetTemplateFormal(const std::string &name) const; + // Generate the calls to encode the given operand. + std::string GenerateOperandEncoder(int position, absl::string_view op_name, + const OperandLocator &locator, + const Opcode *opcode) const; + // Generate regex for a given instruction. + std::tuple<std::string, std::vector<OperandLocator>> GenerateRegEx( + const Instruction *inst, std::vector<std::string> &formats) const; + // Generate regexes to match the assembly string for the instructions. + std::tuple<std::string, std::string> GenerateAsmRegexMatcher() const; + // Generate assembler function for the given instruction. + std::string GenerateAssemblerFcn(const Instruction *inst, + absl::string_view encoder_type) const; + // Resources Resource *GetOrInsertResource(const std::string &name);
diff --git a/mpact/sim/decoder/test/example_decoder_test.cc b/mpact/sim/decoder/test/example_decoder_test.cc index a851315..86799c3 100644 --- a/mpact/sim/decoder/test/example_decoder_test.cc +++ b/mpact/sim/decoder/test/example_decoder_test.cc
@@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "googlemock/include/gmock/gmock.h" +#include "googlemock/include/gmock/gmock.h" // IWYU pragma: keep #include "googletest/include/gtest/gtest.h" namespace {
diff --git a/mpact/sim/decoder/test/opcode_test.cc b/mpact/sim/decoder/test/opcode_test.cc index fba903c..8dba3f0 100644 --- a/mpact/sim/decoder/test/opcode_test.cc +++ b/mpact/sim/decoder/test/opcode_test.cc
@@ -90,7 +90,8 @@ TEST_F(OpcodeTest, SourceOperandNames) { for (int indx = 0; indx < 3; indx++) { std::string source_op_name = absl::StrCat("SourceOp", indx); - opcode_->AppendSourceOp(source_op_name, /*is_array=*/false); + opcode_->AppendSourceOp(source_op_name, /*is_array=*/false, + /*is_reloc=*/false); EXPECT_EQ(opcode_->source_op_vec().size(), indx + 1); EXPECT_STREQ(opcode_->source_op_vec()[indx].name.c_str(), source_op_name.c_str()); @@ -102,10 +103,12 @@ for (int indx = 0; indx < 2; indx++) { std::string dest_op_name = absl::StrCat("DestOp", indx); if (indx == 0) { - opcode_->AppendDestOp(dest_op_name, /*is_array=*/false); + opcode_->AppendDestOp(dest_op_name, /*is_array=*/false, + /*is_reloc=*/false); } else if (indx == 1) { // Using nullptr - the value isn't checked upon append. - opcode_->AppendDestOp(dest_op_name, /*is_array=*/false, nullptr); + opcode_->AppendDestOp(dest_op_name, /*is_array=*/false, + /*is_reloc=*/false, nullptr); } EXPECT_EQ(opcode_->dest_op_vec().size(), indx + 1); EXPECT_STREQ(opcode_->dest_op_vec()[indx]->name().c_str(),
diff --git a/mpact/sim/decoder/test/testfiles/example.isa b/mpact/sim/decoder/test/testfiles/example.isa index 3a6cc47..2038b83 100644 --- a/mpact/sim/decoder/test/testfiles/example.isa +++ b/mpact/sim/decoder/test/testfiles/example.isa
@@ -38,7 +38,7 @@ int mult_plus_2 = mult + my_const; default attributes = { one, two = 3, three = base, four = base_plus_1, five = 0}; opcodes { - vctsf{(pred : sy : dest(base_plus_1))}; + vctsf{(pred : sy : %reloc(dest)(base_plus_1))}; cvtfs{(pred : sy : dest(base + 1))}; adds{(pred : sy, sx : dest(base))}, attributes: {five = 1, six}; @@ -80,7 +80,7 @@ default opcode = semfunc: "[](Instruction *) {}"; opcodes { - ld{(pred : yop : ),(: : dest)}, + ld{(pred : %reloc(yop) : ),(: : dest)}, disasm:"%dest = sld %pred [smem:%yop]"; ld_offset{(pred : xop, yop : dest(abs(-2)))}; st{(pred)};
diff --git a/mpact/sim/generic/literal_operand.h b/mpact/sim/generic/literal_operand.h index f7d584d..c74cba2 100644 --- a/mpact/sim/generic/literal_operand.h +++ b/mpact/sim/generic/literal_operand.h
@@ -54,8 +54,12 @@ template <bool literal> class BoolLiteralOperand : public SourceOperandInterface { public: - BoolLiteralOperand() = default; - explicit BoolLiteralOperand(const std::vector<int> &shape) : shape_(shape) {} + BoolLiteralOperand() : as_string_(absl::StrCat(literal)) {} + BoolLiteralOperand(absl::string_view as_string) : as_string_(as_string) {} + BoolLiteralOperand(const std::vector<int> &shape, absl::string_view as_string) + : shape_(shape), as_string_(as_string) {} + explicit BoolLiteralOperand(const std::vector<int> &shape) + : BoolLiteralOperand(shape, absl::StrCat(literal)) {} // Methods for accessing the literal value. Always returns the same // value regardless of the index parameter. @@ -78,18 +82,19 @@ // 128 element vector quantity. std::vector<int> shape() const override { return shape_; } - std::string AsString() const override { return absl::StrCat(literal); } + std::string AsString() const override { return as_string_; } private: - std::string as_string_; std::vector<int> shape_; + std::string as_string_; }; // Integer valued literal operand. template <int literal> class IntLiteralOperand : public SourceOperandInterface { public: - IntLiteralOperand() = default; + IntLiteralOperand() : as_string_(absl::StrCat(literal)) {}; + IntLiteralOperand(absl::string_view as_string) : as_string_(as_string) {} IntLiteralOperand(const std::vector<int> &shape, absl::string_view as_string) : shape_(shape), as_string_(as_string) {} explicit IntLiteralOperand(const std::vector<int> &shape) @@ -116,7 +121,7 @@ // 128 element vector quantity. std::vector<int> shape() const override { return shape_; } - std::string AsString() const override { return absl::StrCat(literal); } + std::string AsString() const override { return as_string_; } private: std::vector<int> shape_;
diff --git a/mpact/sim/util/asm/BUILD b/mpact/sim/util/asm/BUILD new file mode 100644 index 0000000..1620657 --- /dev/null +++ b/mpact/sim/util/asm/BUILD
@@ -0,0 +1,56 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains the build rules for assembler related libraries. + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "asm", + hdrs = [ + "opcode_assembler_interface.h", + "resolver_interface.h", + ], + deps = [ + "@com_github_serge1_elfio//:elfio", + "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "simple_assembler", + srcs = ["simple_assembler.cc"], + hdrs = ["simple_assembler.h"], + deps = [ + ":asm", + "@com_github_serge1_elfio//:elfio", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/functional:bind_front", + "@com_google_absl//absl/log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@com_googlesource_code_re2//:re2", + ], +)
diff --git a/mpact/sim/util/asm/MPACT-Sim Assembler.pdf b/mpact/sim/util/asm/MPACT-Sim Assembler.pdf new file mode 100644 index 0000000..9717241 --- /dev/null +++ b/mpact/sim/util/asm/MPACT-Sim Assembler.pdf Binary files differ
diff --git a/mpact/sim/util/asm/opcode_assembler_interface.h b/mpact/sim/util/asm/opcode_assembler_interface.h new file mode 100644 index 0000000..80782d1 --- /dev/null +++ b/mpact/sim/util/asm/opcode_assembler_interface.h
@@ -0,0 +1,75 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ +#define MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ + +#include <cstdint> +#include <string> +#include <vector> + +#include "absl/functional/any_invocable.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "elfio/elf_types.hpp" +#include "mpact/sim/util/asm/resolver_interface.h" + +// This file defines the interface that the opcode assembler must implement. It +// is used by the SimpleAssembler to parse an assembly source line and convert +// it into a vector of bytes. 
+
+namespace mpact {
+namespace sim {
+namespace util {
+namespace assembler {
+
+// Plain record describing one relocation: the offset and section index it
+// applies to, the name of the symbol it refers to, a numeric relocation type
+// and an addend.
+// NOTE(review): field meanings are inferred from their names; confirm against
+// the assembler code that produces and consumes these records.
+struct RelocationInfo {
+  uint64_t offset;
+  std::string symbol;
+  uint32_t type;
+  uint64_t addend;
+  uint16_t section_index;
+  // Member-wise constructor; each argument initializes the field of the same
+  // name.
+  RelocationInfo(uint64_t offset, const std::string &symbol, uint32_t type,
+                 uint64_t addend, uint16_t section_index)
+      : offset(offset),
+        symbol(symbol),
+        type(type),
+        addend(addend),
+        section_index(section_index) {}
+};
+
+// Interface implemented by an architecture-specific opcode assembler.
+class OpcodeAssemblerInterface {
+ public:
+  virtual ~OpcodeAssemblerInterface() = default;
+  // Callback used to add a symbol. The first argument is the symbol name; the
+  // remaining arguments are the symbol's value, size, type, binding and
+  // "other" attributes, in that order.
+  using AddSymbolCallback = absl::AnyInvocable<absl::Status(
+      const std::string &, ELFIO::Elf64_Addr /*value*/,
+      ELFIO::Elf_Xword /*size*/, uint8_t /*type*/, uint8_t /*binding*/,
+      uint8_t /*other*/)>;
+  // Takes the current address, the text for the assembly instruction (including
+  // any label definitions), and a symbol resolver interface. Returns ok status
+  // if the text is successfully encoded into the bytes vector. Symbols for any
+  // labels are added using the callback function interface.
+  // NOTE(review): `relocations` is presumably appended to with any
+  // relocations the instruction requires - confirm with an implementation.
+  virtual absl::Status Encode(uint64_t address, absl::string_view text,
+                              AddSymbolCallback add_symbol_callback,
+                              ResolverInterface *resolver,
+                              std::vector<uint8_t> &bytes,
+                              std::vector<RelocationInfo> &relocations) = 0;
+};
+
+}  // namespace assembler
+}  // namespace util
+}  // namespace sim
+}  // namespace mpact
+
+#endif  // MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_
diff --git a/mpact/sim/util/asm/resolver_interface.h b/mpact/sim/util/asm/resolver_interface.h new file mode 100644 index 0000000..cefa1e3 --- /dev/null +++ b/mpact/sim/util/asm/resolver_interface.h
@@ -0,0 +1,42 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_RESOLVER_INTERFACE_H_
+#define MPACT_SIM_UTIL_ASM_RESOLVER_INTERFACE_H_
+
+#include <cstdint>
+
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+
+// This file defines the interface that the symbol resolver must implement. It
+// is used by the SimpleAssembler to resolve symbol names to values.
+
+namespace mpact {
+namespace sim {
+namespace util {
+namespace assembler {
+
+// Abstract symbol resolver. Implementations map the text of a symbol to a
+// 64-bit value.
+class ResolverInterface {
+ public:
+  virtual ~ResolverInterface() = default;
+  // Resolves `text` to its value, or returns a non-ok status when it cannot
+  // be resolved.
+  virtual absl::StatusOr<uint64_t> Resolve(absl::string_view text) = 0;
+};
+
+}  // namespace assembler
+}  // namespace util
+}  // namespace sim
+}  // namespace mpact
+
+#endif  // MPACT_SIM_UTIL_ASM_RESOLVER_INTERFACE_H_
diff --git a/mpact/sim/util/asm/simple_assembler.cc b/mpact/sim/util/asm/simple_assembler.cc new file mode 100644 index 0000000..1622c79 --- /dev/null +++ b/mpact/sim/util/asm/simple_assembler.cc
@@ -0,0 +1,1098 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mpact/sim/util/asm/simple_assembler.h" + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <istream> +#include <ostream> +#include <string> +#include <utility> +#include <vector> + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/functional/any_invocable.h" +#include "absl/functional/bind_front.h" +#include "absl/log/log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "elfio/elf_types.hpp" +#include "elfio/elfio_section.hpp" +#include "elfio/elfio_segment.hpp" +#include "elfio/elfio_strings.hpp" +#include "elfio/elfio_symbols.hpp" +#include "mpact/sim/util/asm/opcode_assembler_interface.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "re2/re2.h" + +namespace mpact { +namespace sim { +namespace util { +namespace assembler { + +// A symbol resolver that always returns 0 for any symbol name. This is used +// for the first pass of parsing the assembly code, when we are just creating +// the symbols and computing the sizes of the sections. 
+class ZeroResolver : public ResolverInterface { + public: + // Constructor takes a callback function that will be called for each symbol + // name encountered so that it can be added to the symbol table. + template <typename T> + ZeroResolver(T add_symbol_fcn) : add_symbol_fcn_(add_symbol_fcn) {} + absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { + // Any symbol name should be added to the symbol table as an undefined + // symbol if it is not already there. When the symbol is defined, the + // symbol table will be updated. In the case of generating an executable + // ELF file, any unresolved symbols will result in an error. When generating + // an object file, any unresolved symbols will remain in the symbol table + // and must be handled by the linker. + add_symbol_fcn_(text); + // Return 0 for any symbol name. + return 0; + } + + private: + absl::AnyInvocable<void(absl::string_view)> add_symbol_fcn_; +}; + +// A symbol resolver that uses the symbol table and the symbol indices to +// resolve symbol names to values. 
+class SymbolResolver : public ResolverInterface { + public: + SymbolResolver( + int elf_file_class, ELFIO::section *symtab, + const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices) + : elf_file_class_(elf_file_class), + symtab_(symtab), + symbol_indices_(symbol_indices) {} + absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { + auto iter = symbol_indices_.find(text); + if (iter == symbol_indices_.end()) { + return absl::InvalidArgumentError( + absl::StrCat("SymbolResolver: Symbol '", text, "' not found")); + } + auto index = iter->second; + if (elf_file_class_ == ELFCLASS64) { + auto *sym = + reinterpret_cast<const ELFIO::Elf64_Sym *>(symtab_->get_data()); + return sym[index].st_value; + } else if (elf_file_class_ == ELFCLASS32) { + auto *sym = + reinterpret_cast<const ELFIO::Elf32_Sym *>(symtab_->get_data()); + return sym[index].st_value; + } + return absl::InternalError("Unsupported ELF file class"); + } + + private: + // Elf file class. + int elf_file_class_ = 0; + // The symbol table ELF section. + ELFIO::section *symtab_; + // Map from symbol name to symbol index in the symbol table. + const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices_; +}; + +// Helper functions for parsing the assembly code. +namespace { + +// This template is used to convert the given type to the smallest valid type +// that absl Atoi functions can handle. +template <typename T> +struct AtoIType { + using type = T; +}; + +template <> +struct AtoIType<char> { + using type = int32_t; +}; + +template <> +struct AtoIType<uint8_t> { + using type = uint32_t; +}; + +template <> +struct AtoIType<uint16_t> { + using type = uint32_t; +}; + +template <> +struct AtoIType<int16_t> { + using type = int32_t; +}; + +template <> +struct AtoIType<int8_t> { + using type = int32_t; +}; + +// Convert the text to an integer. Checks for a leading 0x and then converts +// using absl::SimpleHexAtoi. 
If the text does not start with 0x, then it +// converts using absl::SimpleAtoi. If the text is not a valid integer, then +// it calls the resolver to see if it is a symbol name, in which case it returns +// the value of the symbol. If the text is not a valid integer or symbol name, +// then it returns an error. +template <typename T> +absl::StatusOr<T> SimpleTextToInt(absl::string_view text, + ResolverInterface *resolver = nullptr) { + T value; + if (text.substr(0, 2) == "0x") { + if (absl::SimpleHexAtoi(text.substr(2), &value)) return value; + return absl::InvalidArgumentError( + absl::StrCat("Invalid immediate: ", text)); + } + if (absl::SimpleAtoi(text, &value)) return value; + if (resolver == nullptr) { + return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text)); + } + auto result = resolver->Resolve(text); + if (!result.ok()) { + return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text)); + } + return static_cast<T>(result.value()); +} + +// Expand escaped characters in the given text. This is for use in parsing +// .string, .char, and .cstring directives. 
+std::string ExpandEscapes(absl::string_view text) { + std::string result; + bool in_escape = false; + for (auto c : text) { + if (in_escape) { + switch (c) { + case 'n': + result.push_back('\n'); + break; + case 'r': + result.push_back('\r'); + break; + case 'v': + result.push_back('\v'); + break; + case 'f': + result.push_back('\f'); + break; + case 'a': + result.push_back('\a'); + break; + case 'b': + result.push_back('\b'); + break; + case 't': + result.push_back('\t'); + break; + case '\\': + result.push_back('\\'); + break; + case '\'': + result.push_back('\''); + break; + case '"': + result.push_back('"'); + break; + case '\?': + result.push_back('?'); + break; + default: + result.push_back('\\'); + result.push_back(c); + break; + } + in_escape = false; + continue; + } + if (c == '\\') { + in_escape = true; + continue; + } + result.push_back(c); + } + if (in_escape) result.push_back('\\'); + return result; +} + +// This function is used to parse a list of values from the remainder of an +// assembly directive. The values are separated by commas. The type T is the +// type of the values, and must be an integer type or char. The resolver +// interface is optional and is used to resolve any symbol names in the text. +template <typename T> +absl::StatusOr<std::vector<T>> GetValues( + absl::string_view remainder, ResolverInterface *resolver = nullptr) { + std::vector<T> values; + static RE2 value_re("(0x[0-9a-fA-F]+|-?[0-9]+)\\s*(?:,|$)"); + std::string match; + while (RE2::Consume(&remainder, value_re, &match)) { + auto result = SimpleTextToInt<typename AtoIType<T>::type>(match); + if (!result.ok()) return result.status(); + T value = static_cast<T>(result.value()); + values.push_back(value); + } + return values; +} + +// Specialization of the above that handles char values. 
+template <> +absl::StatusOr<std::vector<char>> GetValues<char>(absl::string_view remainder, + ResolverInterface *resolver) { + std::vector<char> values; + static RE2 value_re("'(.{1,2})'\\s*(?:,|$)"); + std::string match; + while (RE2::Consume(&remainder, value_re, &match)) { + auto expanded = ExpandEscapes(match); + if (expanded.size() != 1) + return absl::InvalidArgumentError( + absl::StrCat("Invalid character: '", match, "'")); + values.push_back(expanded[0]); + } + return values; +} + +// Specialization of the above that handles double quoted string values. +template <> +absl::StatusOr<std::vector<std::string>> GetValues<std::string>( + absl::string_view remainder, ResolverInterface *resolver) { + std::vector<std::string> values; + std::string match; + static RE2 value_re("\"([^\"]*)\"\\s*(?:,|$)"); + while (RE2::Consume(&remainder, value_re, &match)) { + values.push_back(ExpandEscapes(match)); + } + return values; +} + +// Specialization of the above that handles labels (string values without +// quotes). +absl::StatusOr<std::vector<std::string>> GetLabels( + absl::string_view remainder) { + std::vector<std::string> values; + std::string match; + static RE2 label_re("([a-zA-Z_][a-zA-Z0-9_]*)\\s*(?:,|$)"); + while (RE2::Consume(&remainder, label_re, &match)) { + values.push_back(match); + } + return values; +} + +// Helper that converts a vector of integer values to a vector of bytes. 
+template <typename T> +inline void ConvertToBytes(const std::vector<T> &values, + std::vector<uint8_t> &bytes) { + union { + T i; + uint8_t b[sizeof(T)]; + } u; + for (auto value : values) { + u.i = value; + for (int i = sizeof(T) - 1; i >= 0; i--) { + bytes.push_back(u.b[i]); + } + } +} + +} // namespace + +SimpleAssembler::SimpleAssembler(absl::string_view comment, int elf_file_class, + OpcodeAssemblerInterface *opcode_assembler_if) + : elf_file_class_(elf_file_class), + opcode_assembler_if_(opcode_assembler_if), + comment_re_(absl::StrCat("^(.*?)(?:", comment, ".*?)?(\\\\)?$")), + asm_line_re_("^(?:(?:(\\S+)\\s*:)?|\\s)\\s*(.*)\\s*$"), + directive_re_( + "^\\.(align|bss|bytes|char|cstring|data|global|long|sect" + "|short|space|string|type|text|uchar|ulong|ushort|uword|word)(?:\\s+(" + ".*)" + ")?\\s*" + "$") { + // Configure the ELF file writer. + writer_.create(elf_file_class_, ELFDATA2LSB); + writer_.set_os_abi(ELFOSABI_NONE); + writer_.set_machine(EM_NONE); + // Create the symbol table section. + symtab_ = writer_.sections.add(".symtab"); + section_index_map_.insert({symtab_->get_index(), symtab_}); + symtab_->set_type(SHT_SYMTAB); + symtab_->set_addr_align(0x8); + symtab_->set_entry_size(elf_file_class_ == ELFCLASS64 + ? sizeof(ELFIO::Elf64_Sym) + : sizeof(ELFIO::Elf32_Sym)); + // Create the string table section. + strtab_ = writer_.sections.add(".strtab"); + section_index_map_.insert({strtab_->get_index(), strtab_}); + strtab_->set_type(SHT_STRTAB); + strtab_->set_addr_align(0x1); + // Link the symbol table to the string table. + symtab_->set_link(strtab_->get_index()); + // Create the symbol and string table accessors. + symbol_accessor_ = new ELFIO::symbol_section_accessor(writer_, symtab_); + string_accessor_ = + new ELFIO::string_section_accessor(writer_.sections[".strtab"]); + // Create .text, .data. and .bss sections. + SetTextSection(".text"); + SetDataSection(".data"); + SetBssSection(".bss"); + // Clear the current section. 
+ current_section_ = nullptr; +} + +SimpleAssembler::~SimpleAssembler() { + delete symbol_resolver_; + delete symbol_accessor_; + delete string_accessor_; +} + +absl::Status SimpleAssembler::Parse(std::istream &is) { + // A trivial symbol resolver that always returns 0. + ZeroResolver zero_resolver( + absl::bind_front(&SimpleAssembler::SimpleAddSymbol, this)); + // First pass of parsing the input stream. This will add symbols to the symbol + // table and compute the sizes of all instructions and the sections. The + // section_address_map_ will keep track of the current location within each + // section (i.e., the offset within the section of the next + // instruction/object). + std::string label; + std::string statement; + while (is.good() && !is.eof()) { + std::string line; + while (true) { + std::string tmp; + if (!is.good() || is.eof()) break; + getline(is, tmp); + std::string prefix; + std::string suffix; + // Remove comments from the input line. + if (!RE2::FullMatch(tmp, comment_re_, &prefix, &suffix)) { + return absl::InternalError("Failed to parse comment"); + } + tmp = absl::StrCat(prefix, suffix); + int len = tmp.length(); + // If there is an escaped newline then append the line, up to the '\', + // and continue. + if ((len >= 1) && (tmp[len - 1] == '\\')) { + // Insert the escaped newline that getline removed. + absl::StrAppend(&line, tmp, "\n"); + continue; + } + absl::StrAppend(&line, tmp); + break; + } + if (line.empty()) continue; + // Parse the line into a label and a statement. This is done to determine if + // the line contains a label, only a label, or if the statement is directive + // or not. + if (RE2::FullMatch(line, asm_line_re_, &label, &statement)) { + std::vector<uint8_t> byte_vector; + std::vector<RelocationInfo> relo_vector; + auto *section = current_section_; + uint64_t address = + (section == nullptr) ? 
0 : section_address_map_[section]; + if (!statement.empty()) { + absl::Status status; + // Pass the full line into the parse functions, they are responsible + // for handling the labels in pass one. + if (statement[0] == '.') { + status = ParseAsmDirective(line, address, &zero_resolver, byte_vector, + relo_vector); + } else { + status = ParseAsmStatement(line, address, &zero_resolver, byte_vector, + relo_vector); + } + if (!status.ok()) return status; + // Save the statements for processing in pass two (labels are all + // processed in pass one). + lines_.push_back(statement); + } else if (!label.empty()) { + // This is just a single label definition. Add it to the symbol table. + auto status = + AddSymbolToCurrentSection(label, address, 0, STT_NOTYPE, 0, 0); + if (!status.ok()) return status; + } + continue; + } + // Parse failure. + return absl::AbortedError(absl::StrCat("Parse failure: '", line, "'")); + } + + if (!is.eof()) return absl::InternalError("Input stream entered bad state"); + + // Add undefined symbols to the symbol table. + for (auto const &symbol : undefined_symbols_) { + auto status = AddSymbol(symbol, 0, 0, STT_NOTYPE, 0, 0, nullptr); + if (!status.ok()) { + return absl::InternalError(absl::StrCat( + "Failed to add undefined symbol '", symbol, "': ", status.message())); + } + } + undefined_symbols_.clear(); + + if (bss_section_ != nullptr) { + bss_section_->set_size(section_address_map_[bss_section_]); + } + return absl::OkStatus(); +} + +absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, + uint64_t entry_point) { + return CreateExecutable(base_address, absl::StrCat(entry_point)); +} + +// Helper function to update the symbol table entries for an executable file. 
+template <typename SymbolType> +void SimpleAssembler::UpdateSymbolsForExecutable(uint64_t text_segment_start, + uint64_t data_segment_start, + uint64_t bss_segment_start) { + auto num_symbols = symtab_->get_size() / sizeof(SymbolType); + auto size = num_symbols * sizeof(SymbolType); + auto *symbols = new SymbolType[num_symbols]; + std::memcpy(symbols, symtab_->get_data(), size); + for (int i = 0; i < num_symbols; ++i) { + auto &sym = symbols[i]; + auto shndx = sym.st_shndx; + std::string name = string_accessor_->get_string(sym.st_name); + if (global_symbols_.contains(name)) { + sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info)); + } + if ((text_section_ != nullptr) && (shndx == text_section_->get_index())) { + sym.st_value += text_segment_start; + } else if ((data_section_ != nullptr) && + (shndx == data_section_->get_index())) { + sym.st_value += data_segment_start; + } else if ((bss_section_ != nullptr) && + (shndx == bss_section_->get_index())) { + sym.st_value += bss_segment_start; + } + } + symtab_->set_data(reinterpret_cast<char *>(symbols), size); + delete[] symbols; +} + +template <typename SymbolType> +void SimpleAssembler::UpdateSymbolsForRelocatable() { + auto num_symbols = symtab_->get_size() / sizeof(SymbolType); + auto size = num_symbols * sizeof(SymbolType); + auto *symbols = new SymbolType[num_symbols]; + std::memcpy(symbols, symtab_->get_data(), size); + for (int i = 0; i < num_symbols; ++i) { + auto &sym = symbols[i]; + std::string name = string_accessor_->get_string(sym.st_name); + if (global_symbols_.contains(name)) { + sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info)); + } + } + symtab_->set_data(reinterpret_cast<char *>(symbols), size); + delete[] symbols; +} + +absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, + const std::string &entry_point) { + if (!undefined_symbols_.empty()) { + std::string message; + absl::StrAppend( + &message, + "Cannot create executable with the following undefined 
symbols: "); + for (auto const &symbol : undefined_symbols_) { + absl::StrAppend(&message, " ", symbol, "\n"); + } + return absl::InvalidArgumentError(message); + } + writer_.set_type(ET_EXEC); + // Section sizes are now known. So let's compute the layout and update all + // the symbol values/addresses before the next pass. + // The layout is: + // text segment starting at base address + any alignment. + // data segment starting at the end of the text segment + any alignment. + // The bss section is added to the end of the data segment + any alignment. + + ELFIO::segment *text_segment = nullptr; + uint64_t text_segment_start = 0; + if (text_section_ != nullptr) { + text_segment_start = base_address & ~4095ULL; + ELFIO::segment *text_segment = writer_.segments.add(); + text_segment->set_type(PT_LOAD); + text_segment->set_virtual_address(text_segment_start); + text_segment->set_physical_address(text_segment_start); + text_segment->set_flags(PF_X | PF_R); + text_segment->set_align(4096); + } + + ELFIO::segment *data_segment = nullptr; + uint64_t data_segment_start = 0; + uint64_t bss_segment_start = 0; + if ((data_section_ != nullptr) || (bss_section_ != nullptr)) { + data_segment_start = + (text_segment_start + section_address_map_[text_section_] + 4095) & + ~4095ULL; + + ELFIO::segment *data_segment = writer_.segments.add(); + data_segment->set_type(PT_LOAD); + data_segment->set_virtual_address(data_segment_start); + data_segment->set_physical_address(data_segment_start); + data_segment->set_flags(PF_W | PF_R); + data_segment->set_align(4096); + + uint64_t bss_align = bss_section_->get_addr_align() - 1; + bss_segment_start = + (data_segment_start + section_address_map_[data_section_] + bss_align) & + ~bss_align; + } + + // Now we can update the symbol table based on the new section sizes. + + // Different size symbol table entries for 32 and 64 bit ELF files. 
+ if (elf_file_class_ == ELFCLASS64) { + UpdateSymbolsForExecutable<ELFIO::Elf64_Sym>( + text_segment_start, data_segment_start, bss_segment_start); + } else if (elf_file_class_ == ELFCLASS32) { + UpdateSymbolsForExecutable<ELFIO::Elf32_Sym>( + text_segment_start, data_segment_start, bss_segment_start); + } else { + return absl::InternalError( + absl::StrCat("Unsupported ELF file class: ", elf_file_class_)); + } + + // Update the section address map so that each section starts at the right + // address, i.e., it no longer tracks the offset within each section, but the + // absolute address. + section_address_map_[text_section_] = text_segment_start; + section_address_map_[data_section_] = data_segment_start; + section_address_map_[bss_section_] = bss_segment_start; + + // Pass in the relocation vector to the second pass of parsing, but ignore + // the values, since we are creating an executable file, and all the symbols + // are resolved. + std::vector<RelocationInfo> relo_vector; + auto status = ParsePassTwo(relo_vector); + if (!status.ok()) return status; + + // Add sections to the segments. First segment gets the text section. The + // second segment gets the data and bss sections. + if (text_segment != nullptr) { + text_segment->add_section_index(text_section_->get_index(), + text_section_->get_addr_align()); + } + if (data_segment != nullptr) { + data_segment->add_section_index(data_section_->get_index(), + data_section_->get_addr_align()); + data_segment->add_section_index(bss_section_->get_index(), + bss_section_->get_addr_align()); + } + + auto res = SimpleTextToInt<uint64_t>(entry_point, symbol_resolver_); + if (!res.ok()) return res.status(); + uint64_t entry_point_value = res.value(); + + symbol_accessor_->arrange_local_symbols(); + writer_.set_entry(entry_point_value); + return absl::OkStatus(); +} + +namespace { + +// Helper function to add a relocation entry to a relocation section. 
+template <typename RelocaType> +absl::Status AddRelocationEntries( + const std::vector<RelocationInfo> &relo_vector, + absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices, + ELFIO::section *reloca_section) { + for (auto const &relo : relo_vector) { + RelocaType rela; + rela.r_offset = relo.offset; + rela.r_addend = relo.addend; + auto iter = symbol_indices.find(relo.symbol); + if (iter == symbol_indices.end()) { + return absl::InvalidArgumentError( + absl::StrCat("Symbol '", relo.symbol, "' not found")); + } + if (sizeof(RelocaType) == sizeof(ELFIO::Elf64_Rela)) { + rela.r_info = ELF64_R_INFO(iter->second, relo.type); + } else { + rela.r_info = ELF32_R_INFO(iter->second, relo.type); + } + reloca_section->append_data(reinterpret_cast<const char *>(&rela), + sizeof(RelocaType)); + } + return absl::OkStatus(); +} + +} // namespace + +template <typename SymbolType> +void SimpleAssembler::UpdateSymtabHeaderInfo() { + int last_local = 0; + auto syms = + absl::MakeSpan(reinterpret_cast<const SymbolType *>(symtab_->get_data()), + symtab_->get_size() / sizeof(SymbolType)); + for (int i = 0; i < syms.size(); ++i) { + auto name = string_accessor_->get_string(syms[i].st_name); + symbol_indices_.insert({name, i}); + if (ELF_ST_BIND(syms[i].st_info) == STB_LOCAL) last_local = i; + } + symtab_->set_info(last_local + 1); +} + +absl::Status SimpleAssembler::CreateRelocatable() { + writer_.set_type(ET_REL); + // Reset the section address map to zero since we are creating a relocatable + // file. + section_address_map_[text_section_] = 0; + section_address_map_[data_section_] = 0; + section_address_map_[bss_section_] = 0; + + // Since the symbols now are rearranged, we need to set global symbols flag + // for those in the global_symbols_ set. + // Different size symbol table entries for 32 and 64 bit ELF files. 
+ if (elf_file_class_ == ELFCLASS64) { + UpdateSymbolsForRelocatable<ELFIO::Elf64_Sym>(); + } else if (elf_file_class_ == ELFCLASS32) { + UpdateSymbolsForRelocatable<ELFIO::Elf32_Sym>(); + } else { + return absl::InternalError( + absl::StrCat("Unsupported ELF file class: ", elf_file_class_)); + } + // Rearrange local symbols in the symbol table so that they are at the + // beginning (ELF requirement). + symbol_accessor_->arrange_local_symbols(nullptr); + // Find the last local symbol and set the section header info for symbtab + // to point to 1 past that. Update the symbol_indices_ map. + symbol_indices_.clear(); + if (elf_file_class_ == ELFCLASS64) { + UpdateSymtabHeaderInfo<ELFIO::Elf64_Sym>(); + } else { + UpdateSymtabHeaderInfo<ELFIO::Elf32_Sym>(); + } + + // Parse the source again, collect relocations. + std::vector<RelocationInfo> relo_vector; + auto status = ParsePassTwo(relo_vector); + if (!status.ok()) return status; + + // Handle relocations if there are any. + if (!relo_vector.empty()) { + // First scan through the entries relocation vector and group them by + // the section in which the relocation is to be applied. + absl::flat_hash_map<uint16_t, std::vector<RelocationInfo>> relo_map; + for (auto const &relo : relo_vector) { + relo_map[relo.section_index].push_back(relo); + } + for (auto const &[section_index, relo_vec] : relo_map) { + if (section_index == 0) { + return absl::InternalError( + "Relocation entry with section index 0 not supported"); + } + if (!section_index_map_.contains(section_index)) { + return absl::InternalError( + absl::StrCat("Section index not found: ", section_index)); + } + // Now, create a relocation section for each key in the map. + std::string name = + absl::StrCat(".rela", section_index_map_[section_index]->get_name()); + auto *rela_section = writer_.sections.add(name); + rela_section->set_type(SHT_RELA); + rela_section->set_flags(SHF_INFO_LINK); + rela_section->set_entry_size(elf_file_class_ == ELFCLASS64 + ? 
sizeof(ELFIO::Elf64_Rela) + : sizeof(ELFIO::Elf32_Rela)); + rela_section->set_link(symtab_->get_index()); + rela_section->set_info(text_section_->get_index()); + rela_section->set_addr_align(8); + // Process the relocation vector entries. + absl::Status status; + if (elf_file_class_ == ELFCLASS64) { + status = AddRelocationEntries<ELFIO::Elf64_Rela>( + relo_vec, symbol_indices_, rela_section); + } else if (elf_file_class_ == ELFCLASS32) { + status = AddRelocationEntries<ELFIO::Elf32_Rela>( + relo_vec, symbol_indices_, rela_section); + } else { + return absl::InternalError( + absl::StrCat("Unsupported ELF file class: ", elf_file_class_)); + } + if (!status.ok()) return status; + } + } + return absl::OkStatus(); +} + +absl::Status SimpleAssembler::ParsePassTwo( + std::vector<RelocationInfo> &relo_vector) { + // For the second pass, we need a symbol resolver that uses the symbol + // table and the symbol indices. + symbol_resolver_ = + new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); + + // Now fill in the sections. Parse each of the lines saved in the first + // pass. + for (auto const &line : lines_) { + std::vector<uint8_t> byte_vector; + absl::Status status; + auto *section = current_section_; + auto relo_size = relo_vector.size(); + auto address = section_address_map_[section]; + if (line[0] == '.') { + auto status = ParseAsmDirective(line, address, symbol_resolver_, + byte_vector, relo_vector); + } else { + auto status = ParseAsmStatement(line, address, symbol_resolver_, + byte_vector, relo_vector); + } + if (!status.ok()) return status; + // Update section information in the relocation vector. + for (int i = relo_size; i < relo_vector.size(); ++i) { + relo_vector[i].section_index = section->get_index(); + relo_vector[i].offset = address; + } + // Go to the next line if there is no data to add to the section. + if (byte_vector.empty()) continue; + // Add data to the section, but first make sure it's not bss. 
+ if (section != bss_section_) { + section->append_data(reinterpret_cast<const char *>(byte_vector.data()), + byte_vector.size()); + } + } + return absl::OkStatus(); +} + +// Top level function that writes the ELF file out to disk. +absl::Status SimpleAssembler::Write(std::ostream &os) { + writer_.save(os); + return absl::OkStatus(); +} + +// Parse and process an assembly directive. The assembly directive is +// expected to be in the form of a line starting with a period followed by a +// directive name and an optional argument. The argument is a string of +// tokens separated by spaces. The argument is parsed using regular +// expressions. The byte values are appended to the given vector. +absl::Status SimpleAssembler::ParseAsmDirective( + absl::string_view line, uint64_t address, ResolverInterface *resolver, + std::vector<uint8_t> &byte_values, + std::vector<RelocationInfo> &relocations) { + std::string match; + std::string remainder; + ELFIO::section *section = current_section_; + uint64_t size = 0; + std::string directive; + std::string label; + if (!RE2::FullMatch(line, asm_line_re_, &label, &directive)) { + return absl::InvalidArgumentError( + absl::StrCat("Invalid assembly line: '", line, "'")); + } + if (!RE2::FullMatch(directive, directive_re_, &match, &remainder)) { + return absl::InvalidArgumentError( + absl::StrCat("Invalid directive: '", directive, "'")); + } + if (match == "align") { + // .align <n> + if (section == nullptr) { + return absl::InvalidArgumentError( + absl::StrCat("No section for directive: '", directive, "'")); + } + auto res = SimpleTextToInt<uint64_t>(remainder); + if (!res.ok()) return res.status(); + uint64_t align = res.value(); + // Verify that the alignment is a power of two. 
+ if ((align & (align - 1)) != 0) { + return absl::InvalidArgumentError( + absl::StrCat("Invalid alignment: '", directive, "'")); + } + uint64_t address = section_address_map_[section]; + size = ((address + align - 1) & ~(align - 1)) - address; + } else if (match == "bss") { + // .bss + SetBssSection(".bss"); + } else if (match == "bytes") { + // .bytes + auto res = GetValues<uint8_t>(remainder, resolver); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size(); + for (auto const &value : values) byte_values.push_back(value); + } else if (match == "char") { + // .char + auto res = GetValues<char>(remainder, resolver); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size(); + for (auto const &value : values) byte_values.push_back(value); + } else if (match == "cstring") { + // .cstring + auto res = GetValues<std::string>(remainder, resolver); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = 0; + for (auto const &value : values) { + for (auto const &c : value) byte_values.push_back(c); + byte_values.push_back('\0'); + size += value.size() + 1; + } + } else if (match == "data") { + // .data + SetDataSection(".data"); + } else if (match == "global") { + // .global <name> + auto res = GetLabels(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + for (auto const &value : values) { + global_symbols_.insert(value); + } + } else if (match == "long") { + // .long + auto res = GetValues<int64_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(int64_t); + ConvertToBytes<int64_t>(values, byte_values); + } else if (match == "section") { + // .section <name>,<type> + // TODO(torerik): Implement. 
+ return absl::UnimplementedError("Section directive not implemented"); + } else if (match == "short") { + // .short + auto res = GetValues<int16_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(int16_t); + ConvertToBytes<int16_t>(values, byte_values); + } else if (match == "space") { + // .space <n> + auto res = SimpleTextToInt<uint64_t>(remainder); + if (!res.ok()) return res.status(); + size = res.value(); + } else if (match == "string") { + // .string + auto res = GetValues<std::string>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = 0; + for (auto const &value : values) { + for (auto const &c : value) byte_values.push_back(c); + size += value.size(); + } + } else if (match == "text") { + // .text + SetTextSection(".text"); + } else if (match == "uchar") { + // .uchar + auto res = GetValues<uint8_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size(); + for (auto const &value : values) byte_values.push_back(value); + } else if (match == "ulong") { + // .ulong + auto res = GetValues<uint64_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(uint64_t); + ConvertToBytes<uint64_t>(values, byte_values); + } else if (match == "ushort") { + // .ushort + auto res = GetValues<uint16_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(uint16_t); + ConvertToBytes<uint16_t>(values, byte_values); + } else if (match == "uword") { + // .uword + auto res = GetValues<uint32_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(uint32_t); + ConvertToBytes<uint32_t>(values, byte_values); + } else if (match == "word") { + // .word + auto res = GetValues<int32_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = 
values.size() * sizeof(int32_t); + ConvertToBytes<int32_t>(values, byte_values); + } else { + return absl::InvalidArgumentError( + absl::StrCat("Unsupported directive: '", directive, "'")); + } + if ((size > 0) && (section != nullptr)) { + if (!section_address_map_.contains(section)) { + return absl::InternalError( + absl::StrCat("No address for section '", section->get_name(), "'")); + } + section_address_map_[section] += size; + } + + if (!label.empty()) { + // When initially adding symbols, the address is relative to the start + // of the containing section. This will be corrected later. + if (section == nullptr) { + return absl::InvalidArgumentError( + absl::StrCat("Label: '", label, "' defined outside of a section")); + } + auto status = + AddSymbol(label, address, size, STT_NOTYPE, STB_LOCAL, 0, section); + } + return absl::OkStatus(); +} + +// Parse and process an assembly statement. The assembly statement is +// expected to be a single line of text. The byte values are appended to the +// given vector. +absl::Status SimpleAssembler::ParseAsmStatement( + absl::string_view line, uint64_t address, ResolverInterface *resolver, + std::vector<uint8_t> &byte_values, + std::vector<RelocationInfo> &relocations) { + // Call the target specific assembler to encode the statement. + auto status = opcode_assembler_if_->Encode( + address, line, + absl::bind_front(&SimpleAssembler::AddSymbolToCurrentSection, this), + resolver, byte_values, relocations); + if (!status.ok()) return status; + section_address_map_[current_section_] += byte_values.size(); + return absl::OkStatus(); +} + +void SimpleAssembler::SetTextSection(const std::string &name) { + // First check if the section already exists. 
+ auto *section = writer_.sections[name]; + if (section != nullptr) { + current_section_ = section; + return; + } + section = writer_.sections.add(name); + auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section); + if (!status.ok()) { + LOG(ERROR) << "Failed to add symbol for data section: " << status.message(); + } + section->set_type(SHT_PROGBITS); + section->set_flags(SHF_ALLOC | SHF_EXECINSTR); + section->set_addr_align(0x10); + // Should probably add the section symbol to the symbol table. + current_section_ = section; + text_section_ = section; + section_index_map_.insert({section->get_index(), text_section_}); +} + +void SimpleAssembler::SetDataSection(const std::string &name) { + // First check if the section already exists. + auto *section = writer_.sections[name]; + if (section != nullptr) { + current_section_ = section; + return; + } + section = writer_.sections.add(name); + auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section); + if (!status.ok()) { + LOG(ERROR) << "Failed to add symbol for data section: " << status.message(); + } + section->set_type(SHT_PROGBITS); + section->set_flags(SHF_ALLOC | SHF_WRITE); + section->set_addr_align(0x10); + // Should probably add the section symbol to the symbol table. + current_section_ = section; + data_section_ = section; + section_index_map_.insert({section->get_index(), data_section_}); +} + +void SimpleAssembler::SetBssSection(const std::string &name) { + // First check if the section already exists. 
+ auto *section = writer_.sections[name]; + if (section != nullptr) { + current_section_ = section; + return; + } + section = writer_.sections.add(name); + auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section); + if (!status.ok()) { + LOG(ERROR) << "Failed to add symbol for bss section: " << status.message(); + } + section->set_type(SHT_NOBITS); + section->set_flags(SHF_ALLOC | SHF_WRITE); + section->set_addr_align(0x10); + // Should probably add the section symbol to the symbol table. + current_section_ = section; + bss_section_ = section; + section_index_map_.insert({section->get_index(), bss_section_}); +} +absl::Status SimpleAssembler::AddSymbolToCurrentSection( + const std::string &name, ELFIO::Elf64_Addr value, ELFIO::Elf_Xword size, + uint8_t type, uint8_t binding, uint8_t other) { + return AddSymbol(name, value, size, type, binding, other, current_section_); +} + +absl::Status SimpleAssembler::AddSymbol(const std::string &name, + ELFIO::Elf64_Addr value, + ELFIO::Elf_Xword size, uint8_t type, + uint8_t binding, uint8_t other, + const std::string §ion_name) { + ELFIO::section *section = nullptr; + if (!section_name.empty()) { + section = writer_.sections[section_name]; + if (section == nullptr) { + return absl::InvalidArgumentError( + absl::StrCat("Section '", section_name, "' not found")); + } + } + return AddSymbol(name, value, size, type, binding, other, section); +} + +absl::Status SimpleAssembler::AddSymbol(const std::string &name, + ELFIO::Elf64_Addr value, + ELFIO::Elf_Xword size, uint8_t type, + uint8_t binding, uint8_t other, + ELFIO::section *section) { + auto iter = symbol_indices_.find(name); + if (iter != symbol_indices_.end()) { + return absl::AlreadyExistsError( + absl::StrCat("Symbol '", name, "' already exists")); + } + auto index = symbol_accessor_->add_symbol( + *string_accessor_, name.c_str(), value, size, binding, type, other, + section == nullptr ? 
SHN_UNDEF : section->get_index()); + symbol_indices_.insert({name, index}); + // If this is not an undefined symbol reference, then see if the symbol name + // is part of the "current" undefined symbols, and if so, remove it. + if (section != nullptr) { + auto iter = undefined_symbols_.find(name); + if (iter != undefined_symbols_.end()) { + undefined_symbols_.erase(iter); + } + } + return absl::OkStatus(); +} + +void SimpleAssembler::SimpleAddSymbol(absl::string_view name) { + // If the symbol exists, then just return. + if (symbol_indices_.contains(name)) return; + if (undefined_symbols_.contains(name)) return; + std::string name_str(name); + undefined_symbols_.insert(name_str); +} + +absl::Status SimpleAssembler::AppendData(const char *data, size_t size) { + if (current_section_ == nullptr) { + return absl::FailedPreconditionError("No current section"); + } + current_section_->append_data(data, size); + return absl::OkStatus(); +} + +} // namespace assembler +} // namespace util +} // namespace sim +} // namespace mpact
diff --git a/mpact/sim/util/asm/simple_assembler.h b/mpact/sim/util/asm/simple_assembler.h new file mode 100644 index 0000000..e711fc7 --- /dev/null +++ b/mpact/sim/util/asm/simple_assembler.h
@@ -0,0 +1,185 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_ +#define MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_ + +#include <cstddef> +#include <cstdint> +#include <istream> +#include <ostream> +#include <string> +#include <vector> + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "elfio/elf_types.hpp" +#include "elfio/elfio.hpp" +#include "elfio/elfio_section.hpp" +#include "elfio/elfio_strings.hpp" +#include "elfio/elfio_symbols.hpp" +#include "mpact/sim/util/asm/opcode_assembler_interface.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "re2/re2.h" + +// This file declares the SimpleAssembler class, which provides simple handling +// of assembly source, including a number of assembly directives. It currently +// handles three sections: .text, .data, and .bss. It produces either a +// relocatable file or an executable ELF file with the text section in its own +// segment starting at the base address, followed by the data section, and then +// the bss section. For the executable file, the entry point is set by calling +// SetEntryPoint(). +// +// Only little-endian ELF files are currently supported. +// +// The ELF file class is specified in the constructor. 
Any other ELF header +// values have to be set using methods in the ELFIO writer that is accessed +// using the writer() method. +// At the very least, the following methods should be called: +// writer().set_os_abi() +// writer().set_machine() +// If additional sections need to be created, use the add_section() method of +// the writer (see ELFIO documentation for details). + +namespace mpact { +namespace sim { +namespace util { +namespace assembler { + +class SimpleAssembler { + public: + // The constructor takes the following parameters: + // comment: The comment string or character that starts a comment. + // elf_file_class: The ELF file class (32 or 64 bit). Use either ELFCLASS32 + // or ELFCLASS64 from ELFIO. + // opcode_assembler_if: The opcode assembler interface to use for parsing + // and encoding assembly statements. + SimpleAssembler(absl::string_view comment, int elf_file_class, + OpcodeAssemblerInterface *opcode_assembler_if); + SimpleAssembler(const SimpleAssembler &) = delete; + SimpleAssembler &operator=(const SimpleAssembler &) = delete; + virtual ~SimpleAssembler(); + + // Parse the input stream as assembly. + absl::Status Parse(std::istream &is); + // Add the symbol to the symbol table for the current section. See ELFIO + // documentation for details of the meaning of the parameters. + absl::Status AddSymbolToCurrentSection(const std::string &name, + ELFIO::Elf64_Addr value, + ELFIO::Elf_Xword size, uint8_t type, + uint8_t binding, uint8_t other); + // Add the symbol to the symbol table for the named section. See ELFIO + // documentation for details of the meaning of the parameters. + absl::Status AddSymbol(const std::string &name, ELFIO::Elf64_Addr value, + ELFIO::Elf_Xword size, uint8_t type, uint8_t binding, + uint8_t other, const std::string §ion_name); + // Create executable ELF file with the given value as the entry point. + // The text segment will be laid out starting at base address, followed by + // the data segment. 
+ absl::Status CreateExecutable(uint64_t base_address, + const std::string &entry_point); + absl::Status CreateExecutable(uint64_t base_address, uint64_t entry_point); + // Create a relocatable ELF file. + absl::Status CreateRelocatable(); + // Write the ELF file to the given output stream. + absl::Status Write(std::ostream &os); + // Access the ELF writer. + ELFIO::elfio &writer() { return writer_; } + + private: + // Helper function to update the symbol table entries. + template <typename SymbolType> + void UpdateSymbolsForExecutable(uint64_t text_segment_start, + uint64_t data_segment_start, + uint64_t bss_segment_start); + template <typename SymbolType> + void UpdateSymbolsForRelocatable(); + template <typename SymbolType> + void UpdateSymtabHeaderInfo(); + // Perform second pass of parsing. + absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector); + // Parse and process an assembly directive. + absl::Status ParseAsmDirective(absl::string_view line, uint64_t address, + ResolverInterface *resolver, + std::vector<uint8_t> &byte_values, + std::vector<RelocationInfo> &relocations); + // Parse and process and assembly statement. + absl::Status ParseAsmStatement(absl::string_view line, uint64_t address, + ResolverInterface *resolver, + std::vector<uint8_t> &byte_values, + std::vector<RelocationInfo> &relocations); + // Add the symbol to the symbol table. + absl::Status AddSymbol(const std::string &name, ELFIO::Elf64_Addr value, + ELFIO::Elf_Xword size, uint8_t type, uint8_t binding, + uint8_t other, ELFIO::section *section); + // Add a symbol reference to the symbol table if it is not already defined. + void SimpleAddSymbol(absl::string_view name); + // Append the data to the current section. + absl::Status AppendData(const char *data, size_t size); + + // Set the the given section as the current section. Create if it has not + // already been created. 
+ void SetTextSection(const std::string &name); + void SetDataSection(const std::string &name); + void SetBssSection(const std::string &name); + + // ELF file class. + int elf_file_class_ = 0; + // Elf file top level object. + ELFIO::elfio writer_; + // The current section being processed. + ELFIO::section *current_section_ = nullptr; + // Map from section index to section pointer. + absl::flat_hash_map<uint16_t, ELFIO::section *> section_index_map_; + // Interface used to parse and encode assembly statements. + OpcodeAssemblerInterface *opcode_assembler_if_ = nullptr; + // Interface used to access strings in the string table. + ELFIO::string_section_accessor *string_accessor_ = nullptr; + // Interface used to access symbols in the symbol table. + ELFIO::symbol_section_accessor *symbol_accessor_ = nullptr; + // ELF symbol table section. + ELFIO::section *symtab_ = nullptr; + // Elf string table section. + ELFIO::section *strtab_ = nullptr; + // Map that tracks the current address of each section. + absl::flat_hash_map<ELFIO::section *, uint64_t> section_address_map_; + + // Current symbol resolver (looks up symbols in the symbol table and returns + // their values). + ResolverInterface *symbol_resolver_ = nullptr; + std::vector<std::string> lines_; + // Section pointers. + ELFIO::section *text_section_ = nullptr; + ELFIO::section *data_section_ = nullptr; + ELFIO::section *bss_section_ = nullptr; + // Regular expressions used to parse the assembly source. + RE2 comment_re_; + RE2 asm_line_re_; + RE2 directive_re_; + // Set of symbol names declared as global. + absl::flat_hash_set<std::string> global_symbols_; + // Map from symbol name to symbol index in the symbol table. + absl::flat_hash_map<std::string, ELFIO::Elf_Word> symbol_indices_; + // Set of undefined symbols. 
+ absl::flat_hash_set<std::string> undefined_symbols_; +}; + +} // namespace assembler +} // namespace util +} // namespace sim +} // namespace mpact + +#endif // MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_
diff --git a/mpact/sim/util/asm/test/BUILD b/mpact/sim/util/asm/test/BUILD new file mode 100644 index 0000000..3c5e77a --- /dev/null +++ b/mpact/sim/util/asm/test/BUILD
@@ -0,0 +1,109 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains the build rules for tests related to assembler related libraries. + +load("//mpact/sim/decoder:mpact_sim_isa.bzl", "mpact_bin_fmt_decoder", "mpact_isa_decoder") + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "riscv64x_instructions", + testonly = True, + srcs = ["riscv64x_instructions.cc"], + hdrs = ["riscv64x_instructions.h"], + deps = [ + "//mpact/sim/generic:instruction", + ], +) + +mpact_isa_decoder( + name = "riscv64x_isa", + testonly = True, + src = "riscv64x.isa", + includes = [], + isa_name = "RiscV64X", + deps = [ + ":riscv64x_instructions", + ], +) + +mpact_bin_fmt_decoder( + name = "riscv64x_bin_fmt", + testonly = True, + src = "riscv64x.bin_fmt", + decoder_name = "RiscV64X", + includes = [ + ], + prefix = "riscv64x", + deps = [ + ":riscv64x_isa", + ], +) + +cc_library( + name = "riscv64x_encoder", + testonly = True, + srcs = [ + "riscv64x_bin_encoder_interface.cc", + "riscv_bin_setters.cc", + ], + hdrs = [ + "riscv64x_bin_encoder_interface.h", + "riscv_bin_setters.h", + "riscv_getter_helpers.h", + ], + deps = [ + ":riscv64x_bin_fmt", + ":riscv64x_isa", + "//mpact/sim/generic:type_helpers", + "//mpact/sim/util/asm", + "@com_google_absl//absl/base:no_destructor", + "@com_google_absl//absl/container:flat_hash_map", + 
"@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_googlesource_code_re2//:re2", + ], +) + +cc_test( + name = "riscv64x_asm_test", + size = "small", + srcs = ["riscv64x_asm_test.cc"], + deps = [ + ":riscv64x_bin_fmt", + ":riscv64x_encoder", + ":riscv64x_isa", + "//mpact/sim/util/asm", + "//mpact/sim/util/asm:simple_assembler", + "@com_github_serge1_elfio//:elfio", + "@com_google_absl//absl/base:no_destructor", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + "@com_googlesource_code_re2//:re2", + ], +)
diff --git a/mpact/sim/util/asm/test/riscv64x.bin_fmt b/mpact/sim/util/asm/test/riscv64x.bin_fmt new file mode 100644 index 0000000..84ee048 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x.bin_fmt
@@ -0,0 +1,108 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// RiscV 64 bit G instruction decoder for testing assembler. +decoder RiscV64X { + opcode_enum = "isa64::OpcodeEnum"; + includes { + #include "mpact/sim/util/asm/test/riscv64x_decoder.h" + } + namespace mpact::sim::riscv::encoding64; + RiscVXInst32; +}; + +format Inst32Format[32] { + fields: + unsigned bits[25]; + unsigned opcode[7]; +}; + +format RType[32] : Inst32Format { + fields: + unsigned func7[7]; + unsigned rs2[5]; + unsigned rs1[5]; + unsigned func3[3]; + unsigned rd[5]; + unsigned opcode[7]; + overlays: + unsigned r_uimm5[5] = rs2; +}; + +// Format for shift immediate for RV64, note 6 bit immediate. 
+format RSType[32] : Inst32Format { + fields: + unsigned func6[6]; + unsigned r_uimm6[6]; + unsigned rs1[5]; + unsigned func3[3]; + unsigned rd[5]; + unsigned opcode[7]; +}; + +format IType[32] : Inst32Format { + fields: + signed imm12[12]; + unsigned rs1[5]; + unsigned func3[3]; + unsigned rd[5]; + unsigned opcode[7]; + overlays: + unsigned u_imm12[12] = imm12; + unsigned i_uimm5[5] = rs1; +}; + +format SType[32] : Inst32Format { + fields: + unsigned imm7[7]; + unsigned rs2[5]; + unsigned rs1[5]; + unsigned func3[3]; + unsigned imm5[5]; + unsigned opcode[7]; + overlays: + signed s_imm[12] = imm7, imm5; +}; + + +format UType[32] : Inst32Format { + fields: + unsigned imm20[20]; + unsigned rd[5]; + unsigned opcode[7]; + overlays: + unsigned u_imm[32] = imm20, 0b0000'0000'0000; + signed s_imm[32] = imm20, 0b0000'0000'0000; +}; + +format JType[32] : Inst32Format { + fields: + signed imm20[20]; + unsigned rd[5]; + unsigned opcode[7]; + overlays: + signed j_imm[21] = imm20[19, 7..0, 8, 18..9], 0b0; +}; + +instruction group RiscVXInst32[32] : Inst32Format { + addi : IType : func3 == 0b000, opcode == 0b001'0011; + lui : UType : opcode == 0b011'0111; + sd : SType : func3 == 0b011, opcode == 0b010'0011; + jal : JType : rd != 0, opcode == 0b110'1111; + j : JType : rd == 0, opcode == 0b110'1111; + slli : RSType : func6 == 0b000'000, func3==0b001, opcode == 0b001'0011; + ebreak : Inst32Format : bits == 0b0000'0000'0001'00000'000'00000, opcode == 0b111'0011; + srai : RSType : func6 == 0b010'000, func3==0b101, opcode == 0b001'0011; + jr : IType : rd == 0, func3 == 0b000, opcode == 0b110'0111; +};
diff --git a/mpact/sim/util/asm/test/riscv64x.isa b/mpact/sim/util/asm/test/riscv64x.isa new file mode 100644 index 0000000..b93fc56 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x.isa
@@ -0,0 +1,58 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains the subset of the ISA description for the RiscV64G architecture for +// the purpose of generating a simple "subset" assembler for testing + +// First disasm field is 18 char wide and left justified. +disasm widths = {-18}; + +int global_latency = 1; + +isa RiscV64X { + namespace mpact::sim::riscv::isa64; + slots { riscv64x; } +} + +// Minimal set of instructions for hello world. +slot riscv64x { + includes { + #include "mpact/sim/util/asm/test/riscv64x_instructions.h" + } + default size = 4; + default latency = global_latency; + default opcode = + disasm: "Illegal instruction at %(@:08x)", + semfunc: "&RiscVIllegalInstruction"; + opcodes { + addi{: rs1, %reloc(I_imm12) : rd}, + disasm: "addi", "%rd, %rs1, %I_imm12"; + lui{: %reloc(U_imm20) : rd}, + disasm: "lui", "%rd, %(U_imm20:08x)"; + sd{: rs1, %reloc(S_imm12), rs2 : }, + disasm: "sd", "%rs2, %S_imm12(%rs1)"; + jal{: %reloc(J_imm20) : next_pc, rd}, + disasm: "jal", "%rd, %(@+J_imm20:08x)"; + j{: %reloc(J_imm20) : next_pc, rd}, + disasm: "j", "%(@+J_imm20:08x)"; + slli{: rs1, I_uimm6 : rd}, + disasm: "slli", "%rd, %rs1, %(I_uimm6:x)"; + ebreak{}, + disasm: "ebreak"; + srai{: rs1, I_uimm6 : rd}, + disasm: "srai", "%rd, %rs1, %(I_uimm6:x)"; + jr{: rs1, %reloc(J_imm12) : next_pc, rd}, + disasm: "jr", "%rs1, %(J_imm12:08x)"; + } +}
diff --git a/mpact/sim/util/asm/test/riscv64x_asm_test.cc b/mpact/sim/util/asm/test/riscv64x_asm_test.cc new file mode 100644 index 0000000..91d9d93 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
@@ -0,0 +1,373 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdint> +#include <sstream> +#include <string> +#include <vector> + +#include "absl/base/no_destructor.h" +#include "absl/container/flat_hash_map.h" +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "elfio/elf_types.hpp" +#include "elfio/elfio.hpp" +#include "elfio/elfio_strings.hpp" +#include "elfio/elfio_symbols.hpp" +#include "googlemock/include/gmock/gmock.h" // IWYU pragma: keep +#include "googletest/include/gtest/gtest.h" +#include "mpact/sim/util/asm/opcode_assembler_interface.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "mpact/sim/util/asm/simple_assembler.h" +#include "mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_encoder.h" +#include "re2/re2.h" + +// This file contains tests for the simple assembler using a very reduced +// subset of the RISC-V ISA. 
+ +namespace { + +using ::mpact::sim::riscv::isa64::RiscV64XBinEncoderInterface; +using ::mpact::sim::riscv::isa64::Riscv64xSlotMatcher; +using ::mpact::sim::util::assembler::OpcodeAssemblerInterface; +using ::mpact::sim::util::assembler::RelocationInfo; +using ::mpact::sim::util::assembler::ResolverInterface; +using ::mpact::sim::util::assembler::SimpleAssembler; + +// This class implements the OpcodeAssemblerInterface using the slot matcher. +class RiscV64XAssembler : public OpcodeAssemblerInterface { + public: + RiscV64XAssembler(Riscv64xSlotMatcher* matcher) + : label_re_("^(\\S+)\\s*:"), matcher_(matcher) {}; + ~RiscV64XAssembler() override = default; + absl::Status Encode(uint64_t address, absl::string_view text, + AddSymbolCallback add_symbol_callback, + ResolverInterface* resolver, std::vector<uint8_t>& bytes, + std::vector<RelocationInfo>& relocations) override { + // First check to see if there is a label, if so, add it to the symbol table + // with the current address. + std::string label; + if (RE2::Consume(&text, label_re_, &label)) { + auto status = add_symbol_callback(label, address, 0, STT_NOTYPE, 0, 0); + if (!status.ok()) return status; + } + // Call the slot matcher to get the encoded value. + auto res = matcher_->Encode(address, text, 0, resolver, relocations); + if (!res.status().ok()) return res.status(); + // Convert the value to a byte array. + auto [value, size] = res.value(); + union { + uint64_t i; + uint8_t b[sizeof(uint64_t)]; + } u; + u.i = value; + for (int i = 0; i < size / 8; ++i) { + bytes.push_back(u.b[i]); + } + return absl::OkStatus(); + } + + private: + RE2 label_re_; + Riscv64xSlotMatcher* matcher_; +}; + +// Sample assembly code. 
+absl::NoDestructor<std::string> kTestAssembly(R"( +; text section + .text + .global main +main: + addi a0, zero, 5 + lui a1, %hi(semihost_param) + addi a1, a1, %lo(semihost_param) + addi t0, zero, 2 + sd t0, 0(a1) + lui t2, %hi(hello) + addi t2, t2, %lo(hello) + sd t2, 8(a1) + addi t0, zero, 12 + sd t0, 0x10(a1) + jal ra, semihost + ; now exit + addi a0, zero, 24 + lui t0, 0x20026 + addi t0, t0, 0x20026 + sd t0, 0(a1) + jal ra, semihost +exit: + j exit + +semihost: + slli zero, zero, 0x1f + ebreak + srai zero, zero, 7 + jr ra, 0 + +; data section + + .data + .global hello +hello: + .cstring "Hello World\n" + .char '\n' + +; bss + + .bss + .global tohost +tohost: + .space 16 +semihost_param: + .space 16 +)"); + +// Test fixture. It creates the assembler and parses the assembly code. +class RiscV64XAssemblerTest : public ::testing::Test { + protected: + RiscV64XAssemblerTest() + : matcher_(&bin_encoder_interface_), riscv_64x_assembler_(&matcher_) { + CHECK_OK(matcher_.Initialize()); + // Create the assembler. + assembler_ = new SimpleAssembler(";", ELFCLASS64, &riscv_64x_assembler_); + assembler_->writer().set_os_abi(ELFOSABI_LINUX); + assembler_->writer().set_machine(EM_RISCV); + std::istringstream source(*kTestAssembly); + // Parse the assembly code. + auto status = assembler_->Parse(source); + CHECK_OK(status) << status.message(); + } + + ~RiscV64XAssemblerTest() override { delete assembler_; } + + // Access the ELF writer. + ELFIO::elfio& elf() { return assembler_->writer(); } + SimpleAssembler* assembler() const { return assembler_; } + + private: + RiscV64XBinEncoderInterface bin_encoder_interface_; + Riscv64xSlotMatcher matcher_; + RiscV64XAssembler riscv_64x_assembler_; + SimpleAssembler* assembler_; +}; + +// Test that the expected sections are present. +TEST_F(RiscV64XAssemblerTest, Sections) { + auto sections = elf().sections; + // Null section and the 6 sections listed below. 
+ EXPECT_EQ(sections.size(), 7); + EXPECT_NE(sections[".text"], nullptr); + EXPECT_NE(sections[".data"], nullptr); + EXPECT_NE(sections[".bss"], nullptr); + EXPECT_NE(sections[".shstrtab"], nullptr); + EXPECT_NE(sections[".strtab"], nullptr); + EXPECT_NE(sections[".symtab"], nullptr); +} + +// Verify that the information about the text section is as expected. +TEST_F(RiscV64XAssemblerTest, Text) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); + auto* text = elf().sections[".text"]; + EXPECT_EQ(text->get_type(), SHT_PROGBITS); + EXPECT_EQ(text->get_flags(), SHF_ALLOC | SHF_EXECINSTR); + EXPECT_EQ(text->get_link(), SHN_UNDEF); + EXPECT_EQ(text->get_size(), /*num inst*/ 21 * /*bytes per inst*/ 4); +} + +TEST_F(RiscV64XAssemblerTest, Data) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); + auto* data = elf().sections[".data"]; + EXPECT_EQ(data->get_type(), SHT_PROGBITS); + EXPECT_EQ(data->get_flags(), SHF_ALLOC | SHF_WRITE); + EXPECT_EQ(data->get_link(), SHN_UNDEF); + // Hello world is 12 bytes, plus the null terminator. + // Add one .char declaration. + EXPECT_EQ(data->get_size(), 14); +} + +TEST_F(RiscV64XAssemblerTest, Bss) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); + auto* bss = elf().sections[".bss"]; + EXPECT_EQ(bss->get_type(), SHT_NOBITS); + EXPECT_EQ(bss->get_flags(), SHF_ALLOC | SHF_WRITE); + EXPECT_EQ(bss->get_link(), SHN_UNDEF); + // Two .space declarations, each 16 bytes. 
+ EXPECT_EQ(bss->get_size(), 32); +} + +TEST_F(RiscV64XAssemblerTest, RelocatableSymbols) { + auto status = assembler()->CreateRelocatable(); + CHECK_OK(status) << status.message(); + auto* symtab = elf().sections[".symtab"]; + ELFIO::symbol_section_accessor symbols(elf(), symtab); + ELFIO::Elf64_Addr value; + ELFIO::Elf_Xword size; + unsigned char bind; + unsigned char type; + ELFIO::Elf_Half section_index; + unsigned char other; + int num_symbols = symtab->get_size() / sizeof(ELFIO::Elf64_Sym); + auto symspan = absl::MakeSpan( + reinterpret_cast<const ELFIO::Elf64_Sym*>(symtab->get_data()), + num_symbols); + absl::flat_hash_map<std::string, int> symbol_map; + auto* string_accessor = + new ELFIO::string_section_accessor(elf().sections[".strtab"]); + for (int i = 0; i < num_symbols; ++i) { + auto name = string_accessor->get_string(symspan[i].st_name); + symbol_map.insert({name, i}); + } + // Verify that main is valued 0x0, global and located in the text section. + symbols.get_symbol("main", value, size, bind, type, section_index, other); + auto* sym = &symspan[symbol_map["main"]]; + EXPECT_EQ(sym->st_value, 0x0); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_GLOBAL); + EXPECT_EQ(sym->st_shndx, elf().sections[".text"]->get_index()); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); + // Verify that exit is valued 16 * 4, local and located in the text + // section. + sym = &symspan[symbol_map["exit"]]; + EXPECT_EQ(sym->st_value, 16 * 4); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_LOCAL); + EXPECT_EQ(sym->st_shndx, elf().sections[".text"]->get_index()); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); + // Verify that hello is global and located in the data section at 0x2000. 
+ symbols.get_symbol("hello", value, size, bind, type, section_index, other); + sym = &symspan[symbol_map["hello"]]; + EXPECT_EQ(sym->st_value, 0); + EXPECT_EQ(sym->st_shndx, elf().sections[".data"]->get_index()); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_GLOBAL); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); + // Verify that semihost_param is global and located in the bss section at + // 16 bytes. + sym = &symspan[symbol_map["semihost_param"]]; + EXPECT_EQ(sym->st_value, 16); + EXPECT_EQ(sym->st_shndx, elf().sections[".bss"]->get_index()); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_LOCAL); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); + delete string_accessor; +} + +TEST_F(RiscV64XAssemblerTest, ExecutableSymbols) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); + auto* symtab = elf().sections[".symtab"]; + ELFIO::symbol_section_accessor symbols(elf(), symtab); + ELFIO::Elf64_Addr value; + ELFIO::Elf_Xword size; + unsigned char bind; + unsigned char type; + ELFIO::Elf_Half section_index; + unsigned char other; + // Verify that main is valued 0x1000, global and located in the text section. + symbols.get_symbol("main", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0x1000); + EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that exit is valued 0x1000 + 16 * 4, local and located in the text + // section. + symbols.get_symbol("exit", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0x1000 + 16 * 4); + EXPECT_EQ(bind, STB_LOCAL); + EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that hello is global and located in the data section at 0x2000. 
+ symbols.get_symbol("hello", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0x2000); + EXPECT_EQ(section_index, elf().sections[".data"]->get_index()); + EXPECT_EQ(bind, STB_GLOBAL); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that semihost_param is global and located in the bss section at + // 0x2000 + 14 + alignment to 16 byte boundary, plus 16 bytes. + symbols.get_symbol("semihost_param", value, size, bind, type, section_index, + other); + EXPECT_EQ(value, 0x2000 + 16 + 16); + EXPECT_EQ(section_index, elf().sections[".bss"]->get_index()); + EXPECT_EQ(bind, STB_LOCAL); + EXPECT_EQ(type, STT_NOTYPE); +} + +// Verify that the first 16 instructions were assembled correctly. +TEST_F(RiscV64XAssemblerTest, ExecutableTextContent) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); + auto* text = elf().sections[".text"]; + auto* data = text->get_data(); + auto* word_data = reinterpret_cast<const uint32_t*>(data); + // Verify the first 16 instructions. 
+ EXPECT_EQ(word_data[0], 0x00500513); // addi a0, zero, 5 + EXPECT_EQ(word_data[1], 0x000025b7); // lui a1, semihost_param + EXPECT_EQ(word_data[2], 0x02058593); // addi a1, a1, semihost_param + EXPECT_EQ(word_data[3], 0x00200293); // addi t0, zero, 2 + EXPECT_EQ(word_data[4], 0x0055b023); // sd t0, 0(a1) + EXPECT_EQ(word_data[5], 0x000023b7); // lui t2, hello + EXPECT_EQ(word_data[6], 0x00038393); // addi t2, t2, hello + EXPECT_EQ(word_data[7], 0x0075b423); // sd t2, 8(a1) + EXPECT_EQ(word_data[8], 0x00c00293); // addi t0, zero, 12 + EXPECT_EQ(word_data[9], 0x0055b823); // sd t0, 0x10(a1) + EXPECT_EQ(word_data[10], 0x01c000ef); // jal ra, semihost + EXPECT_EQ(word_data[11], 0x01800513); // addi a0, zero, 24 + EXPECT_EQ(word_data[12], 0x000202b7); // lui t0, 0x20026 + EXPECT_EQ(word_data[13], 0x02628293); // addi t0, t0, 0x20026 + EXPECT_EQ(word_data[14], 0x0055b023); // sd t0, 0(a1) + EXPECT_EQ(word_data[15], 0x008000ef); // jal ra, semihost +} + +// Verify that the first 16 instructions were assembled correctly. +TEST_F(RiscV64XAssemblerTest, RelocatableTextContent) { + auto status = assembler()->CreateRelocatable(); + CHECK_OK(status) << status.message(); + auto* text = elf().sections[".text"]; + auto* data = text->get_data(); + auto* word_data = reinterpret_cast<const uint32_t*>(data); + // Verify the first 16 instructions. These will be slightly different from + // the executable version since the symbol values are not relocated to their + // final memory values. 
+ EXPECT_EQ(word_data[0], 0x00500513); // addi a0, zero, 5 + EXPECT_EQ(word_data[1], 0x000005b7); // lui a1, semihost_param + EXPECT_EQ(word_data[2], 0x01058593); // addi a1, a1, semihost_param + EXPECT_EQ(word_data[3], 0x00200293); // addi t0, zero, 2 + EXPECT_EQ(word_data[4], 0x0055b023); // sd t0, 0(a1) + EXPECT_EQ(word_data[5], 0x000003b7); // lui t2, hello + EXPECT_EQ(word_data[6], 0x00038393); // addi t2, t2, hello + EXPECT_EQ(word_data[7], 0x0075b423); // sd t2, 8(a1) + EXPECT_EQ(word_data[8], 0x00c00293); // addi t0, zero, 12 + EXPECT_EQ(word_data[9], 0x0055b823); // sd t0, 0x10(a1) + EXPECT_EQ(word_data[10], 0x01c000ef); // jal ra, semihost + EXPECT_EQ(word_data[11], 0x01800513); // addi a0, zero, 24 + EXPECT_EQ(word_data[12], 0x000202b7); // lui t0, 0x20026 + EXPECT_EQ(word_data[13], 0x02628293); // addi t0, t0, 0x20026 + EXPECT_EQ(word_data[14], 0x0055b023); // sd t0, 0(a1) + EXPECT_EQ(word_data[15], 0x008000ef); // jal ra, semihost +} + +TEST_F(RiscV64XAssemblerTest, TextRelocations) { + auto status = assembler()->CreateRelocatable(); + CHECK_OK(status) << status.message(); + auto* rela_section = elf().sections[".rela.text"]; + EXPECT_NE(rela_section, nullptr); + auto* rela_data = rela_section->get_data(); + auto rela = + absl::MakeSpan(reinterpret_cast<const ELFIO::Elf64_Rela*>(rela_data), + rela_section->get_size() / sizeof(ELFIO::Elf64_Rela)); + EXPECT_EQ(rela.size(), 4); +} + +} // namespace
diff --git a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc new file mode 100644 index 0000000..9f45dbf --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc
@@ -0,0 +1,160 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h"
+
+#include <cstdint>
+#include <tuple>
+#include <vector>
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/generic/type_helpers.h"
+#include "mpact/sim/util/asm/resolver_interface.h"
+#include "mpact/sim/util/asm/test/riscv64x_bin_encoder.h"
+#include "mpact/sim/util/asm/test/riscv64x_encoder.h"
+#include "mpact/sim/util/asm/test/riscv64x_enums.h"
+#include "mpact/sim/util/asm/test/riscv_bin_setters.h"
+
+namespace mpact {
+namespace sim {
+namespace riscv {
+namespace isa64 {
+
+using ::mpact::sim::generic::operator*;  // NOLINT(misc-unused-using-decls)
+using ::mpact::sim::util::assembler::ResolverInterface;
+
+// Registers the source operand, destination operand and relocation setters.
+// The list operand and predicate maps are not populated here.
+RiscV64XBinEncoderInterface::RiscV64XBinEncoderInterface() {
+  AddRiscvSourceOpBinSetters<SourceOpEnum, OpMap, encoding64::Encoder>(
+      source_op_map_);
+  AddRiscvDestOpBinSetters<DestOpEnum, OpMap, encoding64::Encoder>(
+      dest_op_map_);
+  AddRiscvSourceOpRelocationSetters<OpcodeEnum, SourceOpEnum, RelocationMap>(
+      relocation_source_op_map_);
+}
+
+// Returns the entry of encoding64::kOpcodeEncodings for `opcode`, or a
+// NotFound error when the table has no entry for it. The slot, entry and
+// resolver arguments are not used.
+absl::StatusOr<std::tuple<uint64_t, int>>
+RiscV64XBinEncoderInterface::GetOpcodeEncoding(SlotEnum slot, int entry,
+                                               OpcodeEnum opcode,
+                                               ResolverInterface *resolver) {
+  // Look the opcode up with find() instead of at(): at() throws (or aborts)
+  // on a missing key, which would bypass the StatusOr error channel.
+  auto iter = encoding64::kOpcodeEncodings->find(opcode);
+  if (iter == encoding64::kOpcodeEncodings->end()) {
+    return absl::NotFoundError(absl::StrCat(
+        "Opcode encoding not found for opcode enum value ", *opcode));
+  }
+  return iter->second;
+}
+
+// Encodes a source operand by dispatching to the setter registered for
+// `source_op`; returns a NotFound error if no setter is registered.
+absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetSrcOpEncoding(
+    uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+    OpcodeEnum opcode, SourceOpEnum source_op, int source_num,
+    ResolverInterface *resolver) {
+  auto iter = source_op_map_.find(*source_op);
+  if (iter == source_op_map_.end()) {
+    return absl::NotFoundError(absl::StrCat(
+        "Source operand not found for op enum value ", *source_op));
+  }
+  return iter->second(address, text, resolver);
+}
+
+// Appends relocations for a source operand. Only (opcode, source_op) pairs
+// with a registered relocation setter can add entries; any other pair is a
+// successful no-op.
+absl::Status RiscV64XBinEncoderInterface::AppendSrcOpRelocation(
+    uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+    OpcodeEnum opcode, SourceOpEnum source_op, int source_num,
+    ResolverInterface *resolver, std::vector<RelocationInfo> &relocations) {
+  auto iter = relocation_source_op_map_.find(std::tie(opcode, source_op));
+  if (iter == relocation_source_op_map_.end()) return absl::OkStatus();
+  return iter->second(address, text, resolver, relocations);
+}
+
+// Encodes a destination operand by dispatching to the setter registered for
+// `dest_op`; returns a NotFound error if no setter is registered.
+absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetDestOpEncoding(
+    uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+    OpcodeEnum opcode, DestOpEnum dest_op, int dest_num,
+    ResolverInterface *resolver) {
+  auto iter = dest_op_map_.find(*dest_op);
+  if (iter == dest_op_map_.end()) {
+    return absl::NotFoundError(
+        absl::StrCat("Dest operand not found for op enum value ", *dest_op));
+  }
+  return iter->second(address, text, resolver);
+}
+
+absl::Status RiscV64XBinEncoderInterface::AppendDestOpRelocation(
+    uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+    OpcodeEnum opcode, DestOpEnum dest_op, int dest_num,
+    ResolverInterface *resolver, std::vector<RelocationInfo> &relocations) {
+  // There are no destination operands that require relocation.
+  return absl::OkStatus();
+}
+
+// Encodes a list destination operand via list_dest_op_map_; returns a NotFound
+// error if no setter is registered for `dest_op`.
+absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetListDestOpEncoding(
+    uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+    OpcodeEnum opcode, ListDestOpEnum dest_op, int dest_num,
+    ResolverInterface *resolver) {
+  auto iter = list_dest_op_map_.find(*dest_op);
+  if (iter == list_dest_op_map_.end()) {
+    return absl::NotFoundError(absl::StrCat(
+        "List dest operand not found for op enum value ", *dest_op));
+  }
+  return iter->second(address, text, resolver);
+}
+
+// Encodes a list source operand via list_source_op_map_; returns a NotFound
+// error if no setter is registered for `source_op`.
+absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetListSrcOpEncoding(
+    uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+    OpcodeEnum opcode, ListSourceOpEnum source_op, int source_num,
+    ResolverInterface *resolver) {
+  auto iter = list_source_op_map_.find(*source_op);
+  if (iter == list_source_op_map_.end()) {
+    return absl::NotFoundError(absl::StrCat(
+        "List source operand not found for op enum value ", *source_op));
+  }
+  return iter->second(address, text, resolver);
+}
+
+// Encodes a predicate operand via pred_op_map_; returns a NotFound error if no
+// setter is registered for `pred_op`.
+absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetPredOpEncoding(
+    uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+    OpcodeEnum opcode, PredOpEnum pred_op, ResolverInterface *resolver) {
+  auto iter = pred_op_map_.find(*pred_op);
+  if (iter == pred_op_map_.end()) {
+    return absl::NotFoundError(absl::StrCat(
+        "Predicate operand not found for op enum value ", *pred_op));
+  }
+  return iter->second(address, text, resolver);
+}
+
+}  // namespace isa64
+}  // namespace riscv
+}  // namespace sim
+}  // namespace mpact
diff --git a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h new file mode 100644 index 0000000..4c1d341 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h
@@ -0,0 +1,108 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV64X_BIN_ENCODER_INTERFACE_H_
+#define MPACT_SIM_UTIL_ASM_TEST_RISCV64X_BIN_ENCODER_INTERFACE_H_
+
+#include <cstdint>
+#include <functional>
+#include <tuple>
+#include <vector>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/util/asm/resolver_interface.h"
+#include "mpact/sim/util/asm/test/riscv64x_encoder.h"
+#include "mpact/sim/util/asm/test/riscv64x_enums.h"
+
+namespace mpact::sim::riscv::isa64 {
+
+using ::mpact::sim::util::assembler::ResolverInterface;
+
+// Binary encoder interface for the RiscV64X test ISA. Operand encodings and
+// relocations are produced by callables stored in hash maps keyed on the
+// operand (or opcode/operand) enum values.
+class RiscV64XBinEncoderInterface : public RiscV64XEncoderInterfaceBase {
+ public:
+  RiscV64XBinEncoderInterface();
+  ~RiscV64XBinEncoderInterface() override = default;
+
+  // This class is neither copy constructible nor copy assignable.
+  RiscV64XBinEncoderInterface(const RiscV64XBinEncoderInterface &) = delete;
+  RiscV64XBinEncoderInterface &operator=(const RiscV64XBinEncoderInterface &) =
+      delete;
+
+  // Overrides of the RiscV64XEncoderInterfaceBase methods.
+  absl::StatusOr<std::tuple<uint64_t, int>> GetOpcodeEncoding(
+      SlotEnum slot, int entry, OpcodeEnum opcode,
+      ResolverInterface *resolver) override;
+  absl::StatusOr<uint64_t> GetSrcOpEncoding(
+      uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+      OpcodeEnum opcode, SourceOpEnum source_op, int source_num,
+      ResolverInterface *resolver) override;
+  absl::Status AppendSrcOpRelocation(
+      uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+      OpcodeEnum opcode, SourceOpEnum source_op, int source_num,
+      ResolverInterface *resolver,
+      std::vector<RelocationInfo> &relocations) override;
+  absl::StatusOr<uint64_t> GetDestOpEncoding(
+      uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+      OpcodeEnum opcode, DestOpEnum dest_op, int dest_num,
+      ResolverInterface *resolver) override;
+  absl::Status AppendDestOpRelocation(
+      uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+      OpcodeEnum opcode, DestOpEnum dest_op, int dest_num,
+      ResolverInterface *resolver,
+      std::vector<RelocationInfo> &relocations) override;
+  absl::StatusOr<uint64_t> GetListDestOpEncoding(
+      uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+      OpcodeEnum opcode, ListDestOpEnum dest_op, int dest_num,
+      ResolverInterface *resolver) override;
+  absl::StatusOr<uint64_t> GetListSrcOpEncoding(
+      uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+      OpcodeEnum opcode, ListSourceOpEnum source_op, int source_num,
+      ResolverInterface *resolver) override;
+  absl::StatusOr<uint64_t> GetPredOpEncoding(
+      uint64_t address, absl::string_view text, SlotEnum slot, int entry,
+      OpcodeEnum opcode, PredOpEnum pred_op,
+      ResolverInterface *resolver) override;
+
+ private:
+  // Callable that encodes one operand from its address and text.
+  using OperandSetter = std::function<absl::StatusOr<uint64_t>(
+      uint64_t, absl::string_view, ResolverInterface *)>;
+  // Callable that appends the relocations required by one operand.
+  using RelocationSetter = std::function<absl::Status(
+      uint64_t, absl::string_view, ResolverInterface *,
+      std::vector<RelocationInfo> &)>;
+  // Operand setters keyed by the integer value of the operand enum.
+  using OpMap = absl::flat_hash_map<int, OperandSetter>;
+  // Relocation setters keyed by (opcode, source operand).
+  using RelocationMap =
+      absl::flat_hash_map<std::tuple<OpcodeEnum, SourceOpEnum>,
+                          RelocationSetter>;
+
+  OpMap source_op_map_;
+  RelocationMap relocation_source_op_map_;
+  OpMap dest_op_map_;
+  OpMap list_dest_op_map_;
+  OpMap list_source_op_map_;
+  OpMap pred_op_map_;
+};
+
+}  // namespace mpact::sim::riscv::isa64
+
+#endif  // MPACT_SIM_UTIL_ASM_TEST_RISCV64X_BIN_ENCODER_INTERFACE_H_
diff --git a/mpact/sim/util/asm/test/riscv64x_instructions.cc b/mpact/sim/util/asm/test/riscv64x_instructions.cc new file mode 100644 index 0000000..44990e6 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_instructions.cc
@@ -0,0 +1,30 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mpact/sim/util/asm/test/riscv64x_instructions.h"
+
+#include <iostream>
+
+#include "mpact/sim/generic/instruction.h"
+
+namespace mpact::sim::riscv {
+
+// Writes a fixed "Illegal instruction" line to std::cerr. The instruction
+// argument is part of the signature but is not examined.
+void RiscVIllegalInstruction(const generic::Instruction *inst) {
+  static constexpr char kMessage[] = "Illegal instruction\n";
+  std::cerr << kMessage;
+}
+
+}  // namespace mpact::sim::riscv
diff --git a/mpact/sim/util/asm/test/riscv64x_instructions.h b/mpact/sim/util/asm/test/riscv64x_instructions.h new file mode 100644 index 0000000..9dc1c42 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_instructions.h
@@ -0,0 +1,30 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV64X_INSTRUCTIONS_H_
+#define MPACT_SIM_UTIL_ASM_TEST_RISCV64X_INSTRUCTIONS_H_
+
+#include "mpact/sim/generic/instruction.h"
+
+namespace mpact::sim::riscv {
+
+using ::mpact::sim::generic::Instruction;
+
+// Handler for illegal instructions. The implementation writes
+// "Illegal instruction" to std::cerr and does not examine `inst`.
+void RiscVIllegalInstruction(const generic::Instruction *inst);
+
+}  // namespace mpact::sim::riscv
+
+#endif  // MPACT_SIM_UTIL_ASM_TEST_RISCV64X_INSTRUCTIONS_H_
diff --git a/mpact/sim/util/asm/test/riscv_bin_setters.cc b/mpact/sim/util/asm/test/riscv_bin_setters.cc new file mode 100644 index 0000000..165b407 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv_bin_setters.cc
@@ -0,0 +1,142 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mpact/sim/util/asm/test/riscv_bin_setters.h"
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "absl/base/no_destructor.h"
+#include "absl/status/status.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/generic/type_helpers.h"
+#include "re2/re2.h"
+
+namespace mpact {
+namespace sim {
+namespace riscv {
+
+namespace internal {
+
+// Relocation type values stored in the RelocationInfo entries created below.
+// The numbers correspond to the R_RISCV_* relocation types of the RISC-V ELF
+// psABI.
+enum class RelocType {
+  kNone = 0,
+  kBranch = 16,
+  kJal = 17,
+  kPcrelHi20 = 23,
+  kPcrelLo12I = 24,
+  kPcrelLo12S = 25,
+  kHi20 = 26,
+  kLo12I = 27,
+  kLo12S = 28,
+};
+
+using ::mpact::sim::generic::operator*;  // NOLINT(misc-unused-using-decls)
+
+// Matches an operand written as a relocation function, e.g. "%lo(symbol)".
+// Group 1 captures the function name including the leading '%', group 2 the
+// argument.
+absl::NoDestructor<RE2> kSymRe("^\\s*(%[a-zA-Z0-9_]+)\\s*\\(?([^)]+)\\)?\\s*$");
+
+namespace {
+
+// Shared implementation for the 12 bit immediates of addi and sd. Appends a
+// relocation of type `lo_type` for %lo(sym) and of type `pcrel_lo_type` for
+// %pcrel_lo(sym). Text that is not a relocation function adds nothing; any
+// other relocation function is rejected.
+absl::Status AppendLo12Relocation(absl::string_view text, RelocType lo_type,
+                                  RelocType pcrel_lo_type,
+                                  std::vector<RelocationInfo> &relocations) {
+  std::string relo;
+  std::string sym;
+  if (!RE2::FullMatch(text, *kSymRe, &relo, &sym)) return absl::OkStatus();
+  if (relo == "%lo") {
+    relocations.emplace_back(0, sym, *lo_type, 0, 0);
+    return absl::OkStatus();
+  }
+  if (relo == "%pcrel_lo") {
+    relocations.emplace_back(0, sym, *pcrel_lo_type, 0, 0);
+    return absl::OkStatus();
+  }
+  // kSymRe only matches when the name is non-empty, so reaching this point
+  // means an unsupported relocation function was used.
+  return absl::InvalidArgumentError(
+      absl::StrCat("Invalid relocation: '", relo, "'"));
+}
+
+}  // namespace
+
+// Relocation setter for the I-type immediate of addi (%lo / %pcrel_lo).
+absl::Status RelocateAddiIImm12(uint64_t address, absl::string_view text,
+                                ResolverInterface *resolver,
+                                std::vector<RelocationInfo> &relocations) {
+  return AppendLo12Relocation(text, RelocType::kLo12I, RelocType::kPcrelLo12I,
+                              relocations);
+}
+
+// Relocation setter for the jump target of jal and j. Only text written as a
+// relocation function produces an entry; a plain symbol or literal does not.
+absl::Status RelocateJJImm20(uint64_t address, absl::string_view text,
+                             ResolverInterface *resolver,
+                             std::vector<RelocationInfo> &relocations) {
+  std::string relo;
+  std::string sym;
+  if (!RE2::FullMatch(text, *kSymRe, &relo, &sym)) return absl::OkStatus();
+
+  relocations.emplace_back(0, sym, *RelocType::kJal, 0, 0);
+  return absl::OkStatus();
+}
+
+// Relocation setter for the immediate of jr. No relocations are generated.
+absl::Status RelocateJrJImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations) {
+  return absl::OkStatus();
+}
+
+// Relocation setter for the U-type immediate of lui. %hi(sym) appends a kHi20
+// relocation; any other relocation function is rejected.
+absl::Status RelocateLuiUImm20(uint64_t address, absl::string_view text,
+                               ResolverInterface *resolver,
+                               std::vector<RelocationInfo> &relocations) {
+  std::string relo;
+  std::string sym;
+  if (!RE2::FullMatch(text, *kSymRe, &relo, &sym)) return absl::OkStatus();
+  // Only %hi() refers to the upper 20 bits that lui loads. Recording a kHi20
+  // relocation for e.g. %lo() would silently mislabel the operand.
+  if (relo != "%hi") {
+    return absl::InvalidArgumentError(
+        absl::StrCat("Invalid relocation: '", relo, "'"));
+  }
+  relocations.emplace_back(0, sym, *RelocType::kHi20, 0, 0);
+  return absl::OkStatus();
+}
+
+// Relocation setter for the S-type immediate of sd (%lo / %pcrel_lo).
+absl::Status RelocateSdSImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations) {
+  return AppendLo12Relocation(text, RelocType::kLo12S, RelocType::kPcrelLo12S,
+                              relocations);
+}
+
+}  // namespace internal
+
+}  // namespace riscv
+}  // namespace sim
+}  // namespace mpact
diff --git a/mpact/sim/util/asm/test/riscv_bin_setters.h b/mpact/sim/util/asm/test/riscv_bin_setters.h new file mode 100644 index 0000000..d461381 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv_bin_setters.h
@@ -0,0 +1,244 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV_BIN_SETTERS_H_
+#define MPACT_SIM_UTIL_ASM_TEST_RISCV_BIN_SETTERS_H_
+
+#include <cstdint>
+#include <initializer_list>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/util/asm/opcode_assembler_interface.h"
+#include "mpact/sim/util/asm/resolver_interface.h"
+#include "mpact/sim/util/asm/test/riscv_getter_helpers.h"
+#include "re2/re2.h"
+
+namespace mpact {
+namespace sim {
+namespace riscv {
+
+using ::mpact::sim::util::assembler::RelocationInfo;
+using ::mpact::sim::util::assembler::ResolverInterface;
+
+// Register name to register number table. Both the architectural names
+// (x0..x31) and the ABI names (zero, ra, sp, ...) are listed.
+constexpr std::initializer_list<const std::pair<absl::string_view, uint64_t>>
+    kRegisterList = {
+        {"x0", 0},   {"x1", 1},   {"x2", 2},   {"x3", 3},   {"x4", 4},
+        {"x5", 5},   {"x6", 6},   {"x7", 7},   {"x8", 8},   {"x9", 9},
+        {"x10", 10}, {"x11", 11}, {"x12", 12}, {"x13", 13}, {"x14", 14},
+        {"x15", 15}, {"x16", 16}, {"x17", 17}, {"x18", 18}, {"x19", 19},
+        {"x20", 20}, {"x21", 21}, {"x22", 22}, {"x23", 23}, {"x24", 24},
+        {"x25", 25}, {"x26", 26}, {"x27", 27}, {"x28", 28}, {"x29", 29},
+        {"x30", 30}, {"x31", 31}, {"zero", 0}, {"ra", 1},   {"sp", 2},
+        {"gp", 3},   {"tp", 4},   {"t0", 5},   {"t1", 6},   {"t2", 7},
+        {"s0", 8},   {"s1", 9},   {"a0", 10},  {"a1", 11},  {"a2", 12},
+        {"a3", 13},  {"a4", 14},  {"a5", 15},  {"a6", 16},  {"a7", 17},
+        {"s2", 18},  {"s3", 19},  {"s4", 20},  {"s5", 21},  {"s6", 22},
+        {"s7", 23},  {"s8", 24},  {"s9", 25},  {"s10", 26}, {"s11", 27},
+        {"t3", 28},  {"t4", 29},  {"t5", 30},  {"t6", 31}};
+
+// Converts operand text to an integer of type T. The text may be a hexadecimal
+// literal (0x...), a decimal literal, or a symbol name, optionally wrapped in
+// a relocation function such as %lo(...), in which case only the argument is
+// converted. Symbols are looked up through `resolver`; a symbol with a null
+// resolver, or text matching none of the forms, yields an InvalidArgument
+// error.
+template <typename T>
+absl::StatusOr<T> SimpleTextToInt(absl::string_view op_text,
+                                  ResolverInterface *resolver) {
+  T value;
+  static RE2 hex_re("^\\s*0x([0-9a-fA-F]+)\\s*$");
+  static RE2 dec_re("^\\s*(-?[0-9]+)\\s*$");
+  static RE2 relo_re("^\\s*\\%[a-zA-Z0-9_]+\\s*\\(([a-zA-Z0-9_]+)\\s*\\)\\s*$");
+  static RE2 symbol_re("^\\s*([a-zA-Z0-9_]+)\\s*$");
+  std::string str;
+  std::string text(op_text);
+  // First see if the operand is a relocation function, and extract the text
+  // argument. A relocation function is on the form of %name(arg).
+  if (RE2::FullMatch(op_text, relo_re, &str)) {
+    text = str;
+  }
+  // Extract the hex immediate.
+  if (RE2::FullMatch(text, hex_re, &str)) {
+    if (absl::SimpleHexAtoi(str, &value)) return value;
+    return absl::InvalidArgumentError(
+        absl::StrCat("Invalid hexadecimal immediate: ", text));
+  }
+  // Extract the decimal immediate.
+  if (RE2::FullMatch(text, dec_re, &str)) {
+    if (absl::SimpleAtoi(str, &value)) return value;
+    return absl::InvalidArgumentError(
+        absl::StrCat("Invalid decimal immediate: ", text));
+  }
+  // Extract the symbol.
+  if (RE2::FullMatch(text, symbol_re, &str)) {
+    if (resolver != nullptr) {
+      auto res = resolver->Resolve(str);
+      if (!res.ok()) {
+        return res.status();
+      }
+      return static_cast<T>(res.value());
+    }
+  }
+  return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text));
+}
+
+// Map type built from kRegisterList for register name lookup.
+using ValueMap = absl::flat_hash_map<absl::string_view, uint64_t>;
+
+// Registers the source operand setters in `map`, keyed by the integer value
+// of the operand enum. Immediate operands are parsed with SimpleTextToInt and
+// register operands are looked up in kRegisterList.
+template <typename Enum, typename Map, typename Encoder>
+void AddRiscvSourceOpBinSetters(Map &map) {
+  Insert(map, *Enum::kIImm12,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::IType::InsertImm12(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kIUimm6,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<uint32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::RSType::InsertRUimm6(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kJImm12,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::IType::InsertImm12(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kJImm20,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           // The encoded immediate is the target relative to `address`.
+           uint32_t delta = res.value() - address;
+           auto value = Encoder::JType::InsertJImm(delta, 0ULL);
+           return value;
+         });
+  Insert(map, *Enum::kRs1,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           static ValueMap map(kRegisterList);
+           auto iter = map.find(text);
+           if (iter == map.end()) {
+             return absl::InvalidArgumentError(
+                 absl::StrCat("Invalid source operand: ", text));
+           }
+           return Encoder::RSType::InsertRs1(iter->second, 0ULL);
+         });
+  Insert(map, *Enum::kRs2,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           static ValueMap map(kRegisterList);
+           auto iter = map.find(text);
+           if (iter == map.end()) {
+             return absl::InvalidArgumentError(
+                 absl::StrCat("Invalid source operand: ", text));
+           }
+           return Encoder::SType::InsertRs2(iter->second, 0ULL);
+         });
+  Insert(map, *Enum::kSImm12,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           // Store offsets are signed; parse as int32_t (like kIImm12) so
+           // that negative decimal offsets such as -8 are accepted.
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::SType::InsertSImm(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kUImm20,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<uint32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::UType::InsertUImm(res.value(), 0ULL);
+         });
+}
+
+// Registers the destination operand setters (register rd) in `map`.
+template <typename Enum, typename Map, typename Encoder>
+void AddRiscvDestOpBinSetters(Map &map) {
+  Insert(map, *Enum::kRd,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           static ValueMap map(kRegisterList);
+           auto iter = map.find(text);
+           if (iter == map.end()) {
+             return absl::InvalidArgumentError(
+                 absl::StrCat("Invalid destination operand: ", text));
+           }
+           return Encoder::RSType::InsertRd(iter->second, 0ULL);
+         });
+}
+
+namespace internal {
+
+// Relocation setters for individual (opcode, source operand) pairs. They are
+// defined in riscv_bin_setters.cc.
+absl::Status RelocateAddiIImm12(uint64_t address, absl::string_view text,
+                                ResolverInterface *resolver,
+                                std::vector<RelocationInfo> &relocations);
+absl::Status RelocateJJImm20(uint64_t address, absl::string_view text,
+                             ResolverInterface *resolver,
+                             std::vector<RelocationInfo> &relocations);
+absl::Status RelocateJrJImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations);
+absl::Status RelocateLuiUImm20(uint64_t address, absl::string_view text,
+                               ResolverInterface *resolver,
+                               std::vector<RelocationInfo> &relocations);
+absl::Status RelocateSdSImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations);
+
+}  // namespace internal
+
+// Registers the relocation setters in `map`, keyed by the (opcode, source
+// operand) pair each one handles.
+template <typename OpcodeEnum, typename SourceOpEnum, typename Map>
+void AddRiscvSourceOpRelocationSetters(Map &map) {
+  Insert(map, OpcodeEnum::kAddi, SourceOpEnum::kIImm12,
+         internal::RelocateAddiIImm12);
+  Insert(map, OpcodeEnum::kJal, SourceOpEnum::kJImm20,
+         internal::RelocateJJImm20);
+  Insert(map, OpcodeEnum::kJ, SourceOpEnum::kJImm20, internal::RelocateJJImm20);
+  Insert(map, OpcodeEnum::kJr, SourceOpEnum::kJImm12,
+         internal::RelocateJrJImm12);
+  Insert(map, OpcodeEnum::kLui, SourceOpEnum::kUImm20,
+         internal::RelocateLuiUImm20);
+  Insert(map, OpcodeEnum::kSd, SourceOpEnum::kSImm12,
+         internal::RelocateSdSImm12);
+}
+
+}  // namespace riscv
+}  // namespace sim
+}  // namespace mpact
+
+#endif  // MPACT_SIM_UTIL_ASM_TEST_RISCV_BIN_SETTERS_H_
diff --git a/mpact/sim/util/asm/test/riscv_getter_helpers.h b/mpact/sim/util/asm/test/riscv_getter_helpers.h new file mode 100644 index 0000000..5c37128 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv_getter_helpers.h
@@ -0,0 +1,71 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_
+#define MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_
+
+#include <tuple>
+#include <utility>
+
+#include "absl/strings/string_view.h"
+
+// This file contains helper functions that are used to create commonly used
+// operands for RiscV instructions.
+
+namespace mpact {
+namespace sim {
+namespace riscv {
+
+// Helper function to insert an entry into a "getter" map. This is used in
+// the riscv_*_getter.h files. The key is the integer value of `entry`; an
+// existing entry for the same key is replaced.
+template <typename M, typename E, typename G>
+inline void Insert(M &map, E entry, G getter) {
+  map.insert_or_assign(static_cast<int>(entry), std::move(getter));
+}
+
+// Overload for maps keyed by a pair of enum values, stored as a two element
+// tuple. An existing entry for the same key is replaced.
+template <typename M, typename E1, typename E2, typename G>
+inline void Insert(M &map, E1 entry1, E2 entry2, G getter) {
+  map.insert_or_assign(std::make_tuple(entry1, entry2), std::move(getter));
+}
+
+// Architectural names for the integer registers.
+constexpr absl::string_view kXregNames[32] = {
+    "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10",
+    "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21",
+    "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"};
+// ABI names for the integer registers.
+constexpr absl::string_view kXregAbiNames[32] = {
+    "zero", "ra", "sp", "gp", "tp",  "t0",  "t1", "t2", "s0", "s1", "a0",
+    "a1",   "a2", "a3", "a4", "a5",  "a6",  "a7", "s2", "s3", "s4", "s5",
+    "s6",   "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"};
+// Architectural names for the floating point registers.
+constexpr absl::string_view kFregNames[32] = {
+    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",  "f8",  "f9",  "f10",
+    "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21",
+    "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"};
+// ABI names for the floating point registers.
+constexpr absl::string_view kFregAbiNames[32] = {
+    "ft0", "ft1", "ft2",  "ft3",  "ft4", "ft5", "ft6",  "ft7",
+    "fs0", "fs1", "fa0",  "fa1",  "fa2", "fa3", "fa4",  "fa5",
+    "fa6", "fa7", "fs2",  "fs3",  "fs4", "fs5", "fs6",  "fs7",
+    "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"};
+
+}  // namespace riscv
+}  // namespace sim
+}  // namespace mpact
+
+#endif  // MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_