This adds more code to provide support for generating an assembler from .bin_fmt and .isa files. PiperOrigin-RevId: 711832481 Change-Id: I5f40e37e2a9ca677218331372a038e6c9feab6a0
diff --git a/mpact/sim/decoder/bin_format_visitor.cc b/mpact/sim/decoder/bin_format_visitor.cc index 060e102..39ba2dc 100644 --- a/mpact/sim/decoder/bin_format_visitor.cc +++ b/mpact/sim/decoder/bin_format_visitor.cc
@@ -180,6 +180,7 @@ std::string dec_dot_cc_name = absl::StrCat(prefix, "_bin_decoder.cc"); std::string enc_dot_h_name = absl::StrCat(prefix, "_bin_encoder.h"); std::string enc_dot_cc_name = absl::StrCat(prefix, "_bin_encoder.cc"); + std::string enum_dot_h_name = absl::StrCat(prefix, "_enums.h"); std::ofstream dec_dot_h_file(absl::StrCat(directory, "/", dec_dot_h_name)); std::ofstream dec_dot_cc_file(absl::StrCat(directory, "/", dec_dot_cc_name)); std::ofstream enc_dot_h_file(absl::StrCat(directory, "/", enc_dot_h_name)); @@ -189,8 +190,8 @@ EmitDecoderFilePrefix(dec_dot_h_name, encoding_info.get()); dec_dot_h_file << dec_h_output; dec_dot_cc_file << dec_cc_output; - auto [enc_h_output, enc_cc_output] = - EmitEncoderFilePrefix(enc_dot_h_name, encoding_info.get()); + auto [enc_h_output, enc_cc_output] = EmitEncoderFilePrefix( + enc_dot_h_name, enum_dot_h_name, encoding_info.get()); enc_dot_h_file << enc_h_output; enc_dot_cc_file << enc_cc_output; // Output file prefix is the input file name. 
@@ -311,7 +312,8 @@ } std::tuple<std::string, std::string> BinFormatVisitor::EmitEncoderFilePrefix( - const std::string &dot_h_name, BinEncodingInfo *encoding_info) const { + const std::string &dot_h_name, const std::string &enum_h_name, + BinEncodingInfo *encoding_info) const { std::string h_string; std::string cc_string; @@ -323,10 +325,18 @@ "\n" "\n" "#include <iostream>\n" - "#include <cstdint>\n\n"); + "#include <cstdint>\n\n" + "#include \"absl/base/no_destructor.h\"\n" + "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"", + enum_h_name, "\"\n"); absl::StrAppend(&cc_string, "#include \"", dot_h_name, "\"\n\n" - "#include <cstdint>\n\n"); + "#include <cstdint>\n\n" + "#include \"absl/base/no_destructor.h\"\n" + "#include \"absl/container/flat_hash_map.h\"\n\n" + "#include \"", + enum_h_name, "\"\n"); for (auto &name_space : encoding_info->decoder()->namespaces()) { auto name_space_str = absl::StrCat("namespace ", name_space, " {\n"); absl::StrAppend(&cc_string, name_space_str); @@ -356,22 +366,21 @@ group->GetInstructionEncodings(encodings); } std::string opcode_enum = encoding->opcode_enum(); - absl::StrAppend(&h_string, - "extern const std::tuple<uint64_t, int> kOpcodeEncodings[", - encodings.size() + 1, "];\n"); - absl::StrAppend(&cc_string, - "const std::tuple<uint64_t, int> kOpcodeEncodings[", - encodings.size() + 1, "] = {\n"); - absl::StrAppend(&cc_string, " /* ", opcode_enum, - "::kNone = */ {0x0ULL, 0},\n"); + absl::StrAppend(&h_string, "extern absl::NoDestructor<absl::flat_hash_map<", + opcode_enum, + ", std::tuple<uint64_t, int>>> kOpcodeEncodings;\n"); + absl::StrAppend(&cc_string, "absl::NoDestructor<absl::flat_hash_map<", + opcode_enum, + ", std::tuple<uint64_t, int>>> kOpcodeEncodings({\n"); + absl::StrAppend(&cc_string, " {", opcode_enum, "::kNone, {0x0ULL, 0}},\n"); for (auto &[name, pair] : encodings) { auto [value, width] = pair; std::string enum_name = absl::StrCat(opcode_enum, "::k", ToPascalCase(name)); - 
absl::StrAppend(&cc_string, " /* ", enum_name, " = */ {0x", - absl::Hex(value), "ULL, ", width, "},\n"); + absl::StrAppend(&cc_string, " {", enum_name, ", {0x", absl::Hex(value), + "ULL, ", width, "}},\n"); } - absl::StrAppend(&cc_string, "};\n"); + absl::StrAppend(&cc_string, "});\n"); return std::tie(h_string, cc_string); }
diff --git a/mpact/sim/decoder/bin_format_visitor.h b/mpact/sim/decoder/bin_format_visitor.h index c8aec08..a5f15fc 100644 --- a/mpact/sim/decoder/bin_format_visitor.h +++ b/mpact/sim/decoder/bin_format_visitor.h
@@ -102,7 +102,8 @@ std::tuple<std::string, std::string> EmitEncoderCode( BinEncodingInfo *encoding); std::tuple<std::string, std::string> EmitEncoderFilePrefix( - const std::string &dot_h_name, BinEncodingInfo *encoding_info) const; + const std::string &dot_h_name, const std::string &enum_h_name, + BinEncodingInfo *encoding_info) const; // Generate the file suffixes (namespace closing etc.) StringPair EmitFileSuffix(const std::string &dot_h_name, BinEncodingInfo *encoding_info);
diff --git a/mpact/sim/decoder/format.cc b/mpact/sim/decoder/format.cc index c107833..e71bd6b 100644 --- a/mpact/sim/decoder/format.cc +++ b/mpact/sim/decoder/format.cc
@@ -453,18 +453,16 @@ shift = absl::StrCat(" >> ", left - width); } // Extract the bits from the overlay value for the current component. - absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", mask, - "ULL;\n"); + absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", + absl::Hex(mask), "ULL;\n"); shift.clear(); if (bits_or_field->low() != 0) { shift = absl::StrCat(" << ", bits_or_field->low()); } - absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, - ");\n" - " return inst_word;\n"); + absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, ");\n"); left -= width; } - absl::StrAppend(&h_output, "}\n"); + absl::StrAppend(&h_output, " return inst_word;\n}\n"); return h_output; }
diff --git a/mpact/sim/decoder/instruction_set.cc b/mpact/sim/decoder/instruction_set.cc index 35641c5..6962cac 100644 --- a/mpact/sim/decoder/instruction_set.cc +++ b/mpact/sim/decoder/instruction_set.cc
@@ -691,7 +691,7 @@ position, "], slot, " "entry, opcode, ", - pred_op, ");\n"); + pred_op, ", resolver);\n"); break; } case OperandLocator::kSource: { @@ -703,7 +703,7 @@ position, "], slot, " "entry, opcode, ", - source_op, ", ", locator.instance, ");\n"); + source_op, ", ", locator.instance, ", resolver);\n"); break; } case OperandLocator::kSourceArray: { @@ -716,7 +716,7 @@ position, "], slot, " "entry, opcode, ", - list_source_op, ", ", locator.instance, ");\n"); + list_source_op, ", ", locator.instance, ", resolver);\n"); break; } case OperandLocator::kDestination: { @@ -728,7 +728,7 @@ position, "], slot, " "entry, opcode, ", - dest_op, ", ", locator.instance, ");\n"); + dest_op, ", ", locator.instance, ", resolver);\n"); break; } case OperandLocator::kDestinationArray: { @@ -742,7 +742,7 @@ position, "], slot, " "entry, opcode, ", - list_dest_op, ", ", locator.instance, ");\n"); + list_dest_op, ", ", locator.instance, ", resolver);\n"); break; } default: @@ -762,7 +762,11 @@ std::string cc_output; std::string encoder = absl::StrCat(pascal_name(), "EncoderInterfaceBase"); // Generate the bin encoder base class. - absl::StrAppend(&h_output, "class ", encoder, + absl::StrAppend(&h_output, + "using ::mpact::sim::util::assembler::ResolverInterface;\n" + "\n" + "class ", + encoder, " {\n" " public:\n" " virtual ~", @@ -771,32 +775,34 @@ R"( // Returns the opcode encoding and size (in bits) of the opcode. 
virtual absl::StatusOr<std::tuple<uint64_t, int>> GetOpcodeEncoding( - SlotEnum slot, int entry, OpcodeEnum opcode) = 0; + SlotEnum slot, int entry, OpcodeEnum opcode, ResolverInterface *resolver) = 0; virtual absl::StatusOr<uint64_t> GetSrcOpEncoding(uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, - SourceOpEnum source_op, int source_num) = 0; + SourceOpEnum source_op, int source_num, ResolverInterface *resolver) = 0; virtual absl::StatusOr<uint64_t> GetDestOpEncoding(uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, - DestOpEnum dest_op, int dest_num) = 0; + DestOpEnum dest_op, int dest_num, ResolverInterface *resolver) = 0; virtual absl::StatusOr<uint64_t> GetListDestOpEncoding(uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, - ListDestOpEnum dest_op, int dest_num) = 0; + ListDestOpEnum dest_op, int dest_num, ResolverInterface *resolver) = 0; virtual absl::StatusOr<uint64_t> GetListSourceOpEncoding( uint64_t address, absl::string_view text,SlotEnum slot, int entry, OpcodeEnum opcode, - ListSourceOpEnum source_op, int source_num) = 0; + ListSourceOpEnum source_op, int source_num, ResolverInterface *resolver) = 0; virtual absl::StatusOr<uint64_t> GetPredOpEncoding(uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, - PredOpEnum pred_op) = 0; + PredOpEnum pred_op, ResolverInterface *resolver) = 0; }; )"); absl::StrAppend(&cc_output, + "using ::mpact::sim::util::assembler::ResolverInterface;\n" + "\n" "namespace {\n\n" "absl::StatusOr<std::tuple<uint64_t, int>> EncodeNone(", encoder, "*, SlotEnum, int, OpcodeEnum, uint64_t, const " - "std::vector<std::string> &) {\n" + "std::vector<std::string> &, ResolverInterface *) {\n" " return absl::NotFoundError(\"No such opcode\");\n" "}\n\n"); std::string array; @@ -806,7 +812,7 @@ encoder, "*, SlotEnum, int, OpcodeEnum, uint64_t, const " "std::vector<std::string> " - "&);\n" + 
"&, ResolverInterface *);\n" "EncodeFcn encode_fcns[] = {\n" " EncodeNone,\n"); for (auto &[name, inst_ptr] : instruction_map_) { @@ -814,12 +820,13 @@ absl::StrAppend(&array, " Encode", opcode->pascal_name(), ",\n"); absl::StrAppend( &cc_output, "absl::StatusOr<std::tuple<uint64_t, int>> Encode", - opcode->pascal_name(), "(", encoder, - " *encoder, SlotEnum slot, int entry,\n" - " OpcodeEnum opcode, uint64_t address, const " - "std::vector<std::string> &operands) " + opcode->pascal_name(), "(\n ", encoder, + " *encoder, SlotEnum slot, int entry, OpcodeEnum opcode,\n" + " uint64_t address, const " + "std::vector<std::string> &operands, ResolverInterface *resolver) " "{\n" - " auto res_opcode = encoder->GetOpcodeEncoding(slot, entry, opcode);\n" + " auto res_opcode = encoder->GetOpcodeEncoding(slot, " + "entry, opcode, resolver);\n" " if (!res_opcode.ok()) return res_opcode.status();\n" " auto [encoding, bit_size] = res_opcode.value();\n" " absl::StatusOr<uint64_t> result;\n");
diff --git a/mpact/sim/decoder/instruction_set_visitor.cc b/mpact/sim/decoder/instruction_set_visitor.cc index 05a7d3f..2d3f8c1 100644 --- a/mpact/sim/decoder/instruction_set_visitor.cc +++ b/mpact/sim/decoder/instruction_set_visitor.cc
@@ -2298,6 +2298,7 @@ "#include \"absl/status/status.h\"\n" "#include \"absl/status/statusor.h\"\n" "#include \"absl/strings/string_view.h\"\n" + "#include \"mpact/sim/util/asm/resolver_interface.h\"\n" "#include \"re2/re2.h\"\n" "#include \"re2/set.h\"\n" "#include \"", @@ -2314,6 +2315,7 @@ "#include \"absl/status/status.h\"\n" "#include \"absl/status/statusor.h\"\n" "#include \"absl/strings/string_view.h\"\n" + "#include \"mpact/sim/util/asm/resolver_interface.h\"\n" "#include \"re2/re2.h\"\n" "#include \"re2/set.h\"\n" "#include \"",
diff --git a/mpact/sim/decoder/mpact_sim_isa.bzl b/mpact/sim/decoder/mpact_sim_isa.bzl index 02fee77..b764d1b 100644 --- a/mpact/sim/decoder/mpact_sim_isa.bzl +++ b/mpact/sim/decoder/mpact_sim_isa.bzl
@@ -51,7 +51,7 @@ data = data, ) -def mpact_isa_decoder(name, includes, src = "", srcs = [], deps = [], isa_name = "", prefix = ""): +def mpact_isa_decoder(name, includes, src = "", srcs = [], deps = [], isa_name = "", prefix = "", testonly = False): """Generates the C++ source corresponding to an MPACT Isa decoder definition. Args: @@ -105,6 +105,7 @@ cmd = command, heuristic_label_expansion = 0, tools = ["@com_google_mpact-sim//mpact/sim/decoder:decoder_gen"], + testonly = testonly, ) # The rule for the lib that is built from the generated sources. @@ -117,6 +118,8 @@ lib_deps.append("@com_google_mpact-sim//mpact/sim/generic:arch_state") if "@com_google_mpact-sim//mpact/sim/generic:instruction" not in deps: lib_deps.append("@com_google_mpact-sim//mpact/sim/generic:instruction") + if "@com_google_mpact-sim//mpact/sim/util/asm" not in deps: + lib_deps.append("@com_google_mpact-sim//mpact/sim/util/asm") if "@com_googlesource_code_re2//:re2" not in deps: lib_deps.append("@com_googlesource_code_re2//:re2") if "@com_google_absl//absl/status" not in deps: @@ -130,9 +133,10 @@ srcs = [f for f in out_files if f.endswith(".cc")], hdrs = [f for f in out_files if f.endswith(".h")], deps = lib_deps + deps, + testonly = testonly, ) -def mpact_bin_fmt_decoder(name, includes, src = "", srcs = [], deps = [], decoder_name = "", prefix = ""): +def mpact_bin_fmt_decoder(name, includes, src = "", srcs = [], deps = [], decoder_name = "", prefix = "", testonly = False): """Generates the C++ source corresponding to an MPACT Bin Format decoder definition. Args: @@ -183,10 +187,13 @@ cmd = command, heuristic_label_expansion = 0, tools = ["@com_google_mpact-sim//mpact/sim/decoder:bin_format_gen"], + testonly = testonly, ) # The rule for the lib that is built from the generated sources. 
lib_deps = [] + if "@com_google_absl//absl/base:no_destructor" not in deps: + lib_deps.append("@com_google_absl//absl/base:no_destructor") if "@com_google_absl//absl/container:flat_hash_map" not in deps: lib_deps.append("@com_google_absl//absl/container:flat_hash_map") if "@com_google_absl//absl/functional:any_invocable" not in deps: @@ -202,9 +209,10 @@ srcs = [f for f in out_files if f.endswith(".cc")], hdrs = [f for f in out_files if f.endswith(".h")], deps = lib_deps + deps, + testonly = testonly, ) -def mpact_proto_fmt_decoder(name, includes, src = "", srcs = [], proto_files = [], deps = [], decoder_name = "", prefix = ""): +def mpact_proto_fmt_decoder(name, includes, src = "", srcs = [], proto_files = [], deps = [], decoder_name = "", prefix = "", testonly = False): """Generates the C++ source corresponding to an MPACT Bin Format decoder definition. Args: @@ -266,6 +274,7 @@ "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/strings:str_format", ] + deps, + testonly = testonly, ) # Strip any path component from text. Return only the string that follows the last "/".
diff --git a/mpact/sim/decoder/slot.cc b/mpact/sim/decoder/slot.cc index dab1ac2..93386ff 100644 --- a/mpact/sim/decoder/slot.cc +++ b/mpact/sim/decoder/slot.cc
@@ -371,16 +371,19 @@ std::tuple<std::string, std::vector<OperandLocator>> Slot::GenerateRegEx( const Instruction *inst, std::vector<std::string> &formats) const { std::string output = "R\"("; - std::string sep = "^\\s*"; - int args = 0; + std::string sep = "^"; std::vector<OperandLocator> opnd_locators; // Iterate over the vector of disasm formats. These will end up concatenated // with \s+ separators. for (auto const *disasm_fmt : inst->disasm_format_vec()) { absl::StrAppend(&output, sep); sep = "\\s+"; + // The fragments are the text part (not part of operands), that occur + // between the operands of the format. E.g., the commas in "r1, r2, r3". auto fragment_iter = disasm_fmt->format_fragment_vec.begin(); auto fragment_end = disasm_fmt->format_fragment_vec.end(); + // The formats are the instruction formats, E.g., the register names in + // "r1, r2, r3". auto format_iter = disasm_fmt->format_info_vec.begin(); auto format_end = disasm_fmt->format_info_vec.end(); char prev = '\0'; @@ -396,15 +399,17 @@ } fragment_iter++; if (format_iter != format_end) { - // If the previous character is punctuation, but not '.' or '_', add a - // space separator. - if ((prev != '\0') && - !(isalnum(prev) || (prev == '_') || (prev == '.'))) { - absl::StrAppend(&output, "\\s*"); + // If the trailing part of output is not '\\s*', and prev is + // punctuation, but not '.' or '_', add a space separator. 
+ auto len = output.size(); + if (output.substr(len - 3) != "\\s*") { + if ((prev != '\0') && + !(isalnum(prev) || (prev == '_') || (prev == '.'))) { + absl::StrAppend(&output, "\\s*"); + } } - args++; std::string op_name = (*format_iter)->op_name; - absl::StrAppend(&output, "(?<", op_name, ">\\S*?)"); + absl::StrAppend(&output, "(\\S*?)"); opnd_locators.push_back(inst->opcode()->op_locator_map().at(op_name)); if ((fragment_iter != fragment_end) && (!(*fragment_iter).empty())) { char c = (*fragment_iter)[0]; @@ -418,7 +423,7 @@ } } } - absl::StrAppend(&output, "\\s*(#.*)?$)\""); + absl::StrAppend(&output, "$)\""); return {output, opnd_locators}; } @@ -464,7 +469,8 @@ " bool Extract(absl::string_view text, int index, " "std::vector<std::string> &values);\n" "absl::StatusOr<std::tuple<uint64_t, int>> " - "Encode(uint64_t address, absl::string_view text, int entry);\n" + "Encode(uint64_t address, absl::string_view text, int entry, " + "ResolverInterface *resolver);\n" " private:\n" " ", encoder, @@ -482,6 +488,8 @@ " for (int i = 0; i < re2_args.size(); ++i) {\n" " delete re2_args[i];\n" " }\n" + " for (auto *regex : regex_vec_) delete regex;\n" + " regex_vec_.clear();\n" "}\n\n" "absl::Status ", class_name, @@ -545,7 +553,7 @@ pascal_name(), "SlotMatcher::Encode(\n" R"( - uint64_t address, absl::string_view text, int entry) { + uint64_t address, absl::string_view text, int entry, ResolverInterface *resolver) { std::vector<int> matches; std::string error_message = absl::StrCat("Failed to encode '", text, "':"); if (!Match(text, matches) || (matches.size() == 0)) { @@ -560,7 +568,7 @@ pascal_name(), ", entry, \n" " " - "static_cast<OpcodeEnum>(index), address, values);\n", + "static_cast<OpcodeEnum>(index), address, values, resolver);\n", R"( if (!result.status().ok()) { absl::StrAppend(&error_message, "\n ", result.status().message()); @@ -638,9 +646,11 @@ if (!format_info->is_formatted) { absl::StrAppend(&output, "\n#error Missing locator information"); } else { - 
absl::StrAppend(&output, next_sep, "absl::StrFormat(\"", - format_info->number_format, "\", ", - ExpandExpression(*format_info, ""), ")"); + absl::StrAppend( + &output, next_sep, "absl::StrFormat(\"", + format_info->number_format.back() == 'x' ? "0x" : "", + format_info->number_format, "\", ", + ExpandExpression(*format_info, ""), ")"); } } else { auto key = format_info->op_name; @@ -659,10 +669,11 @@ if (!format_info->is_formatted) { absl::StrAppend(&output, next_sep, result.value(), "->AsString()"); } else { - absl::StrAppend(&output, next_sep, "absl::StrFormat(\"", - format_info->number_format, "\", ", - ExpandExpression(*format_info, result.value()), - ")"); + absl::StrAppend( + &output, next_sep, "absl::StrFormat(\"", + format_info->number_format.back() == 'x' ? "0x" : "", + format_info->number_format, "\", ", + ExpandExpression(*format_info, result.value()), ")"); } } }
diff --git a/mpact/sim/decoder/test/example_decoder_test.cc b/mpact/sim/decoder/test/example_decoder_test.cc index a851315..86799c3 100644 --- a/mpact/sim/decoder/test/example_decoder_test.cc +++ b/mpact/sim/decoder/test/example_decoder_test.cc
@@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "googlemock/include/gmock/gmock.h" +#include "googlemock/include/gmock/gmock.h" // IWYU pragma: keep #include "googletest/include/gtest/gtest.h" namespace {
diff --git a/mpact/sim/util/asm/BUILD b/mpact/sim/util/asm/BUILD new file mode 100644 index 0000000..ad67e8f --- /dev/null +++ b/mpact/sim/util/asm/BUILD
@@ -0,0 +1,51 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains the build rules for assembler related libraries. + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "asm", + hdrs = [ + "opcode_assembler_interface.h", + "resolver_interface.h", + ], + deps = [ + "@com_github_serge1_elfio//:elfio", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "simple_assembler", + srcs = ["simple_assembler.cc"], + hdrs = ["simple_assembler.h"], + deps = [ + ":asm", + "@com_github_serge1_elfio//:elfio", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + "@com_google_absl//absl/strings", + "@com_googlesource_code_re2//:re2", + ], +)
diff --git a/mpact/sim/util/asm/opcode_assembler_interface.h b/mpact/sim/util/asm/opcode_assembler_interface.h new file mode 100644 index 0000000..a22c7c5 --- /dev/null +++ b/mpact/sim/util/asm/opcode_assembler_interface.h
@@ -0,0 +1,50 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ +#define MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ + +#include <cstdint> +#include <vector> + +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "mpact/sim/util/asm/resolver_interface.h" + +// This file defines the interface that the opcode assembler must implement. It +// is used by the SimpleAssembler to parse an assembly source line and convert +// it into a vector of bytes. + +namespace mpact { +namespace sim { +namespace util { +namespace assembler { + +class OpcodeAssemblerInterface { + public: + virtual ~OpcodeAssemblerInterface() = default; + // Takes the current address, the text for the assembly instruction, and a + // symbol resolver interface. Returns ok status if the text is successfully + // encoded into the bytes vector. + virtual absl::Status Encode(uint64_t address, absl::string_view text, + ResolverInterface *resolver, + std::vector<uint8_t> &bytes) = 0; +}; + +} // namespace assembler +} // namespace util +} // namespace sim +} // namespace mpact + +#endif // MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_
diff --git a/mpact/sim/util/asm/resolver_interface.h b/mpact/sim/util/asm/resolver_interface.h new file mode 100644 index 0000000..cefa1e3 --- /dev/null +++ b/mpact/sim/util/asm/resolver_interface.h
@@ -0,0 +1,42 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_RESOLVER_INTERFACE_H_ +#define MPACT_SIM_UTIL_ASM_RESOLVER_INTERFACE_H_ + +#include <cstdint> + +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" + +// This file defines the interface that the symbol resolver must implement. It +// is used by the SimpleAssembler to resolve symbol names to values. + +namespace mpact { +namespace sim { +namespace util { +namespace assembler { + +class ResolverInterface { + public: + virtual ~ResolverInterface() = default; + virtual absl::StatusOr<uint64_t> Resolve(absl::string_view text) = 0; +}; + +} // namespace assembler +} // namespace util +} // namespace sim +} // namespace mpact + +#endif // MPACT_SIM_UTIL_ASM_RESOLVER_INTERFACE_H_
diff --git a/mpact/sim/util/asm/simple_assembler.cc b/mpact/sim/util/asm/simple_assembler.cc new file mode 100644 index 0000000..f9f948e --- /dev/null +++ b/mpact/sim/util/asm/simple_assembler.cc
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mpact/sim/util/asm/simple_assembler.h"

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <istream>
#include <ostream>
#include <string>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "elfio/elf_types.hpp"
#include "elfio/elfio_section.hpp"
#include "elfio/elfio_segment.hpp"
#include "elfio/elfio_strings.hpp"
#include "elfio/elfio_symbols.hpp"
#include "mpact/sim/util/asm/opcode_assembler_interface.h"
#include "mpact/sim/util/asm/resolver_interface.h"
#include "re2/re2.h"

namespace mpact {
namespace sim {
namespace util {
namespace assembler {

// A symbol resolver that always returns 0 for any symbol name. This is used
// for the first pass of parsing the assembly code, when we are just creating
// the symbols and computing the sizes of the sections.
class ZeroResolver : public ResolverInterface {
 public:
  // Ignores `text` and unconditionally succeeds with the value 0, so that
  // references to symbols that have not been defined yet never fail.
  absl::StatusOr<uint64_t> Resolve(absl::string_view text) override {
    return 0;
  }
};

// A symbol resolver that uses the symbol table and the symbol indices to
// resolve symbol names to values.
+class SymbolResolver : public ResolverInterface { + public: + explicit SymbolResolver( + ELFIO::section *symtab, + const absl::flat_hash_map<std::string, ELFIO::Elf_Xword> &symbol_indices) + : symtab_(symtab), symbol_indices_(symbol_indices) {} + absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { + auto iter = symbol_indices_.find(text); + if (iter == symbol_indices_.end()) { + return absl::InvalidArgumentError( + absl::StrCat("Symbol '", text, "' not found")); + } + auto index = iter->second; + auto *sym = reinterpret_cast<const ELFIO::Elf64_Sym *>(symtab_->get_data()); + return sym[index].st_value; + } + + private: + // The symbol table ELF section. + ELFIO::section *symtab_; + // Map from symbol name to symbol index in the symbol table. + const absl::flat_hash_map<std::string, ELFIO::Elf_Xword> &symbol_indices_; +}; + +SimpleAssembler::SimpleAssembler(int os_abi, int type, int machine, + uint64_t base_address, + OpcodeAssemblerInterface *opcode_assembler_if) + : opcode_assembler_if_(opcode_assembler_if), + base_address_(base_address), + comment_re_("^\\s*(?:;(.*))?$"), + asm_line_re_("^(?:(?:(\\S+)\\s*:)?|\\s)\\s*([^;]*?)?\\s*(?:;(.*))?$"), + directive_re_( + "^\\.(align|bss|bytes|char|cstring|data|entry|global|long|sect" + "|short|space|string|type|text|uchar|ulong|ushort|uword|word)(?:\\s+(" + ".*)" + ")?\\s*" + "$") { + // Configure the ELF file writer. + writer_.create(ELFCLASS64, ELFDATA2LSB); + writer_.set_os_abi(os_abi); + writer_.set_type(ET_EXEC); + writer_.set_machine(machine); + // Create the symbol table section. + symtab_ = writer_.sections.add(".symtab"); + symtab_->set_type(SHT_SYMTAB); + symtab_->set_entry_size(sizeof(ELFIO::Elf64_Sym)); + // Create the string table section. + strtab_ = writer_.sections.add(".strtab"); + strtab_->set_type(SHT_STRTAB); + // Link the symbol table to the string table. + symtab_->set_link(strtab_->get_index()); + // Create the symbol and string table accessors. 
+ symbol_accessor_ = new ELFIO::symbol_section_accessor(writer_, symtab_); + string_accessor_ = + new ELFIO::string_section_accessor(writer_.sections[".strtab"]); +} + +SimpleAssembler::~SimpleAssembler() { + delete symbol_resolver_; + delete symbol_accessor_; + delete string_accessor_; +} + +absl::Status SimpleAssembler::Parse(std::istream &is) { + // A trivial symbol resolver that always returns 0. + ZeroResolver zero_resolver; + // Create the sections we will need: .text, .data, and .bss. + ELFIO::section *text_section = writer_.sections.add(".text"); + text_section->set_type(SHT_PROGBITS); + text_section->set_flags(SHF_ALLOC | SHF_EXECINSTR); + text_section->set_addr_align(0x10); + ELFIO::section *data_section = writer_.sections.add(".data"); + data_section->set_type(SHT_PROGBITS); + data_section->set_flags(SHF_ALLOC | SHF_WRITE); + data_section->set_addr_align(0x10); + ELFIO::section *bss_section = writer_.sections.add(".bss"); + bss_section->set_type(SHT_NOBITS); + bss_section->set_flags(SHF_ALLOC | SHF_WRITE); + bss_section->set_addr_align(0x10); + + // First pass of parsing the input stream. This will add symbols to the symbol + // table and compute the sizes of all instructions and the sections. The + // section_address_map_ will keep track of the current location within each + // section (i.e., the offset within the section of the next + // instruction/object). + std::string line; + std::string label; + std::string statement; + while (is.good() && !is.eof()) { + getline(is, line); + if (RE2::FullMatch(line, comment_re_)) continue; + if (RE2::FullMatch(line, asm_line_re_, &label, &statement)) { + std::vector<uint8_t> byte_vector; + auto *section = current_section_; + uint64_t address = + (section == nullptr) ? 
0 : section_address_map_[section]; + if (!statement.empty()) { + absl::Status status; + if (statement[0] == '.') { + status = ParseAsmDirective(statement, &zero_resolver, byte_vector); + } else { + status = ParseAsmStatement(statement, &zero_resolver, byte_vector); + } + if (!status.ok()) return status; + // Save the statements for processing in pass two. + lines_.push_back(statement); + } + + if (!label.empty()) { + // When initially adding symbols, the address is relative to the start + // of the containing section. This will be corrected later. + if (section == nullptr) { + return absl::InvalidArgumentError(absl::StrCat( + "Label: '", label, "' defined outside of a section")); + } + auto size = section_address_map_[section] - address; + auto status = + AddSymbol(label, address, size, STT_NOTYPE, STB_LOCAL, 0, section); + } + continue; + } + return absl::AbortedError(absl::StrCat("Parse failure: '", line, "'")); + } + if (!is.eof()) return absl::InternalError("Input stream entered bad state"); + + // Section sizes are now known. So let's compute the layout and update all + // the symbol values/addresses before the next pass. + // The layout is: + // text segment starting at base address + any alignment. + // data segment starting at the end of the text segment + any alignment. + // The bss section is added to the end of the data segment + any alignment. 
+ + auto text_segment_start = base_address_ & ~4095ULL; + ELFIO::segment *text_segment = writer_.segments.add(); + text_segment->set_type(PT_LOAD); + text_segment->set_virtual_address(text_segment_start); + text_segment->set_physical_address(text_segment_start); + text_segment->set_flags(PF_X | PF_R); + text_segment->set_align(4096); + + uint64_t data_segment_start = (text_segment->get_virtual_address() + + section_address_map_[text_section] + 4095) & + ~4095ULL; + + ELFIO::segment *data_segment = writer_.segments.add(); + data_segment->set_type(PT_LOAD); + data_segment->set_virtual_address(data_segment_start); + data_segment->set_physical_address(data_segment_start); + data_segment->set_flags(PF_W | PF_R); + data_segment->set_align(4096); + + uint64_t bss_size = section_address_map_[bss_section]; + uint64_t bss_align = bss_section->get_addr_align() - 1; + uint64_t bss_segment_start = + (data_segment_start + section_address_map_[data_section] + bss_align) & + ~bss_align; + + // Now we can update the symbol table based on the new section sizes. + + // Copy the symbol table from the section data. + auto num_symbols = symbol_accessor_->get_symbols_num(); + auto size = symtab_->get_size(); + auto *symbols = new ELFIO::Elf64_Sym[num_symbols]; + std::memcpy(symbols, symtab_->get_data(), size); + // Convert the section offsets to the absolute addresses. + for (int i = 0; i < num_symbols; ++i) { + auto &sym = symbols[i]; + auto shndx = sym.st_shndx; + auto sym_name = string_accessor_->get_string(sym.st_name); + if (global_symbols_.contains(sym_name)) { + sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info)); + } + if (shndx == text_section->get_index()) { + sym.st_value += text_segment_start; + } else if (shndx == data_section->get_index()) { + sym.st_value += data_segment_start; + } else if (shndx == bss_section->get_index()) { + sym.st_value += bss_segment_start; + } + } + // Update the symbol table section data with the updated symbols. 
+ symtab_->set_data(reinterpret_cast<char *>(symbols), size); + delete[] symbols; + + // For the second pass, we need a symbol resolver that uses the symbol table + // and the symbol indices. + symbol_resolver_ = new SymbolResolver(symtab_, symbol_indices_); + + // Update the section address map so that each section starts at the right + // address, i.e., it no longer tracks the offset within each section, but the + // absolute address. + section_address_map_[text_section] = text_segment_start; + section_address_map_[data_section] = data_segment_start; + section_address_map_[bss_section] = bss_segment_start; + + // Now fill in the sections. Parse each of the lines saved in the first pass. + for (auto const &line : lines_) { + std::vector<uint8_t> byte_vector; + absl::Status status; + auto *section = current_section_; + if (line[0] == '.') { + auto status = ParseAsmDirective(line, symbol_resolver_, byte_vector); + } else { + auto status = ParseAsmStatement(line, symbol_resolver_, byte_vector); + } + if (!status.ok()) return status; + if (byte_vector.empty()) continue; + // Add data to the section, but first make sure it's not bss. + if (section != bss_section) { + section->append_data(reinterpret_cast<const char *>(byte_vector.data()), + byte_vector.size()); + } + } + + bss_section->set_size(bss_size); + + // Add sections to the segments. First segment gets the text section. The + // second segment gets the data and bss sections. + text_segment->add_section_index(text_section->get_index(), + text_section->get_addr_align()); + data_segment->add_section_index(data_section->get_index(), + data_section->get_addr_align()); + data_segment->add_section_index(bss_section->get_index(), + bss_section->get_addr_align()); + + return absl::OkStatus(); +} + +// Helper functions for parsing the assembly code. +namespace { + +// This template is used to convert the given type to the smallest valid type +// that absl Atoi functions can handle. 
+template <typename T> +struct AtoIType { + using type = T; +}; + +template <> +struct AtoIType<char> { + using type = int32_t; +}; + +template <> +struct AtoIType<uint8_t> { + using type = uint32_t; +}; + +template <> +struct AtoIType<uint16_t> { + using type = uint32_t; +}; + +template <> +struct AtoIType<int16_t> { + using type = int32_t; +}; + +template <> +struct AtoIType<int8_t> { + using type = int32_t; +}; + +// Convert the text to an integer. Checks for a leading 0x and then converts +// using absl::SimpleHexAtoi. If the text does not start with 0x, then it +// converts using absl::SimpleAtoi. If the text is not a valid integer, then +// it calls the resolver to see if it is a symbol name, in which case it returns +// the value of the symbol. If the text is not a valid integer or symbol name, +// then it returns an error. +template <typename T> +absl::StatusOr<T> SimpleTextToInt(absl::string_view text, + ResolverInterface *resolver = nullptr) { + T value; + if (text.substr(0, 2) == "0x") { + if (absl::SimpleHexAtoi(text.substr(2), &value)) return value; + return absl::InvalidArgumentError( + absl::StrCat("Invalid immediate: ", text)); + } + if (absl::SimpleAtoi(text, &value)) return value; + if (resolver == nullptr) { + return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text)); + } + auto result = resolver->Resolve(text); + if (!result.ok()) { + return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text)); + } + return static_cast<T>(result.value()); +} + +// Expand escaped characters in the given text. This is for use in parsing +// .string, .char, and .cstring directives. 
+std::string ExpandEscapes(absl::string_view text) { + std::string result; + bool in_escape = false; + for (auto c : text) { + if (in_escape) { + switch (c) { + case 'n': + result.push_back('\n'); + break; + case 'r': + result.push_back('\r'); + break; + case 'v': + result.push_back('\v'); + break; + case 'f': + result.push_back('\f'); + break; + case 'a': + result.push_back('\a'); + break; + case 'b': + result.push_back('\b'); + break; + case 't': + result.push_back('\t'); + break; + case '\\': + result.push_back('\\'); + break; + case '\'': + result.push_back('\''); + break; + case '"': + result.push_back('"'); + break; + case '\?': + result.push_back('?'); + break; + default: + result.push_back('\\'); + result.push_back(c); + break; + } + in_escape = false; + continue; + } + if (c == '\\') { + in_escape = true; + continue; + } + result.push_back(c); + } + if (in_escape) result.push_back('\\'); + return result; +} + +// This function is used to parse a list of values from the remainder of an +// assembly directive. The values are separated by commas. The type T is the +// type of the values, and must be an integer type or char. The resolver +// interface is optional and is used to resolve any symbol names in the text. +template <typename T> +absl::StatusOr<std::vector<T>> GetValues( + absl::string_view remainder, ResolverInterface *resolver = nullptr) { + std::vector<T> values; + static RE2 value_re("(0x[0-9a-fA-F]+|-?[0-9]+)\\s*(?:,|$)"); + std::string match; + while (RE2::Consume(&remainder, value_re, &match)) { + auto result = SimpleTextToInt<typename AtoIType<T>::type>(match); + if (!result.ok()) return result.status(); + T value = static_cast<T>(result.value()); + values.push_back(value); + } + return values; +} + +// Specialization of the above that handles char values. 
+template <> +absl::StatusOr<std::vector<char>> GetValues<char>(absl::string_view remainder, + ResolverInterface *resolver) { + std::vector<char> values; + static RE2 value_re("'(.{1,2})'\\s*(?:,|$)"); + std::string match; + while (RE2::Consume(&remainder, value_re, &match)) { + auto expanded = ExpandEscapes(match); + if (expanded.size() != 1) + return absl::InvalidArgumentError( + absl::StrCat("Invalid character: '", match, "'")); + values.push_back(expanded[0]); + } + return values; +} + +// Specialization of the above that handles double quoted string values. +template <> +absl::StatusOr<std::vector<std::string>> GetValues<std::string>( + absl::string_view remainder, ResolverInterface *resolver) { + std::vector<std::string> values; + std::string match; + static RE2 value_re("\"([^\"]*)\"\\s*(?:,|$)"); + while (RE2::Consume(&remainder, value_re, &match)) { + values.push_back(ExpandEscapes(match)); + } + return values; +} + +// Specialization of the above that handles labels (string values without +// quotes). +absl::StatusOr<std::vector<std::string>> GetLabels( + absl::string_view remainder) { + std::vector<std::string> values; + std::string match; + static RE2 label_re("([a-zA-Z_][a-zA-Z0-9_]*)\\s*(?:,|$)"); + while (RE2::Consume(&remainder, label_re, &match)) { + values.push_back(match); + } + return values; +} + +// Helper that converts a vector of integer values to a vector of bytes. 
+template <typename T> +inline void ConvertToBytes(const std::vector<T> &values, + std::vector<uint8_t> &bytes) { + union { + T i; + uint8_t b[sizeof(T)]; + } u; + for (auto value : values) { + u.i = value; + for (int i = sizeof(T) - 1; i >= 0; i--) { + bytes.push_back(u.b[i]); + } + } +} + +} // namespace + +absl::Status SimpleAssembler::SetEntryPoint(const std::string &value) { + auto res = SimpleTextToInt<uint64_t>(value, symbol_resolver_); + if (!res.ok()) return res.status(); + entry_point_ = res.value(); + return absl::OkStatus(); +} + +// Top level function that writes the ELF file out to disk. +absl::Status SimpleAssembler::Write(std::ostream &os) { + if (entry_point_.empty()) return absl::NotFoundError("Entry point not set"); + auto res = SimpleTextToInt<uint64_t>(entry_point_, symbol_resolver_); + if (!res.ok()) return res.status(); + symbol_accessor_->arrange_local_symbols(); + writer_.set_entry(res.value()); + writer_.save(os); + return absl::OkStatus(); +} + +// Parse and process an assembly directive. The assembly directive is expected +// to be in the form of a line starting with a period followed by a directive +// name and an optional argument. The argument is a string of tokens separated +// by spaces. The argument is parsed using regular expressions. The byte values +// are appended to the given vector. 
+absl::Status SimpleAssembler::ParseAsmDirective( + absl::string_view directive, ResolverInterface *resolver, + std::vector<uint8_t> &byte_values) { + std::string match; + std::string remainder; + ELFIO::section *section = current_section_; + uint64_t size = 0; + if (!RE2::FullMatch(directive, directive_re_, &match, &remainder)) { + return absl::InvalidArgumentError( + absl::StrCat("Invalid directive: '", directive, "'")); + } + if (match == "align") { + // .align <n> + if (section == nullptr) { + return absl::InvalidArgumentError( + absl::StrCat("No section for directive: '", directive, "'")); + } + auto res = SimpleTextToInt<uint64_t>(remainder); + if (!res.ok()) return res.status(); + uint64_t align = res.value(); + // Verify that the alignment is a power of two. + if ((align & (align - 1)) != 0) { + return absl::InvalidArgumentError( + absl::StrCat("Invalid alignment: '", directive, "'")); + } + uint64_t address = section_address_map_[section]; + size = ((address + align - 1) & ~(align - 1)) - address; + } else if (match == "bss") { + // .bss + SetBssSection(".bss"); + } else if (match == "bytes") { + // .bytes + auto res = GetValues<uint8_t>(remainder, resolver); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size(); + for (auto const &value : values) byte_values.push_back(value); + } else if (match == "char") { + // .char + auto res = GetValues<char>(remainder, resolver); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size(); + for (auto const &value : values) byte_values.push_back(value); + } else if (match == "cstring") { + // .cstring + auto res = GetValues<std::string>(remainder, resolver); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = 0; + for (auto const &value : values) { + for (auto const &c : value) byte_values.push_back(c); + byte_values.push_back('\0'); + size += value.size() + 1; + } + } else if (match == "data") { + // .data + 
SetDataSection(".data"); + } else if (match == "entry") { + // .entry <name>|<address> + entry_point_ = remainder; + } else if (match == "global") { + // .global <name> + auto res = GetLabels(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + for (auto const &value : values) { + global_symbols_.insert(value); + } + } else if (match == "long") { + // .long + auto res = GetValues<int64_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(int64_t); + ConvertToBytes<int64_t>(values, byte_values); + } else if (match == "section") { + // .section <name>,<type> + // TODO(torerik): Implement. + return absl::UnimplementedError("Section directive not implemented"); + } else if (match == "short") { + // .short + auto res = GetValues<int16_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(int16_t); + ConvertToBytes<int16_t>(values, byte_values); + } else if (match == "space") { + // .space <n> + auto res = SimpleTextToInt<uint64_t>(remainder); + if (!res.ok()) return res.status(); + size = res.value(); + } else if (match == "string") { + // .string + auto res = GetValues<std::string>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = 0; + for (auto const &value : values) { + for (auto const &c : value) byte_values.push_back(c); + size += value.size(); + } + } else if (match == "text") { + // .text + SetTextSection(".text"); + } else if (match == "uchar") { + // .uchar + auto res = GetValues<uint8_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size(); + for (auto const &value : values) byte_values.push_back(value); + } else if (match == "ulong") { + // .ulong + auto res = GetValues<uint64_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(uint64_t); + 
ConvertToBytes<uint64_t>(values, byte_values); + } else if (match == "ushort") { + // .ushort + auto res = GetValues<uint16_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(uint16_t); + ConvertToBytes<uint16_t>(values, byte_values); + } else if (match == "uword") { + // .uword + auto res = GetValues<uint32_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(uint32_t); + ConvertToBytes<uint32_t>(values, byte_values); + } else if (match == "word") { + // .word + auto res = GetValues<int32_t>(remainder); + if (!res.ok()) return res.status(); + auto values = res.value(); + size = values.size() * sizeof(int32_t); + ConvertToBytes<int32_t>(values, byte_values); + } else { + return absl::InvalidArgumentError( + absl::StrCat("Unsupported directive: '", directive, "'")); + } + if ((size > 0) && (section != nullptr)) { + if (!section_address_map_.contains(section)) { + return absl::InternalError( + absl::StrCat("No address for section '", section->get_name(), "'")); + } + section_address_map_[section] += size; + } + return absl::OkStatus(); +} + +// Parse and process an assembly statement. The assembly statement is expected +// to be a single line of text. The byte values are appended to the given +// vector. +absl::Status SimpleAssembler::ParseAsmStatement( + absl::string_view statement, ResolverInterface *resolver, + std::vector<uint8_t> &byte_values) { + // Call the target specific assembler to encode the statement. + auto status = opcode_assembler_if_->Encode( + section_address_map_[current_section_], statement, resolver, byte_values); + if (!status.ok()) return status; + section_address_map_[current_section_] += byte_values.size(); + return absl::OkStatus(); +} + +void SimpleAssembler::SetTextSection(const std::string &name) { + // First check if the section already exists. 
+ auto *section = writer_.sections[name]; + if (section != nullptr) { + current_section_ = section; + return; + } + section = writer_.sections.add(name); + section->set_type(SHT_PROGBITS); + section->set_flags(SHF_ALLOC | SHF_EXECINSTR); + section->set_addr_align(0x10); + // Should probably add the section symbol to the symbol table. + current_section_ = section; +} + +void SimpleAssembler::SetDataSection(const std::string &name) { + // First check if the section already exists. + auto *section = writer_.sections[name]; + if (section != nullptr) { + current_section_ = section; + return; + } + section = writer_.sections.add(name); + section->set_type(SHT_PROGBITS); + section->set_flags(SHF_ALLOC | SHF_WRITE); + section->set_addr_align(0x10); + // Should probably add the section symbol to the symbol table. + current_section_ = section; +} + +void SimpleAssembler::SetBssSection(const std::string &name) { + // First check if the section already exists. + auto *section = writer_.sections[name]; + if (section != nullptr) { + current_section_ = section; + return; + } + section = writer_.sections.add(name); + section->set_type(SHT_NOBITS); + section->set_flags(SHF_ALLOC); + section->set_addr_align(0x10); +} + +absl::Status SimpleAssembler::AddSymbol(const std::string &name, + ELFIO::Elf64_Addr value, + ELFIO::Elf_Xword size, uint8_t type, + uint8_t binding, uint8_t other, + ELFIO::section *section) { + if (symbol_indices_.contains(name)) { + return absl::AlreadyExistsError( + absl::StrCat("Symbol '", name, "' already exists")); + } + auto res = + symbol_accessor_->add_symbol(*string_accessor_, name.c_str(), value, size, + binding, type, other, section->get_index()); + symbol_indices_.insert({name, res}); + return absl::OkStatus(); +} + +absl::Status SimpleAssembler::AppendData(const char *data, size_t size) { + if (current_section_ == nullptr) { + return absl::FailedPreconditionError("No current section"); + } + current_section_->append_data(data, size); + return 
absl::OkStatus(); +} + +} // namespace assembler +} // namespace util +} // namespace sim +} // namespace mpact
diff --git a/mpact/sim/util/asm/simple_assembler.h b/mpact/sim/util/asm/simple_assembler.h new file mode 100644 index 0000000..5250b28 --- /dev/null +++ b/mpact/sim/util/asm/simple_assembler.h
@@ -0,0 +1,132 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_ +#define MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_ + +#include <cstddef> +#include <cstdint> +#include <istream> +#include <ostream> +#include <string> +#include <vector> + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "elfio/elf_types.hpp" +#include "elfio/elfio.hpp" +#include "elfio/elfio_section.hpp" +#include "elfio/elfio_strings.hpp" +#include "elfio/elfio_symbols.hpp" +#include "mpact/sim/util/asm/opcode_assembler_interface.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "re2/re2.h" + +// This file declares the SimpleAssembler class, which provides simple handling +// of assembly source, including a number of assembly directives. It currently +// handles three sections: .text, .data, and .bss. It produces an executable +// ELF file with the text section in its own segment starting at the base +// address, followed by the data section, and then the bss section. The entry +// point is set either by calling SetEntryPoint(), or by specifying the entry +// symbol with the .entry directive inside the text section of the input +// assembly source. If SetEntryPoint() is called after parsing it overrides the +// entry point set by the .entry directive. 
namespace mpact {
namespace sim {
namespace util {
namespace assembler {

// Two pass assembler that turns assembly source into an ELF file. Encoding of
// the individual instructions is delegated to an OpcodeAssemblerInterface.
class SimpleAssembler {
 public:
  // The opcode assembler interface is not owned by this object (it is never
  // deleted by it). The base address is the address the text segment is
  // placed at, rounded down to a multiple of 4096.
  explicit SimpleAssembler(int os_abi, int type, int machine,
                           uint64_t base_address,
                           OpcodeAssemblerInterface *opcode_assembler_if);
  SimpleAssembler(const SimpleAssembler &) = delete;
  SimpleAssembler &operator=(const SimpleAssembler &) = delete;
  virtual ~SimpleAssembler();

  // Parse the input stream as assembly.
  absl::Status Parse(std::istream &is);
  // Set the entry point. Either pass a symbol or an address.
  absl::Status SetEntryPoint(const std::string &value);
  // Write out the ELF file.
  absl::Status Write(std::ostream &os);

  // Accessor for the underlying ELF writer object.
  ELFIO::elfio &writer() { return writer_; }

 private:
  // Parse and process an assembly directive.
  absl::Status ParseAsmDirective(absl::string_view directive,
                                 ResolverInterface *resolver,
                                 std::vector<uint8_t> &byte_values);
  // Parse and process an assembly statement.
  absl::Status ParseAsmStatement(absl::string_view statement,
                                 ResolverInterface *resolver,
                                 std::vector<uint8_t> &byte_values);
  // Add the symbol to the symbol table.
  absl::Status AddSymbol(const std::string &name, ELFIO::Elf64_Addr value,
                         ELFIO::Elf_Xword size, uint8_t type, uint8_t binding,
                         uint8_t other, ELFIO::section *section);
  // Append the data to the current section.
  absl::Status AppendData(const char *data, size_t size);

  // Set the given section as the current section. Create it if it has not
  // already been created.
  void SetTextSection(const std::string &name);
  void SetDataSection(const std::string &name);
  void SetBssSection(const std::string &name);

  // NOTE: the constructor's member initializer list relies on the order in
  // which the members below are declared. Keep that in mind when reordering.

  // Elf file top level object.
  ELFIO::elfio writer_;
  // The current section being processed.
  ELFIO::section *current_section_ = nullptr;
  // Interface used to parse and encode assembly statements.
  OpcodeAssemblerInterface *opcode_assembler_if_ = nullptr;
  // Interface used to access strings in the string table. Allocated in the
  // constructor and deleted in the destructor.
  ELFIO::string_section_accessor *string_accessor_ = nullptr;
  // Interface used to access symbols in the symbol table. Allocated in the
  // constructor and deleted in the destructor.
  ELFIO::symbol_section_accessor *symbol_accessor_ = nullptr;
  // ELF symbol table section.
  ELFIO::section *symtab_ = nullptr;
  // Elf string table section.
  ELFIO::section *strtab_ = nullptr;
  // Map that tracks the current address of each section.
  absl::flat_hash_map<ELFIO::section *, uint64_t> section_address_map_;

  // Base address of the ELF file that is to be written.
  uint64_t base_address_ = 0;
  // Program entry point, kept as text (a symbol name or a number).
  std::string entry_point_;
  // Current symbol resolver (looks up symbols in the symbol table and returns
  // their values). Created by Parse() and deleted in the destructor.
  ResolverInterface *symbol_resolver_ = nullptr;
  // The statements saved by the first pass of Parse() for use in the second.
  std::vector<std::string> lines_;
  // Regular expressions used to parse the assembly source.
  RE2 comment_re_;
  RE2 asm_line_re_;
  RE2 directive_re_;
  // Set of symbol names declared as global.
  absl::flat_hash_set<std::string> global_symbols_;
  // Map from symbol name to symbol index in the symbol table.
  absl::flat_hash_map<std::string, ELFIO::Elf_Xword> symbol_indices_;
};

}  // namespace assembler
}  // namespace util
}  // namespace sim
}  // namespace mpact

#endif  // MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_
diff --git a/mpact/sim/util/asm/test/BUILD b/mpact/sim/util/asm/test/BUILD new file mode 100644 index 0000000..1e7763d --- /dev/null +++ b/mpact/sim/util/asm/test/BUILD
@@ -0,0 +1,109 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains the build rules for the tests of the assembler libraries.
+
+load("//mpact/sim/decoder:mpact_sim_isa.bzl", "mpact_bin_fmt_decoder", "mpact_isa_decoder")
+
+package(
+    default_applicable_licenses = ["//:license"],
+    default_visibility = ["//visibility:public"],
+)
+
+# Semantic function declarations that riscv64x.isa includes and refers to.
+cc_library(
+    name = "riscv64x_instructions",
+    testonly = True,
+    srcs = ["riscv64x_instructions.cc"],
+    hdrs = ["riscv64x_instructions.h"],
+    deps = [
+        "//mpact/sim/generic:instruction",
+    ],
+)
+
+# Code generated from the instruction set description in riscv64x.isa.
+mpact_isa_decoder(
+    name = "riscv64x_isa",
+    testonly = True,
+    src = "riscv64x.isa",
+    includes = [],
+    isa_name = "RiscV64X",
+    deps = [
+        ":riscv64x_instructions",
+    ],
+)
+
+# Code generated from the binary encoding description in riscv64x.bin_fmt.
+mpact_bin_fmt_decoder(
+    name = "riscv64x_bin_fmt",
+    testonly = True,
+    src = "riscv64x.bin_fmt",
+    decoder_name = "RiscV64X",
+    includes = [
+    ],
+    prefix = "riscv64x",
+    deps = [
+        ":riscv64x_isa",
+    ],
+)
+
+# Hand written encoder interface and assembler for the test instruction set.
+cc_library(
+    name = "riscv64x_encoder",
+    testonly = True,
+    srcs = [
+        "riscv64x_assembler.cc",
+        "riscv64x_bin_encoder_interface.cc",
+    ],
+    hdrs = [
+        "riscv64x_assembler.h",
+        "riscv64x_bin_encoder_interface.h",
+        "riscv_bin_setters.h",
+        "riscv_getter_helpers.h",
+    ],
+    deps = [
+        ":riscv64x_bin_fmt",
+        ":riscv64x_isa",
+        "//mpact/sim/generic:type_helpers",
+        "//mpact/sim/util/asm",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/functional:any_invocable",
+        "@com_google_absl//absl/log",
+        "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
+# Test of the simple assembler using the test instruction set above.
+cc_test(
+    name = "riscv64x_asm_test",
+    srcs = ["riscv64x_asm_test.cc"],
+    deps = [
+        ":riscv64x_bin_fmt",
+        ":riscv64x_encoder",
+        ":riscv64x_isa",
+        "//mpact/sim/util/asm",
+        "//mpact/sim/util/asm:simple_assembler",
+        "@com_github_serge1_elfio//:elfio",
+        "@com_google_absl//absl/base:no_destructor",
+        "@com_google_absl//absl/log",
+        "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/mpact/sim/util/asm/test/riscv64x.bin_fmt b/mpact/sim/util/asm/test/riscv64x.bin_fmt new file mode 100644 index 0000000..84ee048 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x.bin_fmt
@@ -0,0 +1,115 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Decoder for a small subset of the RiscV 64 bit instruction encodings, used
+// for testing the assembler.
+decoder RiscV64X {
+  opcode_enum = "isa64::OpcodeEnum";
+  includes {
+    #include "mpact/sim/util/asm/test/riscv64x_decoder.h"
+  }
+  namespace mpact::sim::riscv::encoding64;
+  RiscVXInst32;
+};
+
+// Base format for the 32 bit instructions: 25 bits followed by a 7 bit opcode.
+format Inst32Format[32] {
+  fields:
+    unsigned bits[25];
+    unsigned opcode[7];
+};
+
+format RType[32] : Inst32Format {
+  fields:
+    unsigned func7[7];
+    unsigned rs2[5];
+    unsigned rs1[5];
+    unsigned func3[3];
+    unsigned rd[5];
+    unsigned opcode[7];
+  overlays:
+    unsigned r_uimm5[5] = rs2;
+};
+
+// Format for shift immediate for RV64, note 6 bit immediate.
+format RSType[32] : Inst32Format {
+  fields:
+    unsigned func6[6];
+    unsigned r_uimm6[6];
+    unsigned rs1[5];
+    unsigned func3[3];
+    unsigned rd[5];
+    unsigned opcode[7];
+};
+
+format IType[32] : Inst32Format {
+  fields:
+    signed imm12[12];
+    unsigned rs1[5];
+    unsigned func3[3];
+    unsigned rd[5];
+    unsigned opcode[7];
+  overlays:
+    unsigned u_imm12[12] = imm12;
+    unsigned i_uimm5[5] = rs1;
+};
+
+format SType[32] : Inst32Format {
+  fields:
+    unsigned imm7[7];
+    unsigned rs2[5];
+    unsigned rs1[5];
+    unsigned func3[3];
+    unsigned imm5[5];
+    unsigned opcode[7];
+  overlays:
+    // The immediate is split across the imm7 and imm5 fields.
+    signed s_imm[12] = imm7, imm5;
+};
+
+
+format UType[32] : Inst32Format {
+  fields:
+    unsigned imm20[20];
+    unsigned rd[5];
+    unsigned opcode[7];
+  overlays:
+    // Both overlays extend imm20 with 12 zero bits; they differ in signedness.
+    unsigned u_imm[32] = imm20, 0b0000'0000'0000;
+    signed s_imm[32] = imm20, 0b0000'0000'0000;
+};
+
+format JType[32] : Inst32Format {
+  fields:
+    signed imm20[20];
+    unsigned rd[5];
+    unsigned opcode[7];
+  overlays:
+    // The bits of imm20 are reordered and a zero bit is appended.
+    signed j_imm[21] = imm20[19, 7..0, 8, 18..9], 0b0;
+};
+
+// Encoding constraints of the instructions. Note that jal and j share an
+// opcode and are distinguished by whether rd is zero.
+instruction group RiscVXInst32[32] : Inst32Format {
+  addi : IType : func3 == 0b000, opcode == 0b001'0011;
+  lui : UType : opcode == 0b011'0111;
+  sd : SType : func3 == 0b011, opcode == 0b010'0011;
+  jal : JType : rd != 0, opcode == 0b110'1111;
+  j : JType : rd == 0, opcode == 0b110'1111;
+  slli : RSType : func6 == 0b000'000, func3==0b001, opcode == 0b001'0011;
+  ebreak : Inst32Format : bits == 0b0000'0000'0001'00000'000'00000, opcode == 0b111'0011;
+  srai : RSType : func6 == 0b010'000, func3==0b101, opcode == 0b001'0011;
+  jr : IType : rd == 0, func3 == 0b000, opcode == 0b110'0111;
+};
diff --git a/mpact/sim/util/asm/test/riscv64x.isa b/mpact/sim/util/asm/test/riscv64x.isa new file mode 100644 index 0000000..2ec7460 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x.isa
@@ -0,0 +1,60 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains a subset of the ISA description for the RiscV64G
+// architecture, used to generate a simple "subset" assembler for testing.
+
+// First disasm field is 18 char wide and left justified.
+disasm widths = {-18};
+
+// Latency used as the default for every opcode in the slot below.
+int global_latency = 1;
+
+// The ISA consists of a single slot.
+isa RiscV64X {
+  namespace mpact::sim::riscv::isa64;
+  slots { riscv64x; }
+}
+
+// Minimal set of instructions for hello world.
+slot riscv64x {
+  includes {
+    #include "mpact/sim/util/asm/test/riscv64x_instructions.h"
+  }
+  default size = 4;
+  default latency = global_latency;
+  default opcode =
+    disasm: "Illegal instruction at %(@:08x)",
+    semfunc: "&RiscVIllegalInstruction";
+  opcodes {
+    addi{: rs1, I_imm12 : rd},
+      disasm: "addi", "%rd, %rs1, %I_imm12";
+    lui{: U_imm20 : rd},
+      disasm: "lui", "%rd, %(U_imm20:08x)";
+    sd{: rs1, S_imm12, rs2 : },
+      disasm: "sd", "%rs2, %S_imm12(%rs1)";
+    jal{: J_imm20 : next_pc, rd},
+      disasm: "jal", "%rd, %(@+J_imm20:08x)";
+    j{: J_imm20 : next_pc, rd},
+      disasm: "j", "%(@+J_imm20:08x)";
+    slli{: rs1, I_uimm6 : rd},
+      disasm: "slli", "%rd, %rs1, %(I_uimm6:x)";
+    ebreak{},
+      disasm: "ebreak";
+    srai{: rs1, I_uimm6 : rd},
+      disasm: "srai", "%rd, %rs1, %(I_uimm6:x)";
+    jr{: rs1, J_imm12 : next_pc, rd},
+      disasm: "jr", "%rs1, %(J_imm12:08x)";
+  }
+}
diff --git a/mpact/sim/util/asm/test/riscv64x_asm_test.cc b/mpact/sim/util/asm/test/riscv64x_asm_test.cc new file mode 100644 index 0000000..3f2e647 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
@@ -0,0 +1,239 @@ +#include <cstdint> +#include <sstream> +#include <string> +#include <vector> + +#include "absl/base/no_destructor.h" +#include "absl/log/check.h" +#include "absl/status/status.h" +#include "absl/strings/string_view.h" +#include "elfio/elf_types.hpp" +#include "elfio/elfio.hpp" +#include "elfio/elfio_symbols.hpp" +#include "googlemock/include/gmock/gmock.h" // IWYU pragma: keep +#include "googletest/include/gtest/gtest.h" +#include "mpact/sim/util/asm/opcode_assembler_interface.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "mpact/sim/util/asm/simple_assembler.h" +#include "mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_encoder.h" + +// This file contains tests for the simple assembler using a very reduced +// subset of the RISC-V ISA. + +namespace { + +using ::mpact::sim::riscv::isa64::RiscV64XBinEncoderInterface; +using ::mpact::sim::riscv::isa64::Riscv64xSlotMatcher; +using ::mpact::sim::util::assembler::OpcodeAssemblerInterface; +using ::mpact::sim::util::assembler::ResolverInterface; +using ::mpact::sim::util::assembler::SimpleAssembler; + +// This class implements the OpcodeAssemblerInterface using the slot matcher. +class RiscV64XAssembler : public OpcodeAssemblerInterface { + public: + RiscV64XAssembler(Riscv64xSlotMatcher* matcher) : matcher_(matcher) {}; + ~RiscV64XAssembler() override = default; + absl::Status Encode(uint64_t address, absl::string_view text, + ResolverInterface* resolver, + std::vector<uint8_t>& bytes) override { + // Call the slot matcher to get the encoded value. + auto res = matcher_->Encode(address, text, 0, resolver); + if (!res.status().ok()) return res.status(); + // Convert the value to a byte array. 
+ auto [value, size] = res.value(); + union { + uint64_t i; + uint8_t b[sizeof(uint64_t)]; + } u; + u.i = value; + for (int i = 0; i < size / 8; ++i) { + bytes.push_back(u.b[i]); + } + return absl::OkStatus(); + } + + private: + Riscv64xSlotMatcher* matcher_; +}; + +// Sample assembly code. +absl::NoDestructor<std::string> kTestAssembly(R"( +; text section + .text + .global main + .entry main +main: + addi a0, zero, 5 + lui a1, semihost_param + addi a1, a1, semihost_param + addi t0, zero, 2 + sd t0, 0(a1) + lui t2, hello + addi t2, t2, hello + sd t2, 8(a1) + addi t0, zero, 12 + sd t0, 0x10(a1) + jal ra, semihost + ; now exit + addi a0, zero, 24 + lui t0, 0x20026 + addi t0, t0, 0x20026 + sd t0, 0(a1) + jal ra, semihost +exit: + j exit + +semihost: + slli zero, zero, 0x1f + ebreak + srai zero, zero, 7 + jr ra, 0 + +; data section + + .data + .global hello +hello: + .cstring "Hello World\n" + .char '\n' + +; bss + + .bss + .global tohost +tohost: + .space 16 +semihost_param: + .space 16 +)"); + +// Test fixture. It creates the assembler and parses the assembly code. +class RiscV64XAssemblerTest : public ::testing::Test { + protected: + RiscV64XAssemblerTest() + : matcher_(&bin_encoder_interface_), riscv_64x_assembler_(&matcher_) { + CHECK_OK(matcher_.Initialize()); + // Create the assembler. + assembler_ = new SimpleAssembler(ELFOSABI_LINUX, ET_EXEC, EM_RISCV, 0x1000, + &riscv_64x_assembler_); + std::istringstream source(*kTestAssembly); + // Parse the assembly code. + CHECK_OK(assembler_->Parse(source)); + } + + ~RiscV64XAssemblerTest() override { delete assembler_; } + + // Access the ELF writer. + ELFIO::elfio& elf() { return assembler_->writer(); } + + private: + RiscV64XBinEncoderInterface bin_encoder_interface_; + Riscv64xSlotMatcher matcher_; + RiscV64XAssembler riscv_64x_assembler_; + SimpleAssembler* assembler_; +}; + +// Test that the expected sections are present. 
+TEST_F(RiscV64XAssemblerTest, Sections) { + auto sections = elf().sections; + // Null section and the 6 sections listed below. + EXPECT_EQ(sections.size(), 7); + EXPECT_NE(sections[".text"], nullptr); + EXPECT_NE(sections[".data"], nullptr); + EXPECT_NE(sections[".bss"], nullptr); + EXPECT_NE(sections[".shstrtab"], nullptr); + EXPECT_NE(sections[".strtab"], nullptr); + EXPECT_NE(sections[".symtab"], nullptr); +} + +// Verify that the information about the text section is as expected. +TEST_F(RiscV64XAssemblerTest, Text) { + auto* text = elf().sections[".text"]; + EXPECT_EQ(text->get_type(), SHT_PROGBITS); + EXPECT_EQ(text->get_flags(), SHF_ALLOC | SHF_EXECINSTR); + EXPECT_EQ(text->get_link(), SHN_UNDEF); + EXPECT_EQ(text->get_size(), /*num inst*/ 21 * /*bytes per inst*/ 4); +} + +TEST_F(RiscV64XAssemblerTest, Data) { + auto* data = elf().sections[".data"]; + EXPECT_EQ(data->get_type(), SHT_PROGBITS); + EXPECT_EQ(data->get_flags(), SHF_ALLOC | SHF_WRITE); + EXPECT_EQ(data->get_link(), SHN_UNDEF); + // Hello world is 12 bytes, plus the null terminator. + // Add one .char declaration. + EXPECT_EQ(data->get_size(), 14); +} + +TEST_F(RiscV64XAssemblerTest, Bss) { + auto* bss = elf().sections[".bss"]; + EXPECT_EQ(bss->get_type(), SHT_NOBITS); + EXPECT_EQ(bss->get_flags(), SHF_ALLOC | SHF_WRITE); + EXPECT_EQ(bss->get_link(), SHN_UNDEF); + // Two .space declarations, each 16 bytes. + EXPECT_EQ(bss->get_size(), 32); +} + +TEST_F(RiscV64XAssemblerTest, Symbols) { + auto* symtab = elf().sections[".symtab"]; + ELFIO::symbol_section_accessor symbols(elf(), symtab); + ELFIO::Elf64_Addr value; + ELFIO::Elf_Xword size; + unsigned char bind; + unsigned char type; + ELFIO::Elf_Half section_index; + unsigned char other; + // Verify that main is valued 0x1000, global and located in the text section. 
+ symbols.get_symbol("main", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0x1000); + EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that exit is valued 0x1000 + 16 * 4, local and located in the text + // section. + symbols.get_symbol("exit", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0x1000 + 16 * 4); + EXPECT_EQ(bind, STB_LOCAL); + EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that hello is global and located in the data section at 0x2000. + symbols.get_symbol("hello", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0x2000); + EXPECT_EQ(section_index, elf().sections[".data"]->get_index()); + EXPECT_EQ(bind, STB_GLOBAL); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that semihost_param is global and located in the bss section at + // 0x2000 + 14 + alignment to 16 byte boundary, plus 16 bytes. + symbols.get_symbol("semihost_param", value, size, bind, type, section_index, + other); + EXPECT_EQ(value, 0x2000 + 16 + 16); + EXPECT_EQ(section_index, elf().sections[".bss"]->get_index()); + EXPECT_EQ(bind, STB_LOCAL); + EXPECT_EQ(type, STT_NOTYPE); +} + +// Verify that the first 16 instructions were assembled correctly. +TEST_F(RiscV64XAssemblerTest, TextContent) { + auto* text = elf().sections[".text"]; + auto* data = text->get_data(); + auto* word_data = reinterpret_cast<const uint32_t*>(data); + // Verify the first 16 instructions. 
+ EXPECT_EQ(word_data[0], 0x00500513); // addi a0, zero, 5 + EXPECT_EQ(word_data[1], 0x000025b7); // lui a1, semihost_param + EXPECT_EQ(word_data[2], 0x02058593); // addi a1, a1, semihost_param + EXPECT_EQ(word_data[3], 0x00200293); // addi t0, zero, 2 + EXPECT_EQ(word_data[4], 0x0055b023); // sd t0, 0(a1) + EXPECT_EQ(word_data[5], 0x000023b7); // lui t2, hello + EXPECT_EQ(word_data[6], 0x00038393); // addi t2, t2, hello + EXPECT_EQ(word_data[7], 0x0075b423); // sd t2, 8(a1) + EXPECT_EQ(word_data[8], 0x00c00293); // addi t0, zero, 12 + EXPECT_EQ(word_data[9], 0x0055b823); // sd t0, 0x10(a1) + EXPECT_EQ(word_data[10], 0x01c000ef); // jal ra, semihost + EXPECT_EQ(word_data[11], 0x01800513); // addi a0, zero, 24 + EXPECT_EQ(word_data[12], 0x000202b7); // lui t0, 0x20026 + EXPECT_EQ(word_data[13], 0x02628293); // addi t0, t0, 0x20026 + EXPECT_EQ(word_data[14], 0x0055b023); // sd t0, 0(a1) + EXPECT_EQ(word_data[15], 0x008000ef); // jal ra, semihost +} + +} // namespace
diff --git a/mpact/sim/util/asm/test/riscv64x_assembler.cc b/mpact/sim/util/asm/test/riscv64x_assembler.cc new file mode 100644 index 0000000..9b8efca --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_assembler.cc
@@ -0,0 +1,51 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mpact/sim/util/asm/test/riscv64x_assembler.h" + +#include <cstdint> +#include <tuple> + +#include "absl/log/check.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_encoder.h" + +namespace mpact { +namespace sim { +namespace riscv { + +using ::mpact::sim::util::assembler::ResolverInterface; + +RiscV64XAssembler::RiscV64XAssembler() { + bin_encoder_interface_ = new isa64::RiscV64XBinEncoderInterface(); + matcher_ = new isa64::Riscv64xSlotMatcher(bin_encoder_interface_); + CHECK_OK(matcher_->Initialize()); +} + +RiscV64XAssembler::~RiscV64XAssembler() { + delete bin_encoder_interface_; + delete matcher_; +} + +absl::StatusOr<std::tuple<uint64_t, int>> RiscV64XAssembler::Assemble( + uint64_t address, absl::string_view text, ResolverInterface *resolver) { + return matcher_->Encode(address, text, 0, resolver); +} + +} // namespace riscv +} // namespace sim +} // namespace mpact
diff --git a/mpact/sim/util/asm/test/riscv64x_assembler.h b/mpact/sim/util/asm/test/riscv64x_assembler.h new file mode 100644 index 0000000..14a64f0 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_assembler.h
@@ -0,0 +1,54 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV64X_ASSEMBLER_H_ +#define MPACT_SIM_UTIL_ASM_TEST_RISCV64X_ASSEMBLER_H_ + +#include <cstdint> +#include <tuple> + +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_encoder.h" +#include "mpact/sim/util/asm/test/riscv64x_enums.h" + +namespace mpact { +namespace sim { +namespace riscv { + +using ::mpact::sim::util::assembler::ResolverInterface; + +class RiscV64XAssembler { + public: + using SlotEnum = isa64::SlotEnum; + using OpcodeEnum = isa64::OpcodeEnum; + + RiscV64XAssembler(); + virtual ~RiscV64XAssembler(); + + absl::StatusOr<std::tuple<uint64_t, int>> Assemble( + uint64_t address, absl::string_view text, ResolverInterface *resolver); + + private: + isa64::RiscV64XBinEncoderInterface *bin_encoder_interface_ = nullptr; + isa64::Riscv64xSlotMatcher *matcher_ = nullptr; +}; + +} // namespace riscv +} // namespace sim +} // namespace mpact + +#endif // MPACT_SIM_UTIL_ASM_TEST_RISCV64X_ASSEMBLER_H_
diff --git a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc new file mode 100644 index 0000000..03734c1 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc
@@ -0,0 +1,114 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h" + +#include <cstdint> +#include <tuple> + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "mpact/sim/generic/type_helpers.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_bin_encoder.h" +#include "mpact/sim/util/asm/test/riscv64x_enums.h" +#include "mpact/sim/util/asm/test/riscv_bin_setters.h" + +namespace mpact { +namespace sim { +namespace riscv { +namespace isa64 { + +using ::mpact::sim::generic::operator*; // NOLINT(misc-unused-using-decls) +using ::mpact::sim::util::assembler::ResolverInterface; + +RiscV64XBinEncoderInterface::RiscV64XBinEncoderInterface() { + AddRiscvSourceOpBinSetters<SourceOpEnum, OpMap, encoding64::Encoder>( + source_op_map_); + AddRiscvDestOpBinSetters<DestOpEnum, OpMap, encoding64::Encoder>( + dest_op_map_); +} + +absl::StatusOr<std::tuple<uint64_t, int>> +RiscV64XBinEncoderInterface::GetOpcodeEncoding(SlotEnum slot, int entry, + OpcodeEnum opcode, + ResolverInterface *resolver) { + return encoding64::kOpcodeEncodings->at(opcode); +} + +absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetSrcOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, SourceOpEnum 
source_op, int source_num, + ResolverInterface *resolver) { + auto iter = source_op_map_.find(*source_op); + if (iter == source_op_map_.end()) { + return absl::NotFoundError(absl::StrCat( + "Source operand not found for op enum value ", *source_op)); + } + return iter->second(address, text, resolver); +} + +absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetDestOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, DestOpEnum dest_op, int dest_num, + ResolverInterface *resolver) { + auto iter = dest_op_map_.find(*dest_op); + if (iter == dest_op_map_.end()) { + return absl::NotFoundError( + absl::StrCat("Dest operand not found for op enum value ", *dest_op)); + } + return iter->second(address, text, resolver); +} + +absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetListDestOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, ListDestOpEnum dest_op, int dest_num, + ResolverInterface *resolver) { + auto iter = list_dest_op_map_.find(*dest_op); + if (iter == list_dest_op_map_.end()) { + return absl::NotFoundError(absl::StrCat( + "List dest operand not found for op enum value ", *dest_op)); + } + return iter->second(address, text, resolver); +} + +absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetListSourceOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, ListSourceOpEnum source_op, int source_num, + ResolverInterface *resolver) { + auto iter = list_source_op_map_.find(*source_op); + if (iter == list_source_op_map_.end()) { + return absl::NotFoundError(absl::StrCat( + "List source operand not found for op enum value ", *source_op)); + } + return iter->second(address, text, resolver); +} + +absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetPredOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, PredOpEnum pred_op, ResolverInterface *resolver) { + 
auto iter = pred_op_map_.find(*pred_op); + if (iter == pred_op_map_.end()) { + return absl::NotFoundError(absl::StrCat( + "Predicate operand not found for op enum value ", *pred_op)); + } + return iter->second(address, text, resolver); +} + +} // namespace isa64 +} // namespace riscv +} // namespace sim +} // namespace mpact
diff --git a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h new file mode 100644 index 0000000..78bbf4c --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h
@@ -0,0 +1,85 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV64X_BIN_ENCODER_INTERFACE_H_ +#define MPACT_SIM_UTIL_ASM_TEST_RISCV64X_BIN_ENCODER_INTERFACE_H_ + +#include <cstdint> +#include <functional> +#include <tuple> + +#include "absl/container/flat_hash_map.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "mpact/sim/util/asm/resolver_interface.h" +#include "mpact/sim/util/asm/test/riscv64x_encoder.h" +#include "mpact/sim/util/asm/test/riscv64x_enums.h" + +namespace mpact { +namespace sim { +namespace riscv { +namespace isa64 { + +using ::mpact::sim::util::assembler::ResolverInterface; + +class RiscV64XBinEncoderInterface : public RiscV64XEncoderInterfaceBase { + public: + RiscV64XBinEncoderInterface(); + RiscV64XBinEncoderInterface(const RiscV64XBinEncoderInterface &) = delete; + RiscV64XBinEncoderInterface &operator=(const RiscV64XBinEncoderInterface &) = + delete; + ~RiscV64XBinEncoderInterface() override = default; + + absl::StatusOr<std::tuple<uint64_t, int>> GetOpcodeEncoding( + SlotEnum slot, int entry, OpcodeEnum opcode, + ResolverInterface *resolver) override; + absl::StatusOr<uint64_t> GetSrcOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, SourceOpEnum source_op, int source_num, + ResolverInterface *resolver) override; + absl::StatusOr<uint64_t> GetDestOpEncoding( + uint64_t 
address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, DestOpEnum dest_op, int dest_num, + ResolverInterface *resolver) override; + absl::StatusOr<uint64_t> GetListDestOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, ListDestOpEnum dest_op, int dest_num, + ResolverInterface *resolver) override; + absl::StatusOr<uint64_t> GetListSourceOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, ListSourceOpEnum source_op, int source_num, + ResolverInterface *resolver) override; + absl::StatusOr<uint64_t> GetPredOpEncoding( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, PredOpEnum pred_op, + ResolverInterface *resolver) override; + + private: + using OpMap = absl::flat_hash_map< + int, std::function<absl::StatusOr<uint64_t>(uint64_t, absl::string_view, + ResolverInterface *)>>; + + OpMap source_op_map_; + OpMap dest_op_map_; + OpMap list_dest_op_map_; + OpMap list_source_op_map_; + OpMap pred_op_map_; +}; + +} // namespace isa64 +} // namespace riscv +} // namespace sim +} // namespace mpact + +#endif // MPACT_SIM_UTIL_ASM_TEST_RISCV64X_BIN_ENCODER_INTERFACE_H_
diff --git a/mpact/sim/util/asm/test/riscv64x_instructions.cc b/mpact/sim/util/asm/test/riscv64x_instructions.cc new file mode 100644 index 0000000..44990e6 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_instructions.cc
@@ -0,0 +1,27 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mpact/sim/util/asm/test/riscv64x_instructions.h" + +#include <iostream> + +#include "mpact/sim/generic/instruction.h" + +namespace mpact::sim::riscv { + +void RiscVIllegalInstruction(const generic::Instruction *inst) { + std::cerr << "Illegal instruction\n"; +} + +} // namespace mpact::sim::riscv
diff --git a/mpact/sim/util/asm/test/riscv64x_instructions.h b/mpact/sim/util/asm/test/riscv64x_instructions.h new file mode 100644 index 0000000..9dc1c42 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv64x_instructions.h
@@ -0,0 +1,28 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MPACT_SIM_UTIL_ASM_TEST_RISC64X_INSTRUCTIONS_H_ +#define MPACT_SIM_UTIL_ASM_TEST_RISC64X_INSTRUCTIONS_H_ + +#include "mpact/sim/generic/instruction.h" + +namespace mpact::sim::riscv { + +using ::mpact::sim::generic::Instruction; + +void RiscVIllegalInstruction(const generic::Instruction *inst); + +} // namespace mpact::sim::riscv + +#endif // MPACT_SIM_UTIL_ASM_TEST_RISC64X_INSTRUCTIONS_H_
diff --git a/mpact/sim/util/asm/test/riscv_bin_setters.h b/mpact/sim/util/asm/test/riscv_bin_setters.h new file mode 100644 index 0000000..e34d36f --- /dev/null +++ b/mpact/sim/util/asm/test/riscv_bin_setters.h
@@ -0,0 +1,208 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV_BIN_SETTERS_H_
+#define MPACT_SIM_UTIL_ASM_TEST_RISCV_BIN_SETTERS_H_
+
+#include <cstdint>
+#include <initializer_list>
+#include <utility>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/util/asm/resolver_interface.h"
+#include "mpact/sim/util/asm/test/riscv_getter_helpers.h"
+
+namespace mpact {
+namespace sim {
+namespace riscv {
+
+using ::mpact::sim::util::assembler::ResolverInterface;
+
+// Integer register names paired with their register numbers. Both the
+// architectural names (x0..x31) and the ABI names (zero, ra, sp, ...) appear.
+constexpr std::initializer_list<const std::pair<absl::string_view, uint64_t>>
+    kRegisterList = {
+        {"x0", 0}, {"x1", 1}, {"x2", 2}, {"x3", 3}, {"x4", 4},
+        {"x5", 5}, {"x6", 6}, {"x7", 7}, {"x8", 8}, {"x9", 9},
+        {"x10", 10}, {"x11", 11}, {"x12", 12}, {"x13", 13}, {"x14", 14},
+        {"x15", 15}, {"x16", 16}, {"x17", 17}, {"x18", 18}, {"x19", 19},
+        {"x20", 20}, {"x21", 21}, {"x22", 22}, {"x23", 23}, {"x24", 24},
+        {"x25", 25}, {"x26", 26}, {"x27", 27}, {"x28", 28}, {"x29", 29},
+        {"x30", 30}, {"x31", 31}, {"zero", 0}, {"ra", 1}, {"sp", 2},
+        {"gp", 3}, {"tp", 4}, {"t0", 5}, {"t1", 6}, {"t2", 7},
+        {"s0", 8}, {"s1", 9}, {"a0", 10}, {"a1", 11}, {"a2", 12},
+        {"a3", 13}, {"a4", 14}, {"a5", 15}, {"a6", 16}, {"a7", 17},
+        {"s2", 18}, {"s3", 19}, {"s4", 20}, {"s5", 21}, {"s6", 22},
+        {"s7", 23}, {"s8", 24}, {"s9", 25}, {"s10", 26}, {"s11", 27},
+        {"t3", 28}, {"t4", 29}, {"t5", 30}, {"t6", 31}};
+
+// Lookup table type from register name to register number.
+using ValueMap = absl::flat_hash_map<absl::string_view, uint64_t>;
+
+namespace bin_setters_internal {
+
+// Returns true if `text`, after an optional leading sign, starts with "0x" or
+// "0X".
+inline bool HasHexPrefix(absl::string_view text) {
+  if (!text.empty() && (text.front() == '-' || text.front() == '+')) {
+    text.remove_prefix(1);
+  }
+  return text.size() >= 2 && text[0] == '0' &&
+         (text[1] == 'x' || text[1] == 'X');
+}
+
+// Returns a pointer to the register number named by `text`, or nullptr if
+// `text` is not in kRegisterList. The lookup table is built on first use and
+// shared by all setters.
+inline const uint64_t *FindRegisterNumber(absl::string_view text) {
+  static const ValueMap kRegisterMap(kRegisterList);
+  const auto iter = kRegisterMap.find(text);
+  return iter == kRegisterMap.end() ? nullptr : &iter->second;
+}
+
+}  // namespace bin_setters_internal
+
+// Converts operand text to an integer of type T. Text with a hexadecimal
+// prefix (optionally signed) is parsed as hexadecimal; other text is parsed as
+// decimal. Text that is not a decimal number is handed to `resolver`, when one
+// is provided, and the resolved value is cast to T. Returns the resolver's
+// error status if resolution fails, or an InvalidArgument error if the text
+// cannot be converted.
+template <typename T>
+absl::StatusOr<T> SimpleTextToInt(absl::string_view text,
+                                  ResolverInterface *resolver) {
+  T value;
+  if (bin_setters_internal::HasHexPrefix(text)) {
+    // SimpleHexAtoi accepts the sign and the "0x"/"0X" prefix itself, so it is
+    // given the whole text. Stripping the prefix first would let malformed
+    // input such as "0x0x10" or "0x-10" parse successfully.
+    if (absl::SimpleHexAtoi(text, &value)) return value;
+    return absl::InvalidArgumentError(
+        absl::StrCat("Invalid hexadecimal immediate: ", text));
+  }
+  if (absl::SimpleAtoi(text, &value)) return value;
+  if (resolver != nullptr) {
+    // Not a numeric literal: defer to the resolver.
+    auto res = resolver->Resolve(text);
+    if (!res.ok()) {
+      return res.status();
+    }
+    return static_cast<T>(res.value());
+  }
+  return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text));
+}
+
+// Adds one setter to `map` for each source operand enumerator handled below.
+// Every setter takes an address, the operand text and a resolver, and returns
+// either the result of the matching Encoder Insert* function (called with
+// 0ULL as its second argument) or an error status.
+template <typename Enum, typename Map, typename Encoder>
+void AddRiscvSourceOpBinSetters(Map &map) {
+  Insert(map, *Enum::kIImm12,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::IType::InsertImm12(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kIUimm6,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<uint32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::RSType::InsertRUimm6(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kJImm12,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::IType::InsertImm12(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kJImm20,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           // The encoded immediate is the target's offset from `address`.
+           uint32_t delta = res.value() - address;
+           return Encoder::JType::InsertJImm(delta, 0ULL);
+         });
+  Insert(map, *Enum::kRs1,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           const uint64_t *reg =
+               bin_setters_internal::FindRegisterNumber(text);
+           if (reg == nullptr) {
+             return absl::InvalidArgumentError(
+                 absl::StrCat("Invalid source operand: ", text));
+           }
+           return Encoder::RSType::InsertRs1(*reg, 0ULL);
+         });
+  Insert(map, *Enum::kRs2,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           const uint64_t *reg =
+               bin_setters_internal::FindRegisterNumber(text);
+           if (reg == nullptr) {
+             return absl::InvalidArgumentError(
+                 absl::StrCat("Invalid source operand: ", text));
+           }
+           return Encoder::SType::InsertRs2(*reg, 0ULL);
+         });
+  Insert(map, *Enum::kSImm12,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           // S-type immediates are signed, like kIImm12: parse as int32_t so
+           // that negative store offsets such as "-4" are accepted.
+           auto res = SimpleTextToInt<int32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::SType::InsertSImm(res.value(), 0ULL);
+         });
+  Insert(map, *Enum::kUImm20,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           auto res = SimpleTextToInt<uint32_t>(text, resolver);
+           if (!res.ok()) return res.status();
+           return Encoder::UType::InsertUImm(res.value(), 0ULL);
+         });
+}
+
+// Adds the setter for the destination register operand (Enum::kRd) to `map`.
+// The setter returns an InvalidArgument error for unknown register names.
+template <typename Enum, typename Map, typename Encoder>
+void AddRiscvDestOpBinSetters(Map &map) {
+  Insert(map, *Enum::kRd,
+         [](uint64_t address, absl::string_view text,
+            ResolverInterface *resolver) -> absl::StatusOr<uint64_t> {
+           const uint64_t *reg =
+               bin_setters_internal::FindRegisterNumber(text);
+           if (reg == nullptr) {
+             return absl::InvalidArgumentError(
+                 absl::StrCat("Invalid destination operand: ", text));
+           }
+           return Encoder::RSType::InsertRd(*reg, 0ULL);
+         });
+}
+
+}  // namespace riscv
+}  // namespace sim
+}  // namespace mpact
+
+#endif  // MPACT_SIM_UTIL_ASM_TEST_RISCV_BIN_SETTERS_H_
diff --git a/mpact/sim/util/asm/test/riscv_getter_helpers.h b/mpact/sim/util/asm/test/riscv_getter_helpers.h new file mode 100644 index 0000000..160bbe4 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv_getter_helpers.h
@@ -0,0 +1,68 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_
+#define MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/functional/any_invocable.h"
+#include "absl/log/log.h"
+#include "absl/strings/string_view.h"
+
+// This file contains helper functions that are used to create commonly used
+// operands for RiscV instructions.
+
+namespace mpact {
+namespace sim {
+namespace riscv {
+
+// Helper function to insert an entry into a "getter" map, keyed by the
+// integer value of `entry`. If the key is already present, its getter is
+// replaced. This is used in the riscv_*_getter.h files.
+template <typename M, typename E, typename G>
+inline void Insert(M &map, E entry, G getter) {
+  // Adds the entry, or overwrites an existing one, with a single lookup.
+  map.insert_or_assign(static_cast<int>(entry), getter);
+}
+
+// Architectural names for the integer registers.
+constexpr absl::string_view kXregNames[32] = {
+    "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10",
+    "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21",
+    "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"};
+// ABI names for the integer registers.
+constexpr absl::string_view kXregAbiNames[32] = {
+    "zero", "ra", "sp", "gp", "tp",  "t0",  "t1", "t2", "s0", "s1", "a0",
+    "a1",   "a2", "a3", "a4", "a5",  "a6",  "a7", "s2", "s3", "s4", "s5",
+    "s6",   "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"};
+// Architectural names for the floating point registers.
+constexpr absl::string_view kFregNames[32] = {
+    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",  "f8",  "f9",  "f10",
+    "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21",
+    "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"};
+// ABI names for the floating point registers.
+constexpr absl::string_view kFregAbiNames[32] = {
+    "ft0", "ft1", "ft2",  "ft3",  "ft4", "ft5", "ft6",  "ft7",
+    "fs0", "fs1", "fa0",  "fa1",  "fa2", "fa3", "fa4",  "fa5",
+    "fa6", "fa7", "fs2",  "fs3",  "fs4", "fs5", "fs6",  "fs7",
+    "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"};
+
+}  // namespace riscv
+}  // namespace sim
+}  // namespace mpact
+
+#endif  // MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_