Updated assembler generator and SimpleAssembler to support relocatable files. PiperOrigin-RevId: 714964235 Change-Id: I4b93434f7a4e47042519fcea5b1be6e2c946a801
diff --git a/mpact/sim/decoder/instruction_set.cc b/mpact/sim/decoder/instruction_set.cc index aa2f0fc..31448c5 100644 --- a/mpact/sim/decoder/instruction_set.cc +++ b/mpact/sim/decoder/instruction_set.cc
@@ -698,6 +698,15 @@ std::string source_op = absl::StrCat("SourceOpEnum::k", ToPascalCase(op_name)); absl::StrAppend(&output, " // Source operand ", op_name, "\n"); + if (locator.is_reloc) { + absl::StrAppend(&output, + " auto status = encoder->AppendSrcOpRelocation(\n" + " address, operands[", + position, "], slot, entry, opcode, ", source_op, ", ", + locator.instance, + ", resolver, relocations);\n" + " if (!status.ok()) return status;\n"); + } absl::StrAppend(&output, " result = encoder->GetSrcOpEncoding(address, operands[", position, @@ -723,6 +732,15 @@ std::string dest_op = absl::StrCat("DestOpEnum::k", ToPascalCase(op_name)); absl::StrAppend(&output, " // Destination operand ", op_name, "\n"); + if (locator.is_reloc) { + absl::StrAppend(&output, + " auto status = encoder->AppendDestOpRelocation(\n" + " address, operands[", + position, "], slot, entry, opcode, ", dest_op, ", ", + locator.instance, + ", resolver, relocations);\n" + " if (!status.ok()) return status;\n"); + } absl::StrAppend( &output, " result = encoder->GetDestOpEncoding(address, operands[", position, @@ -763,6 +781,7 @@ std::string encoder = absl::StrCat(pascal_name(), "EncoderInterfaceBase"); // Generate the bin encoder base class. 
absl::StrAppend(&h_output, + "using ::mpact::sim::util::assembler::RelocationInfo;\n" "using ::mpact::sim::util::assembler::ResolverInterface;\n" "\n" "class ", @@ -779,9 +798,17 @@ virtual absl::StatusOr<uint64_t> GetSrcOpEncoding(uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, SourceOpEnum source_op, int source_num, ResolverInterface *resolver) = 0; + virtual absl::Status AppendSrcOpRelocation(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + SourceOpEnum source_op, int source_num, ResolverInterface *resolver, + std::vector<RelocationInfo> &relocations) = 0; virtual absl::StatusOr<uint64_t> GetDestOpEncoding(uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, DestOpEnum dest_op, int dest_num, ResolverInterface *resolver) = 0; + virtual absl::Status AppendDestOpRelocation(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + DestOpEnum dest_op, int dest_num, ResolverInterface *resolver, + std::vector<RelocationInfo> &relocations) = 0; virtual absl::StatusOr<uint64_t> GetListSrcOpEncoding( uint64_t address, absl::string_view text,SlotEnum slot, int entry, OpcodeEnum opcode, ListSourceOpEnum source_op, int source_num, ResolverInterface *resolver) = 0; @@ -802,7 +829,8 @@ "absl::StatusOr<std::tuple<uint64_t, int>> EncodeNone(", encoder, "*, SlotEnum, int, OpcodeEnum, uint64_t, const " - "std::vector<std::string> &, ResolverInterface *) {\n" + "std::vector<std::string> &, ResolverInterface *, " + "std::vector<RelocationInfo> &) {\n" " return absl::NotFoundError(\"No such opcode\");\n" "}\n\n"); std::string array; @@ -812,24 +840,26 @@ encoder, "*, SlotEnum, int, OpcodeEnum, uint64_t, const " "std::vector<std::string> " - "&, ResolverInterface *);\n" + "&, ResolverInterface *, std::vector<RelocationInfo> &);\n" "EncodeFcn encode_fcns[] = {\n" " EncodeNone,\n"); for (auto &[name, inst_ptr] : instruction_map_) { auto 
*opcode = inst_ptr->opcode(); absl::StrAppend(&array, " Encode", opcode->pascal_name(), ",\n"); - absl::StrAppend( - &cc_output, "absl::StatusOr<std::tuple<uint64_t, int>> Encode", - opcode->pascal_name(), "(\n ", encoder, - " *encoder, SlotEnum slot, int entry, OpcodeEnum opcode,\n" - " uint64_t address, const " - "std::vector<std::string> &operands, ResolverInterface *resolver) " - "{\n" - " auto res_opcode = encoder->GetOpcodeEncoding(slot, " - "entry, opcode, resolver);\n" - " if (!res_opcode.ok()) return res_opcode.status();\n" - " auto [encoding, bit_size] = res_opcode.value();\n" - " absl::StatusOr<uint64_t> result;\n"); + absl::StrAppend(&cc_output, + "absl::StatusOr<std::tuple<uint64_t, int>> Encode", + opcode->pascal_name(), "(\n ", encoder, + " *encoder, SlotEnum slot, int entry, OpcodeEnum opcode,\n" + " uint64_t address, const " + "std::vector<std::string> &operands,\n" + " ResolverInterface *resolver, " + "std::vector<RelocationInfo> &relocations) " + "{\n" + " auto res_opcode = encoder->GetOpcodeEncoding(slot, " + "entry, opcode, resolver);\n" + " if (!res_opcode.ok()) return res_opcode.status();\n" + " auto [encoding, bit_size] = res_opcode.value();\n" + " absl::StatusOr<uint64_t> result;\n"); int position = 0; for (auto const *disasm_format : inst_ptr->disasm_format_vec()) { for (auto const *format_info : disasm_format->format_info_vec) {
diff --git a/mpact/sim/decoder/instruction_set_visitor.cc b/mpact/sim/decoder/instruction_set_visitor.cc index 7783807..dae84eb 100644 --- a/mpact/sim/decoder/instruction_set_visitor.cc +++ b/mpact/sim/decoder/instruction_set_visitor.cc
@@ -2322,6 +2322,8 @@ "#include \"absl/status/status.h\"\n" "#include \"absl/status/statusor.h\"\n" "#include \"absl/strings/string_view.h\"\n" + "#include " + "\"mpact/sim/util/asm/opcode_assembler_interface.h\"\n" "#include \"mpact/sim/util/asm/resolver_interface.h\"\n" "#include \"re2/re2.h\"\n" "#include \"re2/set.h\"\n" @@ -2340,6 +2342,8 @@ "#include \"absl/status/statusor.h\"\n" "#include \"absl/strings/str_cat.h\"\n" "#include \"absl/strings/string_view.h\"\n" + "#include " + "\"mpact/sim/util/asm/opcode_assembler_interface.h\"\n" "#include \"mpact/sim/util/asm/resolver_interface.h\"\n" "#include \"re2/re2.h\"\n" "#include \"re2/set.h\"\n"
diff --git a/mpact/sim/decoder/slot.cc b/mpact/sim/decoder/slot.cc index d3e2964..6f6d417 100644 --- a/mpact/sim/decoder/slot.cc +++ b/mpact/sim/decoder/slot.cc
@@ -466,8 +466,9 @@ "();\n" " absl::Status Initialize();\n" "absl::StatusOr<std::tuple<uint64_t, int>> " - "Encode(uint64_t address, absl::string_view text, int entry, " - "ResolverInterface *resolver);\n\n" + " Encode(uint64_t address, absl::string_view text, int entry, " + "ResolverInterface *resolver, std::vector<RelocationInfo> " + "&relocations);\n\n" " private:\n" " bool Match(absl::string_view text, std::vector<int> &matches);\n" " bool Extract(absl::string_view text, int index, " @@ -553,7 +554,8 @@ pascal_name(), "SlotMatcher::Encode(\n" R"( - uint64_t address, absl::string_view text, int entry, ResolverInterface *resolver) { + uint64_t address, absl::string_view text, int entry, ResolverInterface *resolver, + std::vector<RelocationInfo> &relocations) { std::vector<int> matches; std::string error_message = absl::StrCat("Failed to encode '", text, "':"); if (!Match(text, matches) || (matches.size() == 0)) { @@ -568,7 +570,8 @@ pascal_name(), ", entry, \n" " " - "static_cast<OpcodeEnum>(index), address, values, resolver);\n", + "static_cast<OpcodeEnum>(index), address, values, resolver, " + "relocations);\n", R"( if (!result.status().ok()) { absl::StrAppend(&error_message, "\n ", result.status().message());
diff --git a/mpact/sim/util/asm/BUILD b/mpact/sim/util/asm/BUILD index ad67e8f..e02ba66 100644 --- a/mpact/sim/util/asm/BUILD +++ b/mpact/sim/util/asm/BUILD
@@ -43,6 +43,8 @@ "@com_github_serge1_elfio//:elfio", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings",
diff --git a/mpact/sim/util/asm/opcode_assembler_interface.h b/mpact/sim/util/asm/opcode_assembler_interface.h index a22c7c5..1c7553b 100644 --- a/mpact/sim/util/asm/opcode_assembler_interface.h +++ b/mpact/sim/util/asm/opcode_assembler_interface.h
@@ -16,6 +16,7 @@ #define MPACT_SIM_UTIL_ASM_OPCODE_ASSEMBLER_INTERFACE_H_ #include <cstdint> +#include <string> #include <vector> #include "absl/status/status.h" @@ -31,6 +32,14 @@ namespace util { namespace assembler { +struct RelocationInfo { + uint64_t offset; + std::string symbol; + uint32_t type; + uint64_t addend; + uint16_t section_index; +}; + class OpcodeAssemblerInterface { public: virtual ~OpcodeAssemblerInterface() = default; @@ -39,7 +48,8 @@ // encoded into the bytes vector. virtual absl::Status Encode(uint64_t address, absl::string_view text, ResolverInterface *resolver, - std::vector<uint8_t> &bytes) = 0; + std::vector<uint8_t> &bytes, + std::vector<RelocationInfo> &relocations) = 0; }; } // namespace assembler
diff --git a/mpact/sim/util/asm/simple_assembler.cc b/mpact/sim/util/asm/simple_assembler.cc index 0697ef3..76584f8 100644 --- a/mpact/sim/util/asm/simple_assembler.cc +++ b/mpact/sim/util/asm/simple_assembler.cc
@@ -20,9 +20,13 @@ #include <istream> #include <ostream> #include <string> +#include <utility> #include <vector> #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/functional/any_invocable.h" +#include "absl/functional/bind_front.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" @@ -46,9 +50,24 @@ // the symbols and computing the sizes of the sections. class ZeroResolver : public ResolverInterface { public: + // Constructor takes a callback function that will be called for each symbol + // name encountered so that it can be added to the symbol table. + template <typename T> + ZeroResolver(T add_symbol_fcn) : add_symbol_fcn_(add_symbol_fcn) {} absl::StatusOr<uint64_t> Resolve(absl::string_view text) override { + // Any symbol name should be added to the symbol table as an undefined + // symbol if it is not already there. When the symbol is defined, the + // symbol table will be updated. In the case of generating an executable + // ELF file, any unresolved symbols will result in an error. When generating + // an object file, any unresolved symbols will remain in the symbol table + // and must be handled by the linker. + add_symbol_fcn_(text); + // Return 0 for any symbol name. return 0; } + + private: + absl::AnyInvocable<void(absl::string_view)> add_symbol_fcn_; }; // A symbol resolver that uses the symbol table and the symbol indices to @@ -57,7 +76,7 @@ public: SymbolResolver( int elf_file_class, ELFIO::section *symtab, - const absl::flat_hash_map<std::string, ELFIO::Elf_Xword> &symbol_indices) + const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices) : elf_file_class_(elf_file_class), symtab_(symtab), symbol_indices_(symbol_indices) {} @@ -86,217 +105,9 @@ // The symbol table ELF section. ELFIO::section *symtab_; // Map from symbol name to symbol index in the symbol table. 
- const absl::flat_hash_map<std::string, ELFIO::Elf_Xword> &symbol_indices_; + const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices_; }; -SimpleAssembler::SimpleAssembler(int elf_file_class, int os_abi, int type, - int machine, uint64_t base_address, - OpcodeAssemblerInterface *opcode_assembler_if) - : elf_file_class_(elf_file_class), - opcode_assembler_if_(opcode_assembler_if), - base_address_(base_address), - comment_re_("^\\s*(?:;(.*))?$"), - asm_line_re_("^(?:(?:(\\S+)\\s*:)?|\\s)\\s*([^;]*?)?\\s*(?:;(.*))?$"), - directive_re_( - "^\\.(align|bss|bytes|char|cstring|data|entry|global|long|sect" - "|short|space|string|type|text|uchar|ulong|ushort|uword|word)(?:\\s+(" - ".*)" - ")?\\s*" - "$") { - // Configure the ELF file writer. - writer_.create(elf_file_class_, ELFDATA2LSB); - writer_.set_os_abi(os_abi); - writer_.set_type(ET_EXEC); - writer_.set_machine(machine); - // Create the symbol table section. - symtab_ = writer_.sections.add(".symtab"); - symtab_->set_type(SHT_SYMTAB); - symtab_->set_entry_size(elf_file_class_ == ELFCLASS64 - ? sizeof(ELFIO::Elf64_Sym) - : sizeof(ELFIO::Elf32_Sym)); - // Create the string table section. - strtab_ = writer_.sections.add(".strtab"); - strtab_->set_type(SHT_STRTAB); - // Link the symbol table to the string table. - symtab_->set_link(strtab_->get_index()); - // Create the symbol and string table accessors. - symbol_accessor_ = new ELFIO::symbol_section_accessor(writer_, symtab_); - string_accessor_ = - new ELFIO::string_section_accessor(writer_.sections[".strtab"]); -} - -SimpleAssembler::~SimpleAssembler() { - delete symbol_resolver_; - delete symbol_accessor_; - delete string_accessor_; -} - -absl::Status SimpleAssembler::Parse(std::istream &is) { - // A trivial symbol resolver that always returns 0. - ZeroResolver zero_resolver; - // Create the sections we will need: .text, .data, and .bss. 
- ELFIO::section *text_section = writer_.sections.add(".text"); - text_section->set_type(SHT_PROGBITS); - text_section->set_flags(SHF_ALLOC | SHF_EXECINSTR); - text_section->set_addr_align(0x10); - ELFIO::section *data_section = writer_.sections.add(".data"); - data_section->set_type(SHT_PROGBITS); - data_section->set_flags(SHF_ALLOC | SHF_WRITE); - data_section->set_addr_align(0x10); - ELFIO::section *bss_section = writer_.sections.add(".bss"); - bss_section->set_type(SHT_NOBITS); - bss_section->set_flags(SHF_ALLOC | SHF_WRITE); - bss_section->set_addr_align(0x10); - - // First pass of parsing the input stream. This will add symbols to the symbol - // table and compute the sizes of all instructions and the sections. The - // section_address_map_ will keep track of the current location within each - // section (i.e., the offset within the section of the next - // instruction/object). - std::string line; - std::string label; - std::string statement; - while (is.good() && !is.eof()) { - getline(is, line); - if (RE2::FullMatch(line, comment_re_)) continue; - if (RE2::FullMatch(line, asm_line_re_, &label, &statement)) { - std::vector<uint8_t> byte_vector; - auto *section = current_section_; - uint64_t address = - (section == nullptr) ? 0 : section_address_map_[section]; - if (!statement.empty()) { - absl::Status status; - if (statement[0] == '.') { - status = ParseAsmDirective(statement, &zero_resolver, byte_vector); - } else { - status = ParseAsmStatement(statement, &zero_resolver, byte_vector); - } - if (!status.ok()) return status; - // Save the statements for processing in pass two. - lines_.push_back(statement); - } - - if (!label.empty()) { - // When initially adding symbols, the address is relative to the start - // of the containing section. This will be corrected later. 
- if (section == nullptr) { - return absl::InvalidArgumentError(absl::StrCat( - "Label: '", label, "' defined outside of a section")); - } - auto size = section_address_map_[section] - address; - auto status = - AddSymbol(label, address, size, STT_NOTYPE, STB_LOCAL, 0, section); - } - continue; - } - return absl::AbortedError(absl::StrCat("Parse failure: '", line, "'")); - } - if (!is.eof()) return absl::InternalError("Input stream entered bad state"); - - // Section sizes are now known. So let's compute the layout and update all - // the symbol values/addresses before the next pass. - // The layout is: - // text segment starting at base address + any alignment. - // data segment starting at the end of the text segment + any alignment. - // The bss section is added to the end of the data segment + any alignment. - - auto text_segment_start = base_address_ & ~4095ULL; - ELFIO::segment *text_segment = writer_.segments.add(); - text_segment->set_type(PT_LOAD); - text_segment->set_virtual_address(text_segment_start); - text_segment->set_physical_address(text_segment_start); - text_segment->set_flags(PF_X | PF_R); - text_segment->set_align(4096); - - uint64_t data_segment_start = (text_segment->get_virtual_address() + - section_address_map_[text_section] + 4095) & - ~4095ULL; - - ELFIO::segment *data_segment = writer_.segments.add(); - data_segment->set_type(PT_LOAD); - data_segment->set_virtual_address(data_segment_start); - data_segment->set_physical_address(data_segment_start); - data_segment->set_flags(PF_W | PF_R); - data_segment->set_align(4096); - - uint64_t bss_size = section_address_map_[bss_section]; - uint64_t bss_align = bss_section->get_addr_align() - 1; - uint64_t bss_segment_start = - (data_segment_start + section_address_map_[data_section] + bss_align) & - ~bss_align; - - // Now we can update the symbol table based on the new section sizes. - - // Copy the symbol table from the section data. 
- auto num_symbols = symbol_accessor_->get_symbols_num(); - auto size = symtab_->get_size(); - auto *symbols = new ELFIO::Elf64_Sym[num_symbols]; - std::memcpy(symbols, symtab_->get_data(), size); - // Convert the section offsets to the absolute addresses. - for (int i = 0; i < num_symbols; ++i) { - auto &sym = symbols[i]; - auto shndx = sym.st_shndx; - auto sym_name = string_accessor_->get_string(sym.st_name); - if (global_symbols_.contains(sym_name)) { - sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info)); - } - if (shndx == text_section->get_index()) { - sym.st_value += text_segment_start; - } else if (shndx == data_section->get_index()) { - sym.st_value += data_segment_start; - } else if (shndx == bss_section->get_index()) { - sym.st_value += bss_segment_start; - } - } - // Update the symbol table section data with the updated symbols. - symtab_->set_data(reinterpret_cast<char *>(symbols), size); - delete[] symbols; - - // For the second pass, we need a symbol resolver that uses the symbol table - // and the symbol indices. - symbol_resolver_ = - new SymbolResolver(elf_file_class_, symtab_, symbol_indices_); - - // Update the section address map so that each section starts at the right - // address, i.e., it no longer tracks the offset within each section, but the - // absolute address. - section_address_map_[text_section] = text_segment_start; - section_address_map_[data_section] = data_segment_start; - section_address_map_[bss_section] = bss_segment_start; - - // Now fill in the sections. Parse each of the lines saved in the first pass. 
- for (auto const &line : lines_) { - std::vector<uint8_t> byte_vector; - absl::Status status; - auto *section = current_section_; - if (line[0] == '.') { - auto status = ParseAsmDirective(line, symbol_resolver_, byte_vector); - } else { - auto status = ParseAsmStatement(line, symbol_resolver_, byte_vector); - } - if (!status.ok()) return status; - if (byte_vector.empty()) continue; - // Add data to the section, but first make sure it's not bss. - if (section != bss_section) { - section->append_data(reinterpret_cast<const char *>(byte_vector.data()), - byte_vector.size()); - } - } - - bss_section->set_size(bss_size); - - // Add sections to the segments. First segment gets the text section. The - // second segment gets the data and bss sections. - text_segment->add_section_index(text_section->get_index(), - text_section->get_addr_align()); - data_segment->add_section_index(data_section->get_index(), - data_section->get_addr_align()); - data_segment->add_section_index(bss_section->get_index(), - bss_section->get_addr_align()); - - return absl::OkStatus(); -} - // Helper functions for parsing the assembly code. namespace { @@ -497,34 +308,413 @@ } // namespace -absl::Status SimpleAssembler::SetEntryPoint(const std::string &value) { - auto res = SimpleTextToInt<uint64_t>(value, symbol_resolver_); - if (!res.ok()) return res.status(); - entry_point_ = res.value(); +SimpleAssembler::SimpleAssembler(int elf_file_class, int os_abi, int type, + int machine, + OpcodeAssemblerInterface *opcode_assembler_if) + : elf_file_class_(elf_file_class), + opcode_assembler_if_(opcode_assembler_if), + comment_re_("^\\s*(?:;(.*))?$"), + asm_line_re_("^(?:(?:(\\S+)\\s*:)?|\\s)\\s*([^;]*?)?\\s*(?:;(.*))?$"), + directive_re_( + "^\\.(align|bss|bytes|char|cstring|data|global|long|sect" + "|short|space|string|type|text|uchar|ulong|ushort|uword|word)(?:\\s+(" + ".*)" + ")?\\s*" + "$") { + // Configure the ELF file writer. 
+ writer_.create(elf_file_class_, ELFDATA2LSB); + writer_.set_os_abi(os_abi); + writer_.set_type(ET_EXEC); + writer_.set_machine(machine); + // Create the symbol table section. + symtab_ = writer_.sections.add(".symtab"); + section_index_map_.insert({symtab_->get_index(), symtab_}); + symtab_->set_type(SHT_SYMTAB); + symtab_->set_entry_size(elf_file_class_ == ELFCLASS64 + ? sizeof(ELFIO::Elf64_Sym) + : sizeof(ELFIO::Elf32_Sym)); + // Create the string table section. + strtab_ = writer_.sections.add(".strtab"); + section_index_map_.insert({strtab_->get_index(), strtab_}); + strtab_->set_type(SHT_STRTAB); + // Link the symbol table to the string table. + symtab_->set_link(strtab_->get_index()); + // Create the symbol and string table accessors. + symbol_accessor_ = new ELFIO::symbol_section_accessor(writer_, symtab_); + string_accessor_ = + new ELFIO::string_section_accessor(writer_.sections[".strtab"]); +} + +SimpleAssembler::~SimpleAssembler() { + delete symbol_resolver_; + delete symbol_accessor_; + delete string_accessor_; +} + +absl::Status SimpleAssembler::Parse(std::istream &is) { + // A trivial symbol resolver that always returns 0. + ZeroResolver zero_resolver( + absl::bind_front(&SimpleAssembler::SimpleAddSymbol, this)); + // First pass of parsing the input stream. This will add symbols to the symbol + // table and compute the sizes of all instructions and the sections. The + // section_address_map_ will keep track of the current location within each + // section (i.e., the offset within the section of the next + // instruction/object). + std::string line; + std::string label; + std::string statement; + while (is.good() && !is.eof()) { + getline(is, line); + if (RE2::FullMatch(line, comment_re_)) continue; + if (RE2::FullMatch(line, asm_line_re_, &label, &statement)) { + std::vector<uint8_t> byte_vector; + std::vector<RelocationInfo> relo_vector; + auto *section = current_section_; + uint64_t address = + (section == nullptr) ? 
0 : section_address_map_[section]; + if (!statement.empty()) { + absl::Status status; + if (statement[0] == '.') { + status = ParseAsmDirective(statement, &zero_resolver, byte_vector); + } else { + status = ParseAsmStatement(statement, &zero_resolver, byte_vector, + relo_vector); + } + if (!status.ok()) return status; + // Save the statements for processing in pass two. + lines_.push_back(statement); + } + + if (!label.empty()) { + // When initially adding symbols, the address is relative to the start + // of the containing section. This will be corrected later. + if (section == nullptr) { + return absl::InvalidArgumentError(absl::StrCat( + "Label: '", label, "' defined outside of a section")); + } + auto size = section_address_map_[section] - address; + auto status = + AddSymbol(label, address, size, STT_NOTYPE, STB_LOCAL, 0, section); + } + continue; + } + return absl::AbortedError(absl::StrCat("Parse failure: '", line, "'")); + } + + if (!is.eof()) return absl::InternalError("Input stream entered bad state"); + + // Add undefined symbols to the symbol table. + for (auto const &symbol : undefined_symbols_) { + auto status = AddSymbol(symbol, 0, 0, STT_NOTYPE, 0, 0, nullptr); + if (!status.ok()) { + return absl::InternalError( + absl::StrCat("Failed to add undefined symbol: ", symbol)); + } + } + + if (bss_section_ != nullptr) { + bss_section_->set_size(section_address_map_[bss_section_]); + } return absl::OkStatus(); } -absl::Status SimpleAssembler::SetEntryPoint(uint64_t value) { - entry_point_ = value; +absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, + uint64_t entry_point) { + return CreateExecutable(base_address, absl::StrCat(entry_point)); +} + +// Helper function to update the symbol table entries for an executable file. 
+template <typename SymbolType> +void SimpleAssembler::UpdateSymbolsForExecutable(uint64_t text_segment_start, + uint64_t data_segment_start, + uint64_t bss_segment_start) { + auto num_symbols = symtab_->get_size() / sizeof(SymbolType); + auto size = num_symbols * sizeof(SymbolType); + auto *symbols = new SymbolType[num_symbols]; + std::memcpy(symbols, symtab_->get_data(), size); + for (int i = 0; i < num_symbols; ++i) { + auto &sym = symbols[i]; + auto shndx = sym.st_shndx; + std::string name = string_accessor_->get_string(sym.st_name); + if (global_symbols_.contains(name)) { + sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info)); + } + if ((text_section_ != nullptr) && (shndx == text_section_->get_index())) { + sym.st_value += text_segment_start; + } else if ((data_section_ != nullptr) && + (shndx == data_section_->get_index())) { + sym.st_value += data_segment_start; + } else if ((bss_section_ != nullptr) && + (shndx == bss_section_->get_index())) { + sym.st_value += bss_segment_start; + } + } + symtab_->set_data(reinterpret_cast<char *>(symbols), size); + delete[] symbols; +} + +template <typename SymbolType> +void SimpleAssembler::UpdateSymbolsForRelocatable() { + auto num_symbols = symtab_->get_size() / sizeof(SymbolType); + auto size = num_symbols * sizeof(SymbolType); + auto *symbols = new SymbolType[num_symbols]; + std::memcpy(symbols, symtab_->get_data(), size); + for (int i = 0; i < num_symbols; ++i) { + auto &sym = symbols[i]; + std::string name = string_accessor_->get_string(sym.st_name); + symbol_indices_.insert({name, i}); + if (global_symbols_.contains(name)) { + sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info)); + } + } + symtab_->set_data(reinterpret_cast<char *>(symbols), size); + delete[] symbols; +} + +absl::Status SimpleAssembler::CreateExecutable(uint64_t base_address, + const std::string &entry_point) { + if (!undefined_symbols_.empty()) { + std::string message; + absl::StrAppend( + &message, + "Cannot create 
executable with the following undefined symbols: "); + for (auto const &symbol : undefined_symbols_) { + absl::StrAppend(&message, " ", symbol, "\n"); + } + return absl::InvalidArgumentError(message); + } + // Section sizes are now known. So let's compute the layout and update all + // the symbol values/addresses before the next pass. + // The layout is: + // text segment starting at base address + any alignment. + // data segment starting at the end of the text segment + any alignment. + // The bss section is added to the end of the data segment + any alignment. + + ELFIO::segment *text_segment = nullptr; + uint64_t text_segment_start = 0; + if (text_section_ != nullptr) { + text_segment_start = base_address & ~4095ULL; + text_segment = writer_.segments.add(); + text_segment->set_type(PT_LOAD); + text_segment->set_virtual_address(text_segment_start); + text_segment->set_physical_address(text_segment_start); + text_segment->set_flags(PF_X | PF_R); + text_segment->set_align(4096); + } + + ELFIO::segment *data_segment = nullptr; + uint64_t data_segment_start = 0; + uint64_t bss_segment_start = 0; + if ((data_section_ != nullptr) || (bss_section_ != nullptr)) { + data_segment_start = + (text_segment_start + section_address_map_[text_section_] + 4095) & + ~4095ULL; + + data_segment = writer_.segments.add(); + data_segment->set_type(PT_LOAD); + data_segment->set_virtual_address(data_segment_start); + data_segment->set_physical_address(data_segment_start); + data_segment->set_flags(PF_W | PF_R); + data_segment->set_align(4096); + + uint64_t bss_align = (bss_section_ != nullptr) ? bss_section_->get_addr_align() - 1 : 0; + bss_segment_start = + (data_segment_start + section_address_map_[data_section_] + bss_align) & + ~bss_align; + } + + // Now we can update the symbol table based on the new section sizes. + + // Different size symbol table entries for 32 and 64 bit ELF files.
+ if (elf_file_class_ == ELFCLASS64) { + UpdateSymbolsForExecutable<ELFIO::Elf64_Sym>( + text_segment_start, data_segment_start, bss_segment_start); + } else if (elf_file_class_ == ELFCLASS32) { + UpdateSymbolsForExecutable<ELFIO::Elf32_Sym>( + text_segment_start, data_segment_start, bss_segment_start); + } else { + return absl::InternalError( + absl::StrCat("Unsupported ELF file class: ", elf_file_class_)); + } + + // Update the section address map so that each section starts at the right + // address, i.e., it no longer tracks the offset within each section, but the + // absolute address. + section_address_map_[text_section_] = text_segment_start; + section_address_map_[data_section_] = data_segment_start; + section_address_map_[bss_section_] = bss_segment_start; + + // Pass in the relocation vector to the second pass of parsing, but ignore + // the values, since we are creating an executable file, and all the symbols + // are resolved. + std::vector<RelocationInfo> relo_vector; + auto status = ParsePassTwo(relo_vector); + if (!status.ok()) return status; + + // Add sections to the segments. First segment gets the text section. The + // second segment gets the data and bss sections. + if (text_segment != nullptr) { + text_segment->add_section_index(text_section_->get_index(), + text_section_->get_addr_align()); + } + if (data_segment != nullptr) { + if (data_section_ != nullptr) data_segment->add_section_index(data_section_->get_index(), + data_section_->get_addr_align()); + if (bss_section_ != nullptr) data_segment->add_section_index(bss_section_->get_index(), + bss_section_->get_addr_align()); + } + + auto res = SimpleTextToInt<uint64_t>(entry_point, symbol_resolver_); + if (!res.ok()) return res.status(); + uint64_t entry_point_value = res.value(); + + symbol_accessor_->arrange_local_symbols(); + writer_.set_entry(entry_point_value); + return absl::OkStatus(); +} + +namespace { + +// Helper function to add a relocation entry to a relocation section.
+// Appends one relocation record of type RelocaType to reloca_section for each
+// entry in relo_vector. RelocaType is instantiated below with
+// ELFIO::Elf64_Rela and ELFIO::Elf32_Rela. symbol_indices maps a symbol name
+// to the symbol index stored in the record. Returns InvalidArgumentError if
+// an entry names a symbol that is not in symbol_indices; records appended
+// before the failing entry are left in the section.
+template <typename RelocaType>
+absl::Status AddRelocationEntries(
+    const std::vector<RelocationInfo> &relo_vector,
+    const absl::flat_hash_map<std::string, ELFIO::Elf_Word> &symbol_indices,
+    ELFIO::section *reloca_section) {
+  for (auto const &relo : relo_vector) {
+    RelocaType rela;
+    rela.r_offset = relo.offset;
+    rela.r_addend = relo.addend;
+    auto iter = symbol_indices.find(relo.symbol);
+    if (iter == symbol_indices.end()) {
+      return absl::InvalidArgumentError(
+          absl::StrCat("Symbol '", relo.symbol, "' not found"));
+    }
+    // Use the r_info packing macro that matches the record width.
+    if (sizeof(RelocaType) == sizeof(ELFIO::Elf64_Rela)) {
+      rela.r_info = ELF64_R_INFO(iter->second, relo.type);
+    } else {
+      rela.r_info = ELF32_R_INFO(iter->second, relo.type);
+    }
+    reloca_section->append_data(reinterpret_cast<const char *>(&rela),
+                                sizeof(RelocaType));
+  }
+  return absl::OkStatus();
+}
+
+}  // namespace
+
+// Prepares the ELF image as a relocatable file: zeroes the text, data and bss
+// section addresses, rearranges the local symbols, rebuilds the symbol index
+// map, runs the second parsing pass, and finally adds a ".rela<section name>"
+// section for every section that received relocations. Returns the first
+// error encountered, or InternalError for an unsupported ELF class or an
+// unknown section index.
+absl::Status SimpleAssembler::CreateRelocatable() {
+  // Reset the section address map to zero since we are creating a relocatable
+  // file.
+  section_address_map_[text_section_] = 0;
+  section_address_map_[data_section_] = 0;
+  section_address_map_[bss_section_] = 0;
+
+  // Rearrange local symbols.
+  symbol_accessor_->arrange_local_symbols(nullptr);
+  // Since the symbols now are rearranged, recompute the symbol index map, and
+  // also set global symbols flag for those in the global_symbols_ set.
+  symbol_indices_.clear();
+  // Different size symbol table entries for 32 and 64 bit ELF files.
+  if (elf_file_class_ == ELFCLASS64) {
+    UpdateSymbolsForRelocatable<ELFIO::Elf64_Sym>();
+  } else if (elf_file_class_ == ELFCLASS32) {
+    UpdateSymbolsForRelocatable<ELFIO::Elf32_Sym>();
+  } else {
+    return absl::InternalError(
+        absl::StrCat("Unsupported ELF file class: ", elf_file_class_));
+  }
+
+  // Parse the source again, collect relocations.
+  std::vector<RelocationInfo> relo_vector;
+  auto status = ParsePassTwo(relo_vector);
+  if (!status.ok()) return status;
+
+  // Handle relocations if there are any.
+  if (!relo_vector.empty()) {
+    // First scan through the entries relocation vector and group them by
+    // the section in which the relocation is to be applied.
+    absl::flat_hash_map<uint16_t, std::vector<RelocationInfo>> relo_map;
+    for (auto const &relo : relo_vector) {
+      relo_map[relo.section_index].push_back(relo);
+    }
+    for (auto const &[section_index, relo_vec] : relo_map) {
+      if (section_index == 0) {
+        return absl::InternalError(
+            "Relocation entry with section index 0 not supported");
+      }
+      if (!section_index_map_.contains(section_index)) {
+        return absl::InternalError(
+            absl::StrCat("Section index not found: ", section_index));
+      }
+      // Now, create a relocation section for each key in the map.
+      // NOTE(review): only the section contents are written here; the section
+      // type, link, info and entry size are not set in this function - confirm
+      // they are set elsewhere or add them.
+      std::string name =
+          absl::StrCat(".rela", section_index_map_[section_index]->get_name());
+      auto *rela_section = writer_.sections.add(name);
+      // Process the relocation vector entries. A distinct name is used so the
+      // pass-two status above is not shadowed.
+      absl::Status relo_status;
+      if (elf_file_class_ == ELFCLASS64) {
+        relo_status = AddRelocationEntries<ELFIO::Elf64_Rela>(
+            relo_vec, symbol_indices_, rela_section);
+      } else if (elf_file_class_ == ELFCLASS32) {
+        relo_status = AddRelocationEntries<ELFIO::Elf32_Rela>(
+            relo_vec, symbol_indices_, rela_section);
+      } else {
+        return absl::InternalError(
+            absl::StrCat("Unsupported ELF file class: ", elf_file_class_));
+      }
+      if (!relo_status.ok()) return relo_status;
+    }
+  }
+  return absl::OkStatus();
+}
+
+// Second parsing pass. Re-parses every line saved in lines_ with a resolver
+// built from the symbol table and the symbol index map, appends the produced
+// bytes to the section that was current before the line was parsed (bss gets
+// no data), and stamps each relocation added by a statement with that
+// section's index. Returns the first parse error.
+absl::Status SimpleAssembler::ParsePassTwo(
+    std::vector<RelocationInfo> &relo_vector) {
+  // For the second pass, we need a symbol resolver that uses the symbol
+  // table and the symbol indices.
+  // NOTE(review): allocated with raw new; where it is released is not visible
+  // from this function - confirm the previous resolver is not leaked.
+  symbol_resolver_ =
+      new SymbolResolver(elf_file_class_, symtab_, symbol_indices_);
+
+  // Now fill in the sections. Parse each of the lines saved in the first
+  // pass.
+  for (auto const &line : lines_) {
+    std::vector<uint8_t> byte_vector;
+    absl::Status status;
+    auto *section = current_section_;
+    if (line[0] == '.') {
+      // Assign to the outer status (no `auto`) so the error check below
+      // actually sees the result.
+      status = ParseAsmDirective(line, symbol_resolver_, byte_vector);
+    } else {
+      auto relo_size = relo_vector.size();
+      status =
+          ParseAsmStatement(line, symbol_resolver_, byte_vector, relo_vector);
+      // Update section information in the relocation vector.
+      for (auto i = relo_size; i < relo_vector.size(); ++i) {
+        relo_vector[i].section_index = section->get_index();
+      }
+    }
+    if (!status.ok()) return status;
+    if (byte_vector.empty()) continue;
+    // Add data to the section, but first make sure it's not bss.
+    if (section != bss_section_) {
+      section->append_data(reinterpret_cast<const char *>(byte_vector.data()),
+                           byte_vector.size());
+    }
+  }
   return absl::OkStatus();
 }
 
 // Top level function that writes the ELF file out to disk.
 absl::Status SimpleAssembler::Write(std::ostream &os) {
-  if (entry_point_.empty()) return absl::NotFoundError("Entry point not set");
-  auto res = SimpleTextToInt<uint64_t>(entry_point_, symbol_resolver_);
-  if (!res.ok()) return res.status();
-  symbol_accessor_->arrange_local_symbols();
-  writer_.set_entry(res.value());
   writer_.save(os);
   return absl::OkStatus();
 }
 
-// Parse and process an assembly directive. The assembly directive is expected
-// to be in the form of a line starting with a period followed by a directive
-// name and an optional argument. The argument is a string of tokens separated
-// by spaces. The argument is parsed using regular expressions. The byte values
-// are appended to the given vector.
+// Parse and process an assembly directive. The assembly directive is
+// expected to be in the form of a line starting with a period followed by a
+// directive name and an optional argument. The argument is a string of
+// tokens separated by spaces. The argument is parsed using regular
+// expressions. The byte values are appended to the given vector.
absl::Status SimpleAssembler::ParseAsmDirective( absl::string_view directive, ResolverInterface *resolver, std::vector<uint8_t> &byte_values) { @@ -583,9 +773,6 @@ } else if (match == "data") { // .data SetDataSection(".data"); - } else if (match == "entry") { - // .entry <name>|<address> - entry_point_ = remainder; } else if (match == "global") { // .global <name> auto res = GetLabels(remainder); @@ -679,15 +866,17 @@ return absl::OkStatus(); } -// Parse and process an assembly statement. The assembly statement is expected -// to be a single line of text. The byte values are appended to the given -// vector. +// Parse and process an assembly statement. The assembly statement is +// expected to be a single line of text. The byte values are appended to the +// given vector. absl::Status SimpleAssembler::ParseAsmStatement( absl::string_view statement, ResolverInterface *resolver, - std::vector<uint8_t> &byte_values) { + std::vector<uint8_t> &byte_values, + std::vector<RelocationInfo> &relocations) { // Call the target specific assembler to encode the statement. auto status = opcode_assembler_if_->Encode( - section_address_map_[current_section_], statement, resolver, byte_values); + section_address_map_[current_section_], statement, resolver, byte_values, + relocations); if (!status.ok()) return status; section_address_map_[current_section_] += byte_values.size(); return absl::OkStatus(); @@ -706,6 +895,8 @@ section->set_addr_align(0x10); // Should probably add the section symbol to the symbol table. current_section_ = section; + text_section_ = section; + section_index_map_.insert({section->get_index(), text_section_}); } void SimpleAssembler::SetDataSection(const std::string &name) { @@ -721,6 +912,8 @@ section->set_addr_align(0x10); // Should probably add the section symbol to the symbol table. 
current_section_ = section; + data_section_ = section; + section_index_map_.insert({section->get_index(), data_section_}); } void SimpleAssembler::SetBssSection(const std::string &name) { @@ -732,8 +925,12 @@ } section = writer_.sections.add(name); section->set_type(SHT_NOBITS); - section->set_flags(SHF_ALLOC); + section->set_flags(SHF_ALLOC | SHF_WRITE); section->set_addr_align(0x10); + // Should probably add the section symbol to the symbol table. + current_section_ = section; + bss_section_ = section; + section_index_map_.insert({section->get_index(), bss_section_}); } absl::Status SimpleAssembler::AddSymbol(const std::string &name, @@ -741,17 +938,28 @@ ELFIO::Elf_Xword size, uint8_t type, uint8_t binding, uint8_t other, ELFIO::section *section) { - if (symbol_indices_.contains(name)) { + auto iter = symbol_indices_.find(name); + if (iter != symbol_indices_.end()) { return absl::AlreadyExistsError( absl::StrCat("Symbol '", name, "' already exists")); } - auto res = - symbol_accessor_->add_symbol(*string_accessor_, name.c_str(), value, size, - binding, type, other, section->get_index()); - symbol_indices_.insert({name, res}); + auto index = symbol_accessor_->add_symbol( + *string_accessor_, name.c_str(), value, size, binding, type, other, + section == nullptr ? SHN_UNDEF : section->get_index()); + symbol_indices_.insert({name, index}); + // If the symbol was marked undefined previously, remove it from the set. + if (undefined_symbols_.contains(name)) undefined_symbols_.erase(name); return absl::OkStatus(); } +void SimpleAssembler::SimpleAddSymbol(absl::string_view name) { + // If the symbol exists, then just return. + if (symbol_indices_.contains(name)) return; + if (undefined_symbols_.contains(name)) return; + std::string name_str(name); + undefined_symbols_.insert(name_str); +} + absl::Status SimpleAssembler::AppendData(const char *data, size_t size) { if (current_section_ == nullptr) { return absl::FailedPreconditionError("No current section");
diff --git a/mpact/sim/util/asm/simple_assembler.h b/mpact/sim/util/asm/simple_assembler.h index ce72af9..e40f5d1 100644 --- a/mpact/sim/util/asm/simple_assembler.h +++ b/mpact/sim/util/asm/simple_assembler.h
@@ -52,7 +52,6 @@ class SimpleAssembler { public: SimpleAssembler(int elf_file_class, int os_abi, int type, int machine, - uint64_t base_address, OpcodeAssemblerInterface *opcode_assembler_if); SimpleAssembler(const SimpleAssembler &) = delete; SimpleAssembler &operator=(const SimpleAssembler &) = delete; @@ -60,15 +59,32 @@ // Parse the input stream as assembly. absl::Status Parse(std::istream &is); - // Set the entry point. Either pass a symbol or an address. - absl::Status SetEntryPoint(const std::string &value); - absl::Status SetEntryPoint(uint64_t value); + // Create executable ELF file with the given value as the entry point. + // The text segment will be laid out starting at base address, followed by + // the data segment. + absl::Status CreateExecutable(uint64_t base_address, + const std::string &entry_point); + absl::Status CreateExecutable(uint64_t base_address, uint64_t entry_point); + // Helper function called during symbol accessor arrange_local_symbols() to + // swap the local and non-local symbols. + void SwapSymbols(ELFIO::Elf_Half non_local, ELFIO::Elf_Half local); + // Create a relocatable ELF file. + absl::Status CreateRelocatable(); // Write out the ELF file. absl::Status Write(std::ostream &os); ELFIO::elfio &writer() { return writer_; } private: + // Helper function to update the symbol table entries. + template <typename SymbolType> + void UpdateSymbolsForExecutable(uint64_t text_segment_start, + uint64_t data_segment_start, + uint64_t bss_segment_start); + template <typename SymbolType> + void UpdateSymbolsForRelocatable(); + // Perform second pass of parsing. + absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector); // Parse and process an assembly directive. absl::Status ParseAsmDirective(absl::string_view directive, ResolverInterface *resolver, @@ -76,11 +92,14 @@ // Parse and process and assembly statement. 
absl::Status ParseAsmStatement(absl::string_view statement, ResolverInterface *resolver, - std::vector<uint8_t> &byte_values); + std::vector<uint8_t> &byte_values, + std::vector<RelocationInfo> &relocations); // Add the symbol to the symbol table. absl::Status AddSymbol(const std::string &name, ELFIO::Elf64_Addr value, ELFIO::Elf_Xword size, uint8_t type, uint8_t binding, uint8_t other, ELFIO::section *section); + // Add a symbol reference to the symbol table if it is not already defined. + void SimpleAddSymbol(absl::string_view name); // Append the data to the current section. absl::Status AppendData(const char *data, size_t size); @@ -96,6 +115,8 @@ ELFIO::elfio writer_; // The current section being processed. ELFIO::section *current_section_ = nullptr; + // Map from section index to section pointer. + absl::flat_hash_map<uint16_t, ELFIO::section *> section_index_map_; // Interface used to parse and encode assembly statements. OpcodeAssemblerInterface *opcode_assembler_if_ = nullptr; // Interface used to access strings in the string table. @@ -109,14 +130,14 @@ // Map that tracks the current address of each section. absl::flat_hash_map<ELFIO::section *, uint64_t> section_address_map_; - // Base address of the ELF file that is to be written. - uint64_t base_address_ = 0; - // Program entry point. - std::string entry_point_; // Current symbol resolver (looks up symbols in the symbol table and returns // their values). ResolverInterface *symbol_resolver_ = nullptr; std::vector<std::string> lines_; + // Section pointers. + ELFIO::section *text_section_ = nullptr; + ELFIO::section *data_section_ = nullptr; + ELFIO::section *bss_section_ = nullptr; // Regular expressions used to parse the assembly source. RE2 comment_re_; RE2 asm_line_re_; @@ -124,7 +145,9 @@ // Set of symbol names declared as global. absl::flat_hash_set<std::string> global_symbols_; // Map from symbol name to symbol index in the symbol table. 
- absl::flat_hash_map<std::string, ELFIO::Elf_Xword> symbol_indices_; + absl::flat_hash_map<std::string, ELFIO::Elf_Word> symbol_indices_; + // Set of undefined symbols. + absl::flat_hash_set<std::string> undefined_symbols_; }; } // namespace assembler
diff --git a/mpact/sim/util/asm/test/BUILD b/mpact/sim/util/asm/test/BUILD index 38287ba..355a061 100644 --- a/mpact/sim/util/asm/test/BUILD +++ b/mpact/sim/util/asm/test/BUILD
@@ -60,6 +60,7 @@ testonly = True, srcs = [ "riscv64x_bin_encoder_interface.cc", + "riscv_bin_setters.cc", ], hdrs = [ "riscv64x_bin_encoder_interface.h", @@ -71,6 +72,7 @@ ":riscv64x_isa", "//mpact/sim/generic:type_helpers", "//mpact/sim/util/asm", + "@com_google_absl//absl/base:no_destructor", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/log", @@ -78,11 +80,13 @@ "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@com_googlesource_code_re2//:re2", ], ) cc_test( name = "riscv64x_asm_test", + size = "small", srcs = ["riscv64x_asm_test.cc"], deps = [ ":riscv64x_bin_fmt", @@ -97,6 +101,7 @@ "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], )
diff --git a/mpact/sim/util/asm/test/riscv64x.isa b/mpact/sim/util/asm/test/riscv64x.isa index 2ec7460..b93fc56 100644 --- a/mpact/sim/util/asm/test/riscv64x.isa +++ b/mpact/sim/util/asm/test/riscv64x.isa
@@ -36,15 +36,15 @@ disasm: "Illegal instruction at %(@:08x)", semfunc: "&RiscVIllegalInstruction"; opcodes { - addi{: rs1, I_imm12 : rd}, + addi{: rs1, %reloc(I_imm12) : rd}, disasm: "addi", "%rd, %rs1, %I_imm12"; - lui{: U_imm20 : rd}, + lui{: %reloc(U_imm20) : rd}, disasm: "lui", "%rd, %(U_imm20:08x)"; - sd{: rs1, S_imm12, rs2 : }, + sd{: rs1, %reloc(S_imm12), rs2 : }, disasm: "sd", "%rs2, %S_imm12(%rs1)"; - jal{: J_imm20 : next_pc, rd}, + jal{: %reloc(J_imm20) : next_pc, rd}, disasm: "jal", "%rd, %(@+J_imm20:08x)"; - j{: J_imm20 : next_pc, rd}, + j{: %reloc(J_imm20) : next_pc, rd}, disasm: "j", "%(@+J_imm20:08x)"; slli{: rs1, I_uimm6 : rd}, disasm: "slli", "%rd, %rs1, %(I_uimm6:x)"; @@ -52,7 +52,7 @@ disasm: "ebreak"; srai{: rs1, I_uimm6 : rd}, disasm: "srai", "%rd, %rs1, %(I_uimm6:x)"; - jr{: rs1, J_imm12 : next_pc, rd}, + jr{: rs1, %reloc(J_imm12) : next_pc, rd}, disasm: "jr", "%rs1, %(J_imm12:08x)"; } }
diff --git a/mpact/sim/util/asm/test/riscv64x_asm_test.cc b/mpact/sim/util/asm/test/riscv64x_asm_test.cc index 00c2558..ceeb124 100644 --- a/mpact/sim/util/asm/test/riscv64x_asm_test.cc +++ b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
@@ -1,3 +1,17 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include <cstdint> #include <sstream> #include <string> @@ -7,6 +21,7 @@ #include "absl/log/check.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "elfio/elf_types.hpp" #include "elfio/elfio.hpp" #include "elfio/elfio_symbols.hpp" @@ -26,6 +41,7 @@ using ::mpact::sim::riscv::isa64::RiscV64XBinEncoderInterface; using ::mpact::sim::riscv::isa64::Riscv64xSlotMatcher; using ::mpact::sim::util::assembler::OpcodeAssemblerInterface; +using ::mpact::sim::util::assembler::RelocationInfo; using ::mpact::sim::util::assembler::ResolverInterface; using ::mpact::sim::util::assembler::SimpleAssembler; @@ -35,10 +51,10 @@ RiscV64XAssembler(Riscv64xSlotMatcher* matcher) : matcher_(matcher) {}; ~RiscV64XAssembler() override = default; absl::Status Encode(uint64_t address, absl::string_view text, - ResolverInterface* resolver, - std::vector<uint8_t>& bytes) override { + ResolverInterface* resolver, std::vector<uint8_t>& bytes, + std::vector<RelocationInfo>& relocations) override { // Call the slot matcher to get the encoded value. - auto res = matcher_->Encode(address, text, 0, resolver); + auto res = matcher_->Encode(address, text, 0, resolver, relocations); if (!res.status().ok()) return res.status(); // Convert the value to a byte array. 
auto [value, size] = res.value(); @@ -62,15 +78,14 @@ ; text section .text .global main - .entry main main: addi a0, zero, 5 - lui a1, semihost_param - addi a1, a1, semihost_param + lui a1, %hi(semihost_param) + addi a1, a1, %lo(semihost_param) addi t0, zero, 2 sd t0, 0(a1) - lui t2, hello - addi t2, t2, hello + lui t2, %hi(hello) + addi t2, t2, %lo(hello) sd t2, 8(a1) addi t0, zero, 12 sd t0, 0x10(a1) @@ -116,7 +131,7 @@ CHECK_OK(matcher_.Initialize()); // Create the assembler. assembler_ = new SimpleAssembler(ELFCLASS64, ELFOSABI_LINUX, ET_EXEC, - EM_RISCV, 0x1000, &riscv_64x_assembler_); + EM_RISCV, &riscv_64x_assembler_); std::istringstream source(*kTestAssembly); // Parse the assembly code. auto status = assembler_->Parse(source); @@ -127,6 +142,7 @@ // Access the ELF writer. ELFIO::elfio& elf() { return assembler_->writer(); } + SimpleAssembler* assembler() const { return assembler_; } private: RiscV64XBinEncoderInterface bin_encoder_interface_; @@ -150,6 +166,8 @@ // Verify that the information about the text section is as expected. 
TEST_F(RiscV64XAssemblerTest, Text) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); auto* text = elf().sections[".text"]; EXPECT_EQ(text->get_type(), SHT_PROGBITS); EXPECT_EQ(text->get_flags(), SHF_ALLOC | SHF_EXECINSTR); @@ -158,6 +176,8 @@ } TEST_F(RiscV64XAssemblerTest, Data) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); auto* data = elf().sections[".data"]; EXPECT_EQ(data->get_type(), SHT_PROGBITS); EXPECT_EQ(data->get_flags(), SHF_ALLOC | SHF_WRITE); @@ -168,6 +188,8 @@ } TEST_F(RiscV64XAssemblerTest, Bss) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); auto* bss = elf().sections[".bss"]; EXPECT_EQ(bss->get_type(), SHT_NOBITS); EXPECT_EQ(bss->get_flags(), SHF_ALLOC | SHF_WRITE); @@ -176,7 +198,48 @@ EXPECT_EQ(bss->get_size(), 32); } -TEST_F(RiscV64XAssemblerTest, Symbols) { +TEST_F(RiscV64XAssemblerTest, RelocatableSymbols) { + auto status = assembler()->CreateRelocatable(); + CHECK_OK(status) << status.message(); + auto* symtab = elf().sections[".symtab"]; + ELFIO::symbol_section_accessor symbols(elf(), symtab); + ELFIO::Elf64_Addr value; + ELFIO::Elf_Xword size; + unsigned char bind; + unsigned char type; + ELFIO::Elf_Half section_index; + unsigned char other; + // Verify that main is valued 0x0, global and located in the text section. + symbols.get_symbol("main", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0x0); + EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that exit is valued 16 * 4, local and located in the text + // section. 
+ symbols.get_symbol("exit", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 16 * 4); + EXPECT_EQ(bind, STB_LOCAL); + EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that hello is global and located in the data section at offset 0. + symbols.get_symbol("hello", value, size, bind, type, section_index, other); + EXPECT_EQ(value, 0); + EXPECT_EQ(section_index, elf().sections[".data"]->get_index()); + EXPECT_EQ(bind, STB_GLOBAL); + EXPECT_EQ(type, STT_NOTYPE); + // Verify that semihost_param is local and located in the bss section at + // offset 16. + symbols.get_symbol("semihost_param", value, size, bind, type, section_index, + other); + EXPECT_EQ(value, 16); + EXPECT_EQ(section_index, elf().sections[".bss"]->get_index()); + EXPECT_EQ(bind, STB_LOCAL); + EXPECT_EQ(type, STT_NOTYPE); +} + +TEST_F(RiscV64XAssemblerTest, ExecutableSymbols) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); auto* symtab = elf().sections[".symtab"]; ELFIO::symbol_section_accessor symbols(elf(), symtab); ELFIO::Elf64_Addr value; @@ -214,7 +277,9 @@ } // Verify that the first 16 instructions were assembled correctly. -TEST_F(RiscV64XAssemblerTest, TextContent) { +TEST_F(RiscV64XAssemblerTest, ExecutableTextContent) { + auto status = assembler()->CreateExecutable(0x1000, "main"); + CHECK_OK(status) << status.message(); auto* text = elf().sections[".text"]; auto* data = text->get_data(); auto* word_data = reinterpret_cast<const uint32_t*>(data); @@ -237,4 +302,44 @@ EXPECT_EQ(word_data[15], 0x008000ef); // jal ra, semihost } +// Verify that the first 16 instructions were assembled correctly.
+TEST_F(RiscV64XAssemblerTest, RelocatableTextContent) {
+  auto status = assembler()->CreateRelocatable();
+  CHECK_OK(status) << status.message();
+  auto* text = elf().sections[".text"];
+  auto* data = text->get_data();
+  auto* word_data = reinterpret_cast<const uint32_t*>(data);
+  // Verify the first 16 instructions. These differ slightly from the
+  // executable version because symbol values are section relative here and
+  // have not been relocated to their final memory addresses.
+  EXPECT_EQ(word_data[0], 0x00500513);   // addi a0, zero, 5
+  EXPECT_EQ(word_data[1], 0x000005b7);   // lui a1, %hi(semihost_param)
+  EXPECT_EQ(word_data[2], 0x01058593);   // addi a1, a1, %lo(semihost_param)
+  EXPECT_EQ(word_data[3], 0x00200293);   // addi t0, zero, 2
+  EXPECT_EQ(word_data[4], 0x0055b023);   // sd t0, 0(a1)
+  EXPECT_EQ(word_data[5], 0x000003b7);   // lui t2, %hi(hello)
+  EXPECT_EQ(word_data[6], 0x00038393);   // addi t2, t2, %lo(hello)
+  EXPECT_EQ(word_data[7], 0x0075b423);   // sd t2, 8(a1)
+  EXPECT_EQ(word_data[8], 0x00c00293);   // addi t0, zero, 12
+  EXPECT_EQ(word_data[9], 0x0055b823);   // sd t0, 0x10(a1)
+  EXPECT_EQ(word_data[10], 0x01c000ef);  // jal ra, semihost
+  EXPECT_EQ(word_data[11], 0x01800513);  // addi a0, zero, 24
+  EXPECT_EQ(word_data[12], 0x000202b7);  // lui t0, 0x20026
+  EXPECT_EQ(word_data[13], 0x02628293);  // addi t0, t0, 0x20026
+  EXPECT_EQ(word_data[14], 0x0055b023);  // sd t0, 0(a1)
+  EXPECT_EQ(word_data[15], 0x008000ef);  // jal ra, semihost
+}
+
+TEST_F(RiscV64XAssemblerTest, TextRelocations) {
+  auto status = assembler()->CreateRelocatable();
+  CHECK_OK(status) << status.message();
+  auto* rela_section = elf().sections[".rela.text"];
+  ASSERT_NE(rela_section, nullptr);  // Fatal: the section is dereferenced below.
+  auto* rela_data = rela_section->get_data();
+  auto rela =
+      absl::MakeSpan(reinterpret_cast<const ELFIO::Elf64_Rela*>(rela_data),
+                     rela_section->get_size() / sizeof(ELFIO::Elf64_Rela));
+  EXPECT_EQ(rela.size(), 4);
+}
+
 }  // namespace
diff --git a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc index eab7024..9f45dbf 100644 --- a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc +++ b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.cc
@@ -16,6 +16,7 @@ #include <cstdint> #include <tuple> +#include <vector> #include "absl/status/status.h" #include "absl/status/statusor.h" @@ -24,6 +25,7 @@ #include "mpact/sim/generic/type_helpers.h" #include "mpact/sim/util/asm/resolver_interface.h" #include "mpact/sim/util/asm/test/riscv64x_bin_encoder.h" +#include "mpact/sim/util/asm/test/riscv64x_encoder.h" #include "mpact/sim/util/asm/test/riscv64x_enums.h" #include "mpact/sim/util/asm/test/riscv_bin_setters.h" @@ -40,6 +42,8 @@ source_op_map_); AddRiscvDestOpBinSetters<DestOpEnum, OpMap, encoding64::Encoder>( dest_op_map_); + AddRiscvSourceOpRelocationSetters<OpcodeEnum, SourceOpEnum, RelocationMap>( + relocation_source_op_map_); } absl::StatusOr<std::tuple<uint64_t, int>> @@ -61,6 +65,15 @@ return iter->second(address, text, resolver); } +absl::Status RiscV64XBinEncoderInterface::AppendSrcOpRelocation( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, SourceOpEnum source_op, int source_num, + ResolverInterface *resolver, std::vector<RelocationInfo> &relocations) { + auto iter = relocation_source_op_map_.find(std::tie(opcode, source_op)); + if (iter == relocation_source_op_map_.end()) return absl::OkStatus(); + return iter->second(address, text, resolver, relocations); +} + absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetDestOpEncoding( uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, DestOpEnum dest_op, int dest_num, @@ -73,6 +86,14 @@ return iter->second(address, text, resolver); } +absl::Status RiscV64XBinEncoderInterface::AppendDestOpRelocation( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, DestOpEnum dest_op, int dest_num, + ResolverInterface *resolver, std::vector<RelocationInfo> &relocations) { + // There are no destination operands that require relocation. 
+ return absl::OkStatus(); +} + absl::StatusOr<uint64_t> RiscV64XBinEncoderInterface::GetListDestOpEncoding( uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, ListDestOpEnum dest_op, int dest_num,
diff --git a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h index e837a64..4c1d341 100644 --- a/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h +++ b/mpact/sim/util/asm/test/riscv64x_bin_encoder_interface.h
@@ -18,8 +18,10 @@ #include <cstdint> #include <functional> #include <tuple> +#include <vector> #include "absl/container/flat_hash_map.h" +#include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "mpact/sim/util/asm/resolver_interface.h" @@ -48,10 +50,20 @@ uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, SourceOpEnum source_op, int source_num, ResolverInterface *resolver) override; + absl::Status AppendSrcOpRelocation( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, SourceOpEnum source_op, int source_num, + ResolverInterface *resolver, + std::vector<RelocationInfo> &relocations) override; absl::StatusOr<uint64_t> GetDestOpEncoding( uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, DestOpEnum dest_op, int dest_num, ResolverInterface *resolver) override; + absl::Status AppendDestOpRelocation( + uint64_t address, absl::string_view text, SlotEnum slot, int entry, + OpcodeEnum opcode, DestOpEnum dest_op, int dest_num, + ResolverInterface *resolver, + std::vector<RelocationInfo> &relocations) override; absl::StatusOr<uint64_t> GetListDestOpEncoding( uint64_t address, absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, ListDestOpEnum dest_op, int dest_num, @@ -69,8 +81,14 @@ using OpMap = absl::flat_hash_map< int, std::function<absl::StatusOr<uint64_t>(uint64_t, absl::string_view, ResolverInterface *)>>; + using RelocationMap = + absl::flat_hash_map<std::tuple<OpcodeEnum, SourceOpEnum>, + std::function<absl::Status( + uint64_t, absl::string_view, ResolverInterface *, + std::vector<RelocationInfo> &)>>; OpMap source_op_map_; + RelocationMap relocation_source_op_map_; OpMap dest_op_map_; OpMap list_dest_op_map_; OpMap list_source_op_map_;
diff --git a/mpact/sim/util/asm/test/riscv_bin_setters.cc b/mpact/sim/util/asm/test/riscv_bin_setters.cc new file mode 100644 index 0000000..165b407 --- /dev/null +++ b/mpact/sim/util/asm/test/riscv_bin_setters.cc
@@ -0,0 +1,148 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mpact/sim/util/asm/test/riscv_bin_setters.h"
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "absl/base/no_destructor.h"
+#include "absl/status/status.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "mpact/sim/generic/type_helpers.h"
+#include "re2/re2.h"
+
+namespace mpact {
+namespace sim {
+namespace riscv {
+
+namespace internal {
+
+// Relocation type codes stored in the RelocationInfo entries created below.
+// NOTE(review): the values appear to follow the R_RISCV_* numbering of the
+// RISC-V ELF psABI (e.g. 26 for HI20) - confirm against the psABI table.
+enum class RelocType {
+  kNone = 0,
+  kBranch = 16,
+  kJal = 17,
+  kPcrelHi20 = 23,
+  kPcrelLo12I = 24,
+  kPcrelLo12S = 25,
+  kHi20 = 26,
+  kLo12I = 27,
+  kLo12S = 28,
+};
+
+// Brings in the unary operator* applied to RelocType enumerators below
+// (presumably it yields the underlying integer value - confirm in
+// type_helpers.h).
+using ::mpact::sim::generic::operator*;  // NOLINT(misc-unused-using-decls)
+
+// Matches an operand written as a relocation function, e.g. "%lo(sym)".
+// Capture 1 is the function name including the leading '%'; capture 2 is the
+// argument text. The pattern treats the parentheses as optional.
+absl::NoDestructor<RE2> kSymRe("^\\s*(%[a-zA-Z0-9_]+)\\s*\\(?([^)]+)\\)?\\s*$");
+
+// The Relocate<Opcode><Operand> functions below inspect the text of one source
+// operand and append a RelocationInfo entry when it uses a relocation function.
+// NOTE(review): `address` and `resolver` are unused, and every entry is created
+// with 0 for all numeric fields except the type - presumably the caller fills
+// those in; confirm.
+
+// addi immediate: %lo(sym) adds a kLo12I entry and %pcrel_lo(sym) a kPcrelLo12I
+// entry. An operand that is not a relocation function adds nothing; any other
+// relocation function is rejected with InvalidArgumentError.
+absl::Status RelocateAddiIImm12(uint64_t address, absl::string_view text,
+                                ResolverInterface *resolver,
+                                std::vector<RelocationInfo> &relocations) {
+  std::string relo;
+  std::string sym;
+  if (!RE2::FullMatch(text, *kSymRe, &relo, &sym)) return absl::OkStatus();
+  if (relo == "%lo") {
+    relocations.emplace_back(0, sym, *RelocType::kLo12I, 0, 0);
+    return absl::OkStatus();
+  }
+  if (relo == "%pcrel_lo") {
+    relocations.emplace_back(0, sym, *RelocType::kPcrelLo12I, 0, 0);
+    return absl::OkStatus();
+  }
+  return absl::InvalidArgumentError(
+      absl::StrCat("Invalid relocation: '", relo, "'"));
+}
+
+// JImm20 operand: any relocation function adds a kJal entry (the function name
+// is not checked). An operand that is not a relocation function adds nothing.
+absl::Status RelocateJJImm20(uint64_t address, absl::string_view text,
+                             ResolverInterface *resolver,
+                             std::vector<RelocationInfo> &relocations) {
+  std::string relo;
+  std::string sym;
+  if (!RE2::FullMatch(text, *kSymRe, &relo, &sym)) return absl::OkStatus();
+
+  relocations.emplace_back(0, sym, *RelocType::kJal, 0, 0);
+  return absl::OkStatus();
+}
+
+// jr immediate: no relocation is ever emitted.
+absl::Status RelocateJrJImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations) {
+  return absl::OkStatus();
+}
+
+// lui immediate: %hi(sym) adds a kHi20 entry. An operand that is not a
+// relocation function adds nothing; any other relocation function is rejected
+// with InvalidArgumentError, as in the addi and sd handlers.
+absl::Status RelocateLuiUImm20(uint64_t address, absl::string_view text,
+                               ResolverInterface *resolver,
+                               std::vector<RelocationInfo> &relocations) {
+  std::string relo;
+  std::string sym;
+  if (!RE2::FullMatch(text, *kSymRe, &relo, &sym)) return absl::OkStatus();
+  if (relo == "%hi") {
+    relocations.emplace_back(0, sym, *RelocType::kHi20, 0, 0);
+    return absl::OkStatus();
+  }
+  return absl::InvalidArgumentError(
+      absl::StrCat("Invalid relocation: '", relo, "'"));
+}
+
+// sd immediate: %lo(sym) adds a kLo12S entry and %pcrel_lo(sym) a kPcrelLo12S
+// entry. An operand that is not a relocation function adds nothing; any other
+// relocation function is rejected with InvalidArgumentError.
+absl::Status RelocateSdSImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations) {
+  std::string relo;
+  std::string sym;
+  if (!RE2::FullMatch(text, *kSymRe, &relo, &sym)) return absl::OkStatus();
+  if (relo == "%lo") {
+    relocations.emplace_back(0, sym, *RelocType::kLo12S, 0, 0);
+    return absl::OkStatus();
+  }
+  if (relo == "%pcrel_lo") {
+    relocations.emplace_back(0, sym, *RelocType::kPcrelLo12S, 0, 0);
+    return absl::OkStatus();
+  }
+  return absl::InvalidArgumentError(
+      absl::StrCat("Invalid relocation: '", relo, "'"));
+}
+
+}  // namespace internal
+
+}  // namespace riscv
+}  // namespace sim
+}  // namespace mpact
diff --git a/mpact/sim/util/asm/test/riscv_bin_setters.h b/mpact/sim/util/asm/test/riscv_bin_setters.h index e34d36f..d461381 100644 --- a/mpact/sim/util/asm/test/riscv_bin_setters.h +++ b/mpact/sim/util/asm/test/riscv_bin_setters.h
@@ -17,7 +17,9 @@
 
 #include <cstdint>
 #include <initializer_list>
+#include <string>
 #include <utility>
+#include <vector>
 
 #include "absl/container/flat_hash_map.h"
 #include "absl/status/status.h"
@@ -25,13 +27,16 @@
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
+#include "mpact/sim/util/asm/opcode_assembler_interface.h"
 #include "mpact/sim/util/asm/resolver_interface.h"
 #include "mpact/sim/util/asm/test/riscv_getter_helpers.h"
+#include "re2/re2.h"
 
 namespace mpact {
 namespace sim {
 namespace riscv {
 
+using ::mpact::sim::util::assembler::RelocationInfo;
 using ::mpact::sim::util::assembler::ResolverInterface;
 
 constexpr std::initializer_list<const std::pair<absl::string_view, uint64_t>>
@@ -51,21 +56,49 @@
     {"t3", 28}, {"t4", 29}, {"t5", 30}, {"t6", 31}};
 
+// Converts operand text to a value of type T. If the whole operand is a
+// relocation function of the form %name(arg), where arg consists of letters,
+// digits and underscores, only arg is converted. The text is tried, in order,
+// as a 0x-prefixed hexadecimal literal, as a decimal literal with an optional
+// leading minus sign, and as a symbol that is looked up with the resolver.
+// Returns an invalid argument error if no form matches, if a literal cannot be
+// parsed into T, or if a symbol is given but the resolver is null. A failed
+// symbol lookup returns the status reported by the resolver.
 template <typename T>
-absl::StatusOr<T> SimpleTextToInt(absl::string_view text,
+absl::StatusOr<T> SimpleTextToInt(absl::string_view op_text,
                                   ResolverInterface *resolver) {
   T value;
-  if (text.substr(0, 2) == "0x") {
-    if (absl::SimpleHexAtoi(text.substr(2), &value)) return value;
+  static RE2 hex_re("^\\s*0x([0-9a-fA-F]+)\\s*$");
+  static RE2 dec_re("^\\s*(-?[0-9]+)\\s*$");
+  static RE2 relo_re("^\\s*\\%[a-zA-Z0-9_]+\\s*\\(([a-zA-Z0-9_]+)\\s*\\)\\s*$");
+  static RE2 symbol_re("^\\s*([a-zA-Z0-9_]+)\\s*$");
+  std::string str;
+  std::string text(op_text);
+  // First see if the operand is a relocation function, and extract the text
+  // argument. A relocation function is on the form of %name(arg).
+  if (RE2::FullMatch(op_text, relo_re, &str)) {
+    text = str;
+  }
+  // Extract the hex immediate.
+  if (RE2::FullMatch(text, hex_re, &str)) {
+    if (absl::SimpleHexAtoi(str, &value)) return value;
     return absl::InvalidArgumentError(
         absl::StrCat("Invalid hexadecimal immediate: ", text));
   }
-  if (absl::SimpleAtoi(text, &value)) return value;
-  if (resolver != nullptr) {
-    auto res = resolver->Resolve(text);
-    if (!res.ok()) {
-      return res.status();
+  // Extract the decimal immediate.
+  if (RE2::FullMatch(text, dec_re, &str)) {
+    if (absl::SimpleAtoi(str, &value)) return value;
+    return absl::InvalidArgumentError(
+        absl::StrCat("Invalid decimal immediate: ", text));
+  }
+  // Extract the symbol.
+  if (RE2::FullMatch(text, symbol_re, &str)) {
+    if (resolver != nullptr) {
+      auto res = resolver->Resolve(str);
+      if (!res.ok()) {
+        return res.status();
+      }
+      return static_cast<T>(res.value());
     }
-    return static_cast<T>(res.value());
   }
   return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text));
 }
@@ -157,6 +190,48 @@
   });
 }
 
+namespace internal {
+
+// Relocation setters for individual source operands. Each takes an address,
+// the operand text, the symbol resolver and the vector that receives any
+// relocation the operand requires. They are defined in
+// riscv_bin_setters.cc.
+absl::Status RelocateAddiIImm12(uint64_t address, absl::string_view text,
+                                ResolverInterface *resolver,
+                                std::vector<RelocationInfo> &relocations);
+absl::Status RelocateJJImm20(uint64_t address, absl::string_view text,
+                             ResolverInterface *resolver,
+                             std::vector<RelocationInfo> &relocations);
+absl::Status RelocateJrJImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations);
+absl::Status RelocateLuiUImm20(uint64_t address, absl::string_view text,
+                               ResolverInterface *resolver,
+                               std::vector<RelocationInfo> &relocations);
+absl::Status RelocateSdSImm12(uint64_t address, absl::string_view text,
+                              ResolverInterface *resolver,
+                              std::vector<RelocationInfo> &relocations);
+
+}  // namespace internal
+
+// Registers the relocation setters declared above in map, using Insert with
+// the (opcode, source operand) enum pair as key, for the addi, jal, j, jr, lui
+// and sd opcodes.
+template <typename OpcodeEnum, typename SourceOpEnum, typename Map>
+void AddRiscvSourceOpRelocationSetters(Map &map) {
+  Insert(map, OpcodeEnum::kAddi, SourceOpEnum::kIImm12,
+         internal::RelocateAddiIImm12);
+  Insert(map, OpcodeEnum::kJal, SourceOpEnum::kJImm20,
+         internal::RelocateJJImm20);
+  Insert(map, OpcodeEnum::kJ, SourceOpEnum::kJImm20, internal::RelocateJJImm20);
+  Insert(map, OpcodeEnum::kJr, SourceOpEnum::kJImm12,
+         internal::RelocateJrJImm12);
+  Insert(map, OpcodeEnum::kLui, SourceOpEnum::kUImm20,
+         internal::RelocateLuiUImm20);
+  Insert(map, OpcodeEnum::kSd, SourceOpEnum::kSImm12,
+         internal::RelocateSdSImm12);
+}
+
 }  // namespace riscv
 }  // namespace sim
 }  // namespace mpact
diff --git a/mpact/sim/util/asm/test/riscv_getter_helpers.h b/mpact/sim/util/asm/test/riscv_getter_helpers.h index 160bbe4..5c37128 100644 --- a/mpact/sim/util/asm/test/riscv_getter_helpers.h +++ b/mpact/sim/util/asm/test/riscv_getter_helpers.h
@@ -15,12 +15,8 @@
 #ifndef MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_
 #define MPACT_SIM_UTIL_ASM_TEST_RISCV_GETTER_HELPERS_H_
 
-#include <string>
-#include <vector>
+#include <tuple>
 
-#include "absl/container/flat_hash_map.h"
-#include "absl/functional/any_invocable.h"
-#include "absl/log/log.h"
 #include "absl/strings/string_view.h"
 
 // This file contains helper functions that are used to create commonly used
@@ -41,6 +37,13 @@
   }
 }
 
+// Sets the getter stored in map under the key (entry1, entry2), adding the
+// entry if the key is not present and replacing the stored getter otherwise.
+template <typename M, typename E1, typename E2, typename G>
+inline void Insert(M &map, E1 entry1, E2 entry2, G getter) {
+  map.insert_or_assign(std::make_tuple(entry1, entry2), getter);
+}
+
 constexpr absl::string_view kXregNames[32] = {
     "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10",
     "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21",