Fixed some issues in the ELF file when generating relocatable output. PiperOrigin-RevId: 715520295 Change-Id: Ib66f8f8188d39dfa57b9d9700d5d67e2449e029c
diff --git a/mpact/sim/util/asm/BUILD b/mpact/sim/util/asm/BUILD index e02ba66..5df423f 100644 --- a/mpact/sim/util/asm/BUILD +++ b/mpact/sim/util/asm/BUILD
@@ -45,9 +45,11 @@ "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/functional:bind_front", + "@com_google_absl//absl/log", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", "@com_googlesource_code_re2//:re2", ], )
diff --git a/mpact/sim/util/asm/simple_assembler.cc b/mpact/sim/util/asm/simple_assembler.cc index 76584f8..98c60c0 100644 --- a/mpact/sim/util/asm/simple_assembler.cc +++ b/mpact/sim/util/asm/simple_assembler.cc
@@ -27,10 +27,12 @@ #include "absl/container/flat_hash_set.h" #include "absl/functional/any_invocable.h" #include "absl/functional/bind_front.h" +#include "absl/log/log.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "elfio/elf_types.hpp" #include "elfio/elfio_section.hpp" #include "elfio/elfio_segment.hpp" @@ -84,7 +86,7 @@ auto iter = symbol_indices_.find(text); if (iter == symbol_indices_.end()) { return absl::InvalidArgumentError( - absl::StrCat("Symbol '", text, "' not found")); + absl::StrCat("SymbolResolver: Symbol '", text, "' not found")); } auto index = iter->second; if (elf_file_class_ == ELFCLASS64) { @@ -308,12 +310,12 @@ } // namespace -SimpleAssembler::SimpleAssembler(int elf_file_class, int os_abi, int type, - int machine, +SimpleAssembler::SimpleAssembler(absl::string_view comment, int elf_file_class, + int os_abi, int machine, OpcodeAssemblerInterface *opcode_assembler_if) : elf_file_class_(elf_file_class), opcode_assembler_if_(opcode_assembler_if), - comment_re_("^\\s*(?:;(.*))?$"), + comment_re_(absl::StrCat("^\\s*(?:", comment, "(.*))?$")), asm_line_re_("^(?:(?:(\\S+)\\s*:)?|\\s)\\s*([^;]*?)?\\s*(?:;(.*))?$"), directive_re_( "^\\.(align|bss|bytes|char|cstring|data|global|long|sect" @@ -324,12 +326,12 @@ // Configure the ELF file writer. writer_.create(elf_file_class_, ELFDATA2LSB); writer_.set_os_abi(os_abi); - writer_.set_type(ET_EXEC); writer_.set_machine(machine); // Create the symbol table section. symtab_ = writer_.sections.add(".symtab"); section_index_map_.insert({symtab_->get_index(), symtab_}); symtab_->set_type(SHT_SYMTAB); + symtab_->set_addr_align(0x8); symtab_->set_entry_size(elf_file_class_ == ELFCLASS64 ? sizeof(ELFIO::Elf64_Sym) : sizeof(ELFIO::Elf32_Sym)); @@ -337,12 +339,17 @@ strtab_ = writer_.sections.add(".strtab"); section_index_map_.insert({strtab_->get_index(), strtab_}); strtab_->set_type(SHT_STRTAB); + strtab_->set_addr_align(0x1); // Link the symbol table to the string table. symtab_->set_link(strtab_->get_index()); // Create the symbol and string table accessors. symbol_accessor_ = new ELFIO::symbol_section_accessor(writer_, symtab_); string_accessor_ = new ELFIO::string_section_accessor(writer_.sections[".strtab"]); + // Create .text, .data. and .bss sections. + SetTextSection(".text"); + SetDataSection(".data"); + SetBssSection(".bss"); } SimpleAssembler::~SimpleAssembler() { @@ -407,10 +414,11 @@ for (auto const &symbol : undefined_symbols_) { auto status = AddSymbol(symbol, 0, 0, STT_NOTYPE, 0, 0, nullptr); if (!status.ok()) { - return absl::InternalError( - absl::StrCat("Failed to add undefined symbol: ", symbol)); + return absl::InternalError(absl::StrCat( + "Failed to add undefined symbol '", symbol, "': ", status.message())); } } + undefined_symbols_.clear(); if (bss_section_ != nullptr) { bss_section_->set_size(section_address_map_[bss_section_]); @@ -462,7 +470,6 @@ for (int i = 0; i < num_symbols; ++i) { auto &sym = symbols[i]; std::string name = string_accessor_->get_string(sym.st_name); - symbol_indices_.insert({name, i}); if (global_symbols_.contains(name)) { sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info)); } @@ -483,6 +490,7 @@ } return absl::InvalidArgumentError(message); } + writer_.set_type(ET_EXEC); // Section sizes are now known. So let's compute the layout and update all // the symbol values/addresses before the next pass. // The layout is: @@ -603,18 +611,30 @@ } // namespace +template <typename SymbolType> +void SimpleAssembler::UpdateSymtabHeaderInfo() { + int last_local = 0; + auto syms = + absl::MakeSpan(reinterpret_cast<const SymbolType *>(symtab_->get_data()), + symtab_->get_size() / sizeof(SymbolType)); + for (int i = 0; i < syms.size(); ++i) { + auto name = string_accessor_->get_string(syms[i].st_name); + symbol_indices_.insert({name, i}); + if (ELF_ST_BIND(syms[i].st_info) == STB_LOCAL) last_local = i; + } + symtab_->set_info(last_local + 1); +} + absl::Status SimpleAssembler::CreateRelocatable() { + writer_.set_type(ET_REL); // Reset the section address map to zero since we are creating a relocatable // file. section_address_map_[text_section_] = 0; section_address_map_[data_section_] = 0; section_address_map_[bss_section_] = 0; - // Rearrange local symbols. - symbol_accessor_->arrange_local_symbols(nullptr); - // Since the symbols now are rearranged, recompute the symbol index map, and - // also set global symbols flag for those in the global_symbols_ set. - symbol_indices_.clear(); + // Since the symbols now are rearranged, we need to set global symbols flag + // for those in the global_symbols_ set. // Different size symbol table entries for 32 and 64 bit ELF files. if (elf_file_class_ == ELFCLASS64) { UpdateSymbolsForRelocatable<ELFIO::Elf64_Sym>(); @@ -624,6 +644,17 @@ return absl::InternalError( absl::StrCat("Unsupported ELF file class: ", elf_file_class_)); } + // Rearrange local symbols in the symbol table so that they are at the + // beginning (ELF requirement). + symbol_accessor_->arrange_local_symbols(nullptr); + // Find the last local symbol and set the section header info for symbtab + // to point to 1 past that. Update the symbol_indices_ map. + symbol_indices_.clear(); + if (elf_file_class_ == ELFCLASS64) { + UpdateSymtabHeaderInfo<ELFIO::Elf64_Sym>(); + } else { + UpdateSymtabHeaderInfo<ELFIO::Elf32_Sym>(); + } // Parse the source again, collect relocations. std::vector<RelocationInfo> relo_vector; @@ -651,6 +682,14 @@ std::string name = absl::StrCat(".rela", section_index_map_[section_index]->get_name()); auto *rela_section = writer_.sections.add(name); + rela_section->set_type(SHT_RELA); + rela_section->set_flags(SHF_INFO_LINK); + rela_section->set_entry_size(elf_file_class_ == ELFCLASS64 + ? sizeof(ELFIO::Elf64_Rela) + : sizeof(ELFIO::Elf32_Rela)); + rela_section->set_link(symtab_->get_index()); + rela_section->set_info(text_section_->get_index()); + rela_section->set_addr_align(8); // Process the relocation vector entries. absl::Status status; if (elf_file_class_ == ELFCLASS64) { @@ -686,11 +725,13 @@ auto status = ParseAsmDirective(line, symbol_resolver_, byte_vector); } else { auto relo_size = relo_vector.size(); + auto address = section_address_map_[section]; auto status = ParseAsmStatement(line, symbol_resolver_, byte_vector, relo_vector); // Update section information in the relocation vector. for (int i = relo_size; i < relo_vector.size(); ++i) { relo_vector[i].section_index = section->get_index(); + relo_vector[i].offset = address; } } if (!status.ok()) return status; @@ -890,6 +931,10 @@ return; } section = writer_.sections.add(name); + auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section); + if (!status.ok()) { + LOG(ERROR) << "Failed to add symbol for data section: " << status.message(); + } section->set_type(SHT_PROGBITS); section->set_flags(SHF_ALLOC | SHF_EXECINSTR); section->set_addr_align(0x10); @@ -907,6 +952,10 @@ return; } section = writer_.sections.add(name); + auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section); + if (!status.ok()) { + LOG(ERROR) << "Failed to add symbol for data section: " << status.message(); + } section->set_type(SHT_PROGBITS); section->set_flags(SHF_ALLOC | SHF_WRITE); section->set_addr_align(0x10); @@ -924,6 +973,10 @@ return; } section = writer_.sections.add(name); + auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section); + if (!status.ok()) { + LOG(ERROR) << "Failed to add symbol for bss section: " << status.message(); + } section->set_type(SHT_NOBITS); section->set_flags(SHF_ALLOC | SHF_WRITE); section->set_addr_align(0x10); @@ -940,6 +993,7 @@ ELFIO::section *section) { auto iter = symbol_indices_.find(name); if (iter != symbol_indices_.end()) { + if (section == nullptr) return absl::OkStatus(); return absl::AlreadyExistsError( absl::StrCat("Symbol '", name, "' already exists")); } @@ -947,8 +1001,14 @@ *string_accessor_, name.c_str(), value, size, binding, type, other, section == nullptr ? SHN_UNDEF : section->get_index()); symbol_indices_.insert({name, index}); - // If the symbol was marked undefined previously, remove it from the set. - if (undefined_symbols_.contains(name)) undefined_symbols_.erase(name); + // If this is not an undefined symbol reference, then see if the symbol name + // is part of the "current" undefined symbols, and if so, remove it. + if (section != nullptr) { + auto iter = undefined_symbols_.find(name); + if (iter != undefined_symbols_.end()) { + undefined_symbols_.erase(iter); + } + } return absl::OkStatus(); }
diff --git a/mpact/sim/util/asm/simple_assembler.h b/mpact/sim/util/asm/simple_assembler.h index e40f5d1..82a56fc 100644 --- a/mpact/sim/util/asm/simple_assembler.h +++ b/mpact/sim/util/asm/simple_assembler.h
@@ -51,8 +51,8 @@ class SimpleAssembler { public: - SimpleAssembler(int elf_file_class, int os_abi, int type, int machine, - OpcodeAssemblerInterface *opcode_assembler_if); + SimpleAssembler(absl::string_view comment, int elf_file_class, int os_abi, + int machine, OpcodeAssemblerInterface *opcode_assembler_if); SimpleAssembler(const SimpleAssembler &) = delete; SimpleAssembler &operator=(const SimpleAssembler &) = delete; virtual ~SimpleAssembler(); @@ -83,6 +83,8 @@ uint64_t bss_segment_start); template <typename SymbolType> void UpdateSymbolsForRelocatable(); + template <typename SymbolType> + void UpdateSymtabHeaderInfo(); // Perform second pass of parsing. absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector); // Parse and process an assembly directive.
diff --git a/mpact/sim/util/asm/test/BUILD b/mpact/sim/util/asm/test/BUILD index 355a061..42299da 100644 --- a/mpact/sim/util/asm/test/BUILD +++ b/mpact/sim/util/asm/test/BUILD
@@ -96,6 +96,7 @@ "//mpact/sim/util/asm:simple_assembler", "@com_github_serge1_elfio//:elfio", "@com_google_absl//absl/base:no_destructor", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/status",
diff --git a/mpact/sim/util/asm/test/riscv64x_asm_test.cc b/mpact/sim/util/asm/test/riscv64x_asm_test.cc index ceeb124..a50e97e 100644 --- a/mpact/sim/util/asm/test/riscv64x_asm_test.cc +++ b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
@@ -18,12 +18,14 @@ #include <vector> #include "absl/base/no_destructor.h" +#include "absl/container/flat_hash_map.h" #include "absl/log/check.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "elfio/elf_types.hpp" #include "elfio/elfio.hpp" +#include "elfio/elfio_strings.hpp" #include "elfio/elfio_symbols.hpp" #include "googlemock/include/gmock/gmock.h" // IWYU pragma: keep #include "googletest/include/gtest/gtest.h" @@ -130,8 +132,8 @@ : matcher_(&bin_encoder_interface_), riscv_64x_assembler_(&matcher_) { CHECK_OK(matcher_.Initialize()); // Create the assembler. - assembler_ = new SimpleAssembler(ELFCLASS64, ELFOSABI_LINUX, ET_EXEC, - EM_RISCV, &riscv_64x_assembler_); + assembler_ = new SimpleAssembler(";", ELFCLASS64, ELFOSABI_LINUX, EM_RISCV, + &riscv_64x_assembler_); std::istringstream source(*kTestAssembly); // Parse the assembly code. auto status = assembler_->Parse(source); @@ -209,32 +211,46 @@ unsigned char type; ELFIO::Elf_Half section_index; unsigned char other; + int num_symbols = symtab->get_size() / sizeof(ELFIO::Elf64_Sym); + auto symspan = absl::MakeSpan( + reinterpret_cast<const ELFIO::Elf64_Sym*>(symtab->get_data()), + num_symbols); + absl::flat_hash_map<std::string, int> symbol_map; + auto* string_accessor = + new ELFIO::string_section_accessor(elf().sections[".strtab"]); + for (int i = 0; i < num_symbols; ++i) { + auto name = string_accessor->get_string(symspan[i].st_name); + symbol_map.insert({name, i}); + } // Verify that main is valued 0x0, global and located in the text section. symbols.get_symbol("main", value, size, bind, type, section_index, other); - EXPECT_EQ(value, 0x0); - EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); - EXPECT_EQ(type, STT_NOTYPE); + auto* sym = &symspan[symbol_map["main"]]; + EXPECT_EQ(sym->st_value, 0x0); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_GLOBAL); + EXPECT_EQ(sym->st_shndx, elf().sections[".text"]->get_index()); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); // Verify that exit is valued 16 * 4, local and located in the text // section. - symbols.get_symbol("exit", value, size, bind, type, section_index, other); - EXPECT_EQ(value, 16 * 4); - EXPECT_EQ(bind, STB_LOCAL); - EXPECT_EQ(section_index, elf().sections[".text"]->get_index()); - EXPECT_EQ(type, STT_NOTYPE); + sym = &symspan[symbol_map["exit"]]; + EXPECT_EQ(sym->st_value, 16 * 4); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_LOCAL); + EXPECT_EQ(sym->st_shndx, elf().sections[".text"]->get_index()); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); // Verify that hello is global and located in the data section at 0x2000. symbols.get_symbol("hello", value, size, bind, type, section_index, other); - EXPECT_EQ(value, 0); - EXPECT_EQ(section_index, elf().sections[".data"]->get_index()); - EXPECT_EQ(bind, STB_GLOBAL); - EXPECT_EQ(type, STT_NOTYPE); + sym = &symspan[symbol_map["hello"]]; + EXPECT_EQ(sym->st_value, 0); + EXPECT_EQ(sym->st_shndx, elf().sections[".data"]->get_index()); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_GLOBAL); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); // Verify that semihost_param is global and located in the bss section at // 16 bytes. - symbols.get_symbol("semihost_param", value, size, bind, type, section_index, - other); - EXPECT_EQ(value, 16); - EXPECT_EQ(section_index, elf().sections[".bss"]->get_index()); - EXPECT_EQ(bind, STB_LOCAL); - EXPECT_EQ(type, STT_NOTYPE); + sym = &symspan[symbol_map["semihost_param"]]; + EXPECT_EQ(sym->st_value, 16); + EXPECT_EQ(sym->st_shndx, elf().sections[".bss"]->get_index()); + EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_LOCAL); + EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE); + delete string_accessor; } TEST_F(RiscV64XAssemblerTest, ExecutableSymbols) {