Fixed some issues in the ELF file when generating relocatable output.

PiperOrigin-RevId: 715520295
Change-Id: Ib66f8f8188d39dfa57b9d9700d5d67e2449e029c
diff --git a/mpact/sim/util/asm/BUILD b/mpact/sim/util/asm/BUILD
index e02ba66..5df423f 100644
--- a/mpact/sim/util/asm/BUILD
+++ b/mpact/sim/util/asm/BUILD
@@ -45,9 +45,11 @@
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/functional:any_invocable",
         "@com_google_absl//absl/functional:bind_front",
+        "@com_google_absl//absl/log",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
         "@com_googlesource_code_re2//:re2",
     ],
 )
diff --git a/mpact/sim/util/asm/simple_assembler.cc b/mpact/sim/util/asm/simple_assembler.cc
index 76584f8..98c60c0 100644
--- a/mpact/sim/util/asm/simple_assembler.cc
+++ b/mpact/sim/util/asm/simple_assembler.cc
@@ -27,10 +27,12 @@
 #include "absl/container/flat_hash_set.h"
 #include "absl/functional/any_invocable.h"
 #include "absl/functional/bind_front.h"
+#include "absl/log/log.h"
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
+#include "absl/types/span.h"
 #include "elfio/elf_types.hpp"
 #include "elfio/elfio_section.hpp"
 #include "elfio/elfio_segment.hpp"
@@ -84,7 +86,7 @@
     auto iter = symbol_indices_.find(text);
     if (iter == symbol_indices_.end()) {
       return absl::InvalidArgumentError(
-          absl::StrCat("Symbol '", text, "' not found"));
+          absl::StrCat("SymbolResolver: Symbol '", text, "' not found"));
     }
     auto index = iter->second;
     if (elf_file_class_ == ELFCLASS64) {
@@ -308,12 +310,12 @@
 
 }  // namespace
 
-SimpleAssembler::SimpleAssembler(int elf_file_class, int os_abi, int type,
-                                 int machine,
+SimpleAssembler::SimpleAssembler(absl::string_view comment, int elf_file_class,
+                                 int os_abi, int machine,
                                  OpcodeAssemblerInterface *opcode_assembler_if)
     : elf_file_class_(elf_file_class),
       opcode_assembler_if_(opcode_assembler_if),
-      comment_re_("^\\s*(?:;(.*))?$"),
+      comment_re_(absl::StrCat("^\\s*(?:", comment, "(.*))?$")),
       asm_line_re_("^(?:(?:(\\S+)\\s*:)?|\\s)\\s*([^;]*?)?\\s*(?:;(.*))?$"),
       directive_re_(
           "^\\.(align|bss|bytes|char|cstring|data|global|long|sect"
@@ -324,12 +326,12 @@
   // Configure the ELF file writer.
   writer_.create(elf_file_class_, ELFDATA2LSB);
   writer_.set_os_abi(os_abi);
-  writer_.set_type(ET_EXEC);
   writer_.set_machine(machine);
   // Create the symbol table section.
   symtab_ = writer_.sections.add(".symtab");
   section_index_map_.insert({symtab_->get_index(), symtab_});
   symtab_->set_type(SHT_SYMTAB);
+  symtab_->set_addr_align(0x8);
   symtab_->set_entry_size(elf_file_class_ == ELFCLASS64
                               ? sizeof(ELFIO::Elf64_Sym)
                               : sizeof(ELFIO::Elf32_Sym));
@@ -337,12 +339,17 @@
   strtab_ = writer_.sections.add(".strtab");
   section_index_map_.insert({strtab_->get_index(), strtab_});
   strtab_->set_type(SHT_STRTAB);
+  strtab_->set_addr_align(0x1);
   // Link the symbol table to the string table.
   symtab_->set_link(strtab_->get_index());
   // Create the symbol and string table accessors.
   symbol_accessor_ = new ELFIO::symbol_section_accessor(writer_, symtab_);
   string_accessor_ =
       new ELFIO::string_section_accessor(writer_.sections[".strtab"]);
+  // Create .text, .data. and .bss sections.
+  SetTextSection(".text");
+  SetDataSection(".data");
+  SetBssSection(".bss");
 }
 
 SimpleAssembler::~SimpleAssembler() {
@@ -407,10 +414,11 @@
   for (auto const &symbol : undefined_symbols_) {
     auto status = AddSymbol(symbol, 0, 0, STT_NOTYPE, 0, 0, nullptr);
     if (!status.ok()) {
-      return absl::InternalError(
-          absl::StrCat("Failed to add undefined symbol: ", symbol));
+      return absl::InternalError(absl::StrCat(
+          "Failed to add undefined symbol '", symbol, "': ", status.message()));
     }
   }
+  undefined_symbols_.clear();
 
   if (bss_section_ != nullptr) {
     bss_section_->set_size(section_address_map_[bss_section_]);
@@ -462,7 +470,6 @@
   for (int i = 0; i < num_symbols; ++i) {
     auto &sym = symbols[i];
     std::string name = string_accessor_->get_string(sym.st_name);
-    symbol_indices_.insert({name, i});
     if (global_symbols_.contains(name)) {
       sym.st_info = ELF_ST_INFO(STB_GLOBAL, ELF_ST_TYPE(sym.st_info));
     }
@@ -483,6 +490,7 @@
     }
     return absl::InvalidArgumentError(message);
   }
+  writer_.set_type(ET_EXEC);
   // Section sizes are now known. So let's compute the layout and update all
   // the symbol values/addresses before the next pass.
   // The layout is:
@@ -603,18 +611,30 @@
 
 }  // namespace
 
+template <typename SymbolType>
+void SimpleAssembler::UpdateSymtabHeaderInfo() {
+  int last_local = 0;
+  auto syms =
+      absl::MakeSpan(reinterpret_cast<const SymbolType *>(symtab_->get_data()),
+                     symtab_->get_size() / sizeof(SymbolType));
+  for (int i = 0; i < syms.size(); ++i) {
+    auto name = string_accessor_->get_string(syms[i].st_name);
+    symbol_indices_.insert({name, i});
+    if (ELF_ST_BIND(syms[i].st_info) == STB_LOCAL) last_local = i;
+  }
+  symtab_->set_info(last_local + 1);
+}
+
 absl::Status SimpleAssembler::CreateRelocatable() {
+  writer_.set_type(ET_REL);
   // Reset the section address map to zero since we are creating a relocatable
   // file.
   section_address_map_[text_section_] = 0;
   section_address_map_[data_section_] = 0;
   section_address_map_[bss_section_] = 0;
 
-  // Rearrange local symbols.
-  symbol_accessor_->arrange_local_symbols(nullptr);
-  // Since the symbols now are rearranged, recompute the symbol index map, and
-  // also set global symbols flag for those in the global_symbols_ set.
-  symbol_indices_.clear();
+  // Since the symbols now are rearranged, we need to set global symbols flag
+  // for those in the global_symbols_ set.
   // Different size symbol table entries for 32 and 64 bit ELF files.
   if (elf_file_class_ == ELFCLASS64) {
     UpdateSymbolsForRelocatable<ELFIO::Elf64_Sym>();
@@ -624,6 +644,17 @@
     return absl::InternalError(
         absl::StrCat("Unsupported ELF file class: ", elf_file_class_));
   }
+  // Rearrange local symbols in the symbol table so that they are at the
+  // beginning (ELF requirement).
+  symbol_accessor_->arrange_local_symbols(nullptr);
+  // Find the last local symbol and set the section header info for symbtab
+  // to point to 1 past that. Update the symbol_indices_ map.
+  symbol_indices_.clear();
+  if (elf_file_class_ == ELFCLASS64) {
+    UpdateSymtabHeaderInfo<ELFIO::Elf64_Sym>();
+  } else {
+    UpdateSymtabHeaderInfo<ELFIO::Elf32_Sym>();
+  }
 
   // Parse the source again, collect relocations.
   std::vector<RelocationInfo> relo_vector;
@@ -651,6 +682,14 @@
       std::string name =
           absl::StrCat(".rela", section_index_map_[section_index]->get_name());
       auto *rela_section = writer_.sections.add(name);
+      rela_section->set_type(SHT_RELA);
+      rela_section->set_flags(SHF_INFO_LINK);
+      rela_section->set_entry_size(elf_file_class_ == ELFCLASS64
+                                       ? sizeof(ELFIO::Elf64_Rela)
+                                       : sizeof(ELFIO::Elf32_Rela));
+      rela_section->set_link(symtab_->get_index());
+      rela_section->set_info(text_section_->get_index());
+      rela_section->set_addr_align(8);
       // Process the relocation vector entries.
       absl::Status status;
       if (elf_file_class_ == ELFCLASS64) {
@@ -686,11 +725,13 @@
       auto status = ParseAsmDirective(line, symbol_resolver_, byte_vector);
     } else {
       auto relo_size = relo_vector.size();
+      auto address = section_address_map_[section];
       auto status =
           ParseAsmStatement(line, symbol_resolver_, byte_vector, relo_vector);
       // Update section information in the relocation vector.
       for (int i = relo_size; i < relo_vector.size(); ++i) {
         relo_vector[i].section_index = section->get_index();
+        relo_vector[i].offset = address;
       }
     }
     if (!status.ok()) return status;
@@ -890,6 +931,10 @@
     return;
   }
   section = writer_.sections.add(name);
+  auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section);
+  if (!status.ok()) {
+    LOG(ERROR) << "Failed to add symbol for data section: " << status.message();
+  }
   section->set_type(SHT_PROGBITS);
   section->set_flags(SHF_ALLOC | SHF_EXECINSTR);
   section->set_addr_align(0x10);
@@ -907,6 +952,10 @@
     return;
   }
   section = writer_.sections.add(name);
+  auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section);
+  if (!status.ok()) {
+    LOG(ERROR) << "Failed to add symbol for data section: " << status.message();
+  }
   section->set_type(SHT_PROGBITS);
   section->set_flags(SHF_ALLOC | SHF_WRITE);
   section->set_addr_align(0x10);
@@ -924,6 +973,10 @@
     return;
   }
   section = writer_.sections.add(name);
+  auto status = AddSymbol(name, 0, 0, STT_SECTION, STB_LOCAL, 0, section);
+  if (!status.ok()) {
+    LOG(ERROR) << "Failed to add symbol for bss section: " << status.message();
+  }
   section->set_type(SHT_NOBITS);
   section->set_flags(SHF_ALLOC | SHF_WRITE);
   section->set_addr_align(0x10);
@@ -940,6 +993,7 @@
                                         ELFIO::section *section) {
   auto iter = symbol_indices_.find(name);
   if (iter != symbol_indices_.end()) {
+    if (section == nullptr) return absl::OkStatus();
     return absl::AlreadyExistsError(
         absl::StrCat("Symbol '", name, "' already exists"));
   }
@@ -947,8 +1001,14 @@
       *string_accessor_, name.c_str(), value, size, binding, type, other,
       section == nullptr ? SHN_UNDEF : section->get_index());
   symbol_indices_.insert({name, index});
-  // If the symbol was marked undefined previously, remove it from the set.
-  if (undefined_symbols_.contains(name)) undefined_symbols_.erase(name);
+  // If this is not an undefined symbol reference, then see if the symbol name
+  // is part of the "current" undefined symbols, and if so, remove it.
+  if (section != nullptr) {
+    auto iter = undefined_symbols_.find(name);
+    if (iter != undefined_symbols_.end()) {
+      undefined_symbols_.erase(iter);
+    }
+  }
   return absl::OkStatus();
 }
 
diff --git a/mpact/sim/util/asm/simple_assembler.h b/mpact/sim/util/asm/simple_assembler.h
index e40f5d1..82a56fc 100644
--- a/mpact/sim/util/asm/simple_assembler.h
+++ b/mpact/sim/util/asm/simple_assembler.h
@@ -51,8 +51,8 @@
 
 class SimpleAssembler {
  public:
-  SimpleAssembler(int elf_file_class, int os_abi, int type, int machine,
-                  OpcodeAssemblerInterface *opcode_assembler_if);
+  SimpleAssembler(absl::string_view comment, int elf_file_class, int os_abi,
+                  int machine, OpcodeAssemblerInterface *opcode_assembler_if);
   SimpleAssembler(const SimpleAssembler &) = delete;
   SimpleAssembler &operator=(const SimpleAssembler &) = delete;
   virtual ~SimpleAssembler();
@@ -83,6 +83,8 @@
                                   uint64_t bss_segment_start);
   template <typename SymbolType>
   void UpdateSymbolsForRelocatable();
+  template <typename SymbolType>
+  void UpdateSymtabHeaderInfo();
   // Perform second pass of parsing.
   absl::Status ParsePassTwo(std::vector<RelocationInfo> &relo_vector);
   // Parse and process an assembly directive.
diff --git a/mpact/sim/util/asm/test/BUILD b/mpact/sim/util/asm/test/BUILD
index 355a061..42299da 100644
--- a/mpact/sim/util/asm/test/BUILD
+++ b/mpact/sim/util/asm/test/BUILD
@@ -96,6 +96,7 @@
         "//mpact/sim/util/asm:simple_assembler",
         "@com_github_serge1_elfio//:elfio",
         "@com_google_absl//absl/base:no_destructor",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/log:check",
         "@com_google_absl//absl/status",
diff --git a/mpact/sim/util/asm/test/riscv64x_asm_test.cc b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
index ceeb124..a50e97e 100644
--- a/mpact/sim/util/asm/test/riscv64x_asm_test.cc
+++ b/mpact/sim/util/asm/test/riscv64x_asm_test.cc
@@ -18,12 +18,14 @@
 #include <vector>
 
 #include "absl/base/no_destructor.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/log/check.h"
 #include "absl/status/status.h"
 #include "absl/strings/string_view.h"
 #include "absl/types/span.h"
 #include "elfio/elf_types.hpp"
 #include "elfio/elfio.hpp"
+#include "elfio/elfio_strings.hpp"
 #include "elfio/elfio_symbols.hpp"
 #include "googlemock/include/gmock/gmock.h"  // IWYU pragma: keep
 #include "googletest/include/gtest/gtest.h"
@@ -130,8 +132,8 @@
       : matcher_(&bin_encoder_interface_), riscv_64x_assembler_(&matcher_) {
     CHECK_OK(matcher_.Initialize());
     // Create the assembler.
-    assembler_ = new SimpleAssembler(ELFCLASS64, ELFOSABI_LINUX, ET_EXEC,
-                                     EM_RISCV, &riscv_64x_assembler_);
+    assembler_ = new SimpleAssembler(";", ELFCLASS64, ELFOSABI_LINUX, EM_RISCV,
+                                     &riscv_64x_assembler_);
     std::istringstream source(*kTestAssembly);
     // Parse the assembly code.
     auto status = assembler_->Parse(source);
@@ -209,32 +211,46 @@
   unsigned char type;
   ELFIO::Elf_Half section_index;
   unsigned char other;
+  int num_symbols = symtab->get_size() / sizeof(ELFIO::Elf64_Sym);
+  auto symspan = absl::MakeSpan(
+      reinterpret_cast<const ELFIO::Elf64_Sym*>(symtab->get_data()),
+      num_symbols);
+  absl::flat_hash_map<std::string, int> symbol_map;
+  auto* string_accessor =
+      new ELFIO::string_section_accessor(elf().sections[".strtab"]);
+  for (int i = 0; i < num_symbols; ++i) {
+    auto name = string_accessor->get_string(symspan[i].st_name);
+    symbol_map.insert({name, i});
+  }
   // Verify that main is valued 0x0, global and located in the text section.
   symbols.get_symbol("main", value, size, bind, type, section_index, other);
-  EXPECT_EQ(value, 0x0);
-  EXPECT_EQ(section_index, elf().sections[".text"]->get_index());
-  EXPECT_EQ(type, STT_NOTYPE);
+  auto* sym = &symspan[symbol_map["main"]];
+  EXPECT_EQ(sym->st_value, 0x0);
+  EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_GLOBAL);
+  EXPECT_EQ(sym->st_shndx, elf().sections[".text"]->get_index());
+  EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE);
   // Verify that exit is valued 16 * 4, local and located in the text
   // section.
-  symbols.get_symbol("exit", value, size, bind, type, section_index, other);
-  EXPECT_EQ(value, 16 * 4);
-  EXPECT_EQ(bind, STB_LOCAL);
-  EXPECT_EQ(section_index, elf().sections[".text"]->get_index());
-  EXPECT_EQ(type, STT_NOTYPE);
+  sym = &symspan[symbol_map["exit"]];
+  EXPECT_EQ(sym->st_value, 16 * 4);
+  EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_LOCAL);
+  EXPECT_EQ(sym->st_shndx, elf().sections[".text"]->get_index());
+  EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE);
   // Verify that hello is global and located in the data section at 0x2000.
   symbols.get_symbol("hello", value, size, bind, type, section_index, other);
-  EXPECT_EQ(value, 0);
-  EXPECT_EQ(section_index, elf().sections[".data"]->get_index());
-  EXPECT_EQ(bind, STB_GLOBAL);
-  EXPECT_EQ(type, STT_NOTYPE);
+  sym = &symspan[symbol_map["hello"]];
+  EXPECT_EQ(sym->st_value, 0);
+  EXPECT_EQ(sym->st_shndx, elf().sections[".data"]->get_index());
+  EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_GLOBAL);
+  EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE);
   // Verify that semihost_param is global and located in the bss section at
   // 16 bytes.
-  symbols.get_symbol("semihost_param", value, size, bind, type, section_index,
-                     other);
-  EXPECT_EQ(value, 16);
-  EXPECT_EQ(section_index, elf().sections[".bss"]->get_index());
-  EXPECT_EQ(bind, STB_LOCAL);
-  EXPECT_EQ(type, STT_NOTYPE);
+  sym = &symspan[symbol_map["semihost_param"]];
+  EXPECT_EQ(sym->st_value, 16);
+  EXPECT_EQ(sym->st_shndx, elf().sections[".bss"]->get_index());
+  EXPECT_EQ(ELF_ST_BIND(sym->st_info), STB_LOCAL);
+  EXPECT_EQ(ELF_ST_TYPE(sym->st_info), STT_NOTYPE);
+  delete string_accessor;
 }
 
 TEST_F(RiscV64XAssemblerTest, ExecutableSymbols) {