blob: e2ed32462890543afd9ff61faa56010f9d20aac5 [file]
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_
#define MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_
#include <cstddef>
#include <cstdint>
#include <istream>
#include <ostream>
#include <string>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "elfio/elf_types.hpp"
#include "elfio/elfio.hpp"
#include "elfio/elfio_section.hpp"
#include "elfio/elfio_strings.hpp"
#include "elfio/elfio_symbols.hpp"
#include "mpact/sim/util/asm/opcode_assembler_interface.h"
#include "mpact/sim/util/asm/resolver_interface.h"
#include "re2/re2.h"
// This file declares the SimpleAssembler class, which provides simple handling
// of assembly source, including a number of assembly directives. It currently
// handles three sections: .text, .data, and .bss. It produces either a
// relocatable file or an executable ELF file with the text section in its own
// segment starting at the base address, followed by the data section, and then
// the bss section. For the executable file, the entry point is passed as an
// argument to CreateExecutable().
//
// Only little-endian ELF files are currently supported.
//
// The ELF file class is specified in the constructor. Any other ELF header
// values have to be set using methods in the ELFIO writer that is accessed
// using the writer() method.
// At the very least, the following methods should be called:
// writer().set_os_abi()
// writer().set_machine()
// If additional sections need to be created, use the add_section() method of
// the writer (see ELFIO documentation for details).
namespace mpact {
namespace sim {
namespace util {
namespace assembler {
// SimpleAssembler parses assembly source (statements and directives) and emits
// the result through an ELFIO writer, either as a relocatable file or as an
// executable ELF file.
//
// Typical call order, as suggested by the interface: construct, configure the
// ELF header through writer(), Parse() the source, call CreateExecutable() or
// CreateRelocatable(), then Write() the result to a stream.
//
// The class is neither copyable nor movable (the copy operations are deleted
// and no move operations are declared).
class SimpleAssembler {
 public:
  // The constructor takes the following parameters:
  //   comment: The comment string or character that starts a comment.
  //   elf_file_class: The ELF file class (32 or 64 bit). Use either ELFCLASS32
  //                   or ELFCLASS64 from ELFIO.
  //   opcode_assembler_if: The opcode assembler interface to use for parsing
  //                        and encoding assembly statements.
  // NOTE(review): opcode_assembler_if is held as a raw pointer; presumably it
  // is not owned and must outlive this object - confirm against the
  // implementation.
  SimpleAssembler(absl::string_view comment, int elf_file_class,
                  OpcodeAssemblerInterface* opcode_assembler_if);
  SimpleAssembler(const SimpleAssembler&) = delete;
  SimpleAssembler& operator=(const SimpleAssembler&) = delete;
  virtual ~SimpleAssembler();

  // Parse the input stream as assembly. Returns a non-OK status on failure.
  // NOTE(review): the role of zero_resolver is not visible in this header;
  // presumably it is an optional resolver used while symbol values are not yet
  // known - confirm against the implementation.
  absl::Status Parse(std::istream& is,
                     ResolverInterface* zero_resolver = nullptr);

  // Add the symbol to the symbol table for the current section. See ELFIO
  // documentation for details of the meaning of the parameters.
  absl::Status AddSymbolToCurrentSection(const std::string& name,
                                         ELFIO::Elf64_Addr value,
                                         ELFIO::Elf_Xword size, uint8_t type,
                                         uint8_t binding, uint8_t other);

  // Add the symbol to the symbol table for the named section. See ELFIO
  // documentation for details of the meaning of the parameters.
  absl::Status AddSymbol(const std::string& name, ELFIO::Elf64_Addr value,
                         ELFIO::Elf_Xword size, uint8_t type, uint8_t binding,
                         uint8_t other, const std::string& section_name);

  // Create executable ELF file with the given value as the entry point.
  // The text segment will be laid out starting at base address, followed by
  // the data segment.
  // Two overloads are provided: the entry point is given either as a string or
  // as a numeric address.
  // NOTE(review): the string form presumably names a symbol whose address
  // becomes the entry point - confirm against the implementation.
  absl::Status CreateExecutable(uint64_t base_address,
                                const std::string& entry_point,
                                ResolverInterface* symbol_resolver = nullptr);
  absl::Status CreateExecutable(uint64_t base_address, uint64_t entry_point,
                                ResolverInterface* symbol_resolver = nullptr);

  // Create a relocatable ELF file.
  absl::Status CreateRelocatable(ResolverInterface* symbol_resolver = nullptr);

  // Write the ELF file to the given output stream.
  absl::Status Write(std::ostream& os);

  // Access the ELF writer. Use it to set ELF header values not covered by the
  // constructor and to add any extra sections.
  ELFIO::elfio& writer() { return writer_; }

  // Add a symbol reference to the symbol table if it is not already defined.
  void SimpleAddSymbol(absl::string_view name);

  // Getters and setters.

  // Mutable access to the map from symbol name to symbol table index.
  absl::flat_hash_map<std::string, ELFIO::Elf_Word>& symbol_indices() {
    return symbol_indices_;
  }
  // The ELF symbol table section (nullptr until it has been assigned).
  ELFIO::section* symtab() { return symtab_; }
  // Data address unit; 1 (the default) means byte addressable. This accessor
  // only reads state, so it is const-qualified and callable on const objects.
  unsigned data_address_unit() const { return data_address_unit_; }
  void set_data_address_unit(unsigned data_address_unit) {
    data_address_unit_ = data_address_unit;
  }
  // Store the data segment start address.
  // NOTE(review): how this value interacts with the layout performed by
  // CreateExecutable() is not visible in this header - confirm.
  void set_data_segment_start(uint64_t data_segment_start) {
    data_segment_start_ = data_segment_start;
  }

 private:
  // Helper functions to update the symbol table entries. SymbolType selects
  // the ELF symbol record type (presumably the 32 or 64 bit ELFIO symbol
  // struct matching elf_file_class_ - confirm against the implementation).
  template <typename SymbolType>
  void UpdateSymbolsForExecutable(uint64_t text_segment_start,
                                  uint64_t data_segment_start,
                                  uint64_t bss_segment_start);
  template <typename SymbolType>
  void UpdateSymbolsForRelocatable();
  template <typename SymbolType>
  void UpdateSymtabHeaderInfo();

  // Perform second pass of parsing. Relocations are passed through
  // relo_vector.
  absl::Status ParsePassTwo(std::vector<RelocationInfo>& relo_vector,
                            ResolverInterface* symbol_resolver);

  // Parse and process an assembly directive.
  absl::Status ParseAsmDirective(absl::string_view line, uint64_t address,
                                 ResolverInterface* resolver,
                                 std::vector<uint8_t>& byte_values,
                                 std::vector<RelocationInfo>& relocations);

  // Parse and process an assembly statement.
  absl::Status ParseAsmStatement(absl::string_view line, uint64_t address,
                                 ResolverInterface* resolver,
                                 std::vector<uint8_t>& byte_values,
                                 std::vector<RelocationInfo>& relocations);

  // Add the symbol to the symbol table, associating it with the given section.
  absl::Status AddSymbol(const std::string& name, ELFIO::Elf64_Addr value,
                         ELFIO::Elf_Xword size, uint8_t type, uint8_t binding,
                         uint8_t other, ELFIO::section* section);

  // Append the data to the current section.
  absl::Status AppendData(const char* data, size_t size);

  // Set the given section as the current section. Create it if it has not
  // already been created.
  void SetTextSection(const std::string& name);
  void SetDataSection(const std::string& name);
  void SetBssSection(const std::string& name);

  // ELF file class.
  int elf_file_class_ = 0;
  // ELF file top level object.
  ELFIO::elfio writer_;
  // The current section being processed.
  ELFIO::section* current_section_ = nullptr;
  // Map from section index to section pointer.
  absl::flat_hash_map<uint16_t, ELFIO::section*> section_index_map_;
  // Interface used to parse and encode assembly statements.
  OpcodeAssemblerInterface* opcode_assembler_if_ = nullptr;
  // Interface used to access strings in the string table.
  // NOTE(review): raw pointer; ownership is not visible in this header.
  ELFIO::string_section_accessor* string_accessor_ = nullptr;
  // Interface used to access symbols in the symbol table.
  // NOTE(review): raw pointer; ownership is not visible in this header.
  ELFIO::symbol_section_accessor* symbol_accessor_ = nullptr;
  // ELF symbol table section.
  ELFIO::section* symtab_ = nullptr;
  // ELF string table section.
  ELFIO::section* strtab_ = nullptr;
  // Map that tracks the current address of each section.
  absl::flat_hash_map<ELFIO::section*, uint64_t> section_address_map_;
  // Source lines. Presumably retained by Parse() for use in the second pass -
  // confirm against the implementation.
  std::vector<std::string> lines_;
  // Section pointers.
  ELFIO::section* text_section_ = nullptr;
  ELFIO::section* data_section_ = nullptr;
  ELFIO::section* bss_section_ = nullptr;
  // Regular expressions used to parse the assembly source.
  RE2 comment_re_;
  RE2 asm_line_re_;
  RE2 directive_re_;
  // Set of symbol names declared as global.
  absl::flat_hash_set<std::string> global_symbols_;
  // Map from symbol name to symbol index in the symbol table.
  absl::flat_hash_map<std::string, ELFIO::Elf_Word> symbol_indices_;
  // Set of undefined symbols.
  absl::flat_hash_set<std::string> undefined_symbols_;
  // Data address unit - by default 1 for byte addressable.
  unsigned data_address_unit_ = 1;
  // Data segment start address as set by set_data_segment_start(); 0 until
  // set.
  uint64_t data_segment_start_ = 0;
};
} // namespace assembler
} // namespace util
} // namespace sim
} // namespace mpact
#endif // MPACT_SIM_UTIL_ASM_SIMPLE_ASSEMBLER_H_