blob: ff5faa181618fcb05f19bf6cb41d2e078f3a94ef [file]
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mpact/sim/util/asm/simple_assembler.h"

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <istream>
#include <memory>
#include <ostream>
#include <string>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/functional/bind_front.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "elfio/elf_types.hpp"
#include "elfio/elfio.hpp" // IWYU pragma: keep
#include "elfio/elfio_section.hpp"
#include "elfio/elfio_segment.hpp"
#include "elfio/elfio_strings.hpp"
#include "elfio/elfio_symbols.hpp"
#include "mpact/sim/util/asm/opcode_assembler_interface.h"
#include "mpact/sim/util/asm/resolver.h"
#include "mpact/sim/util/asm/resolver_interface.h"
#include "util/regexp/re2/re2.h"
namespace mpact {
namespace sim {
namespace util {
namespace assembler {
// Helper functions for parsing the assembly code.
namespace {
// Maps an integer type T to the type that is handed to the absl Atoi routines
// when a value of type T is parsed. By default a type maps to itself; the
// 8- and 16-bit types (and plain char) are widened to the 32-bit type listed
// in the specializations below.
template <typename T>
struct AtoIType {
  using type = T;
};

// Signed narrow types, and plain char, are parsed as int32_t.
template <>
struct AtoIType<int8_t> {
  using type = int32_t;
};
template <>
struct AtoIType<int16_t> {
  using type = int32_t;
};
template <>
struct AtoIType<char> {
  using type = int32_t;
};

// Unsigned narrow types are parsed as uint32_t.
template <>
struct AtoIType<uint8_t> {
  using type = uint32_t;
};
template <>
struct AtoIType<uint16_t> {
  using type = uint32_t;
};
// Converts `text` to an integer of type T.
//
// Text starting with "0x" is parsed with absl::SimpleHexAtoi; a failure there
// is reported as "Invalid immediate" and no symbol lookup is attempted. Any
// other text is first tried with absl::SimpleAtoi. If that fails and a
// resolver was supplied, the text is looked up as a symbol name and the
// resolved value is cast to T. Every remaining failure is reported as
// "Invalid argument".
template <typename T>
absl::StatusOr<T> SimpleTextToInt(absl::string_view text,
                                  ResolverInterface* resolver = nullptr) {
  T parsed;
  const bool has_hex_prefix = (text.substr(0, 2) == "0x");
  if (has_hex_prefix) {
    if (!absl::SimpleHexAtoi(text.substr(2), &parsed)) {
      return absl::InvalidArgumentError(
          absl::StrCat("Invalid immediate: ", text));
    }
    return parsed;
  }
  if (absl::SimpleAtoi(text, &parsed)) return parsed;
  // Not a literal: fall back to a symbol lookup when a resolver is available.
  if (resolver != nullptr) {
    auto resolved = resolver->Resolve(text);
    if (resolved.ok()) return static_cast<T>(resolved.value());
  }
  return absl::InvalidArgumentError(absl::StrCat("Invalid argument: ", text));
}
// Returns a copy of `text` with backslash escape sequences replaced by the
// characters they denote. Recognized sequences are \n \r \v \f \a \b \t \\ \'
// \" and \?. An unrecognized sequence is copied through unchanged (backslash
// included), and a lone backslash at the very end of the text is kept as is.
// Used when parsing the .string, .char, and .cstring directives.
std::string ExpandEscapes(absl::string_view text) {
  std::string expanded;
  bool pending_backslash = false;
  for (char ch : text) {
    if (!pending_backslash) {
      // Ordinary character, or the start of an escape sequence.
      if (ch == '\\') {
        pending_backslash = true;
      } else {
        expanded.push_back(ch);
      }
      continue;
    }
    // `ch` is the character following a backslash.
    pending_backslash = false;
    char replacement = ch;
    switch (ch) {
      case 'a':
        replacement = '\a';
        break;
      case 'b':
        replacement = '\b';
        break;
      case 'f':
        replacement = '\f';
        break;
      case 'n':
        replacement = '\n';
        break;
      case 'r':
        replacement = '\r';
        break;
      case 't':
        replacement = '\t';
        break;
      case 'v':
        replacement = '\v';
        break;
      case '\\':
      case '\'':
      case '"':
      case '?':
        // These escapes denote the character itself.
        break;
      default:
        // Unknown escape: keep the backslash in front of the character.
        expanded.push_back('\\');
        break;
    }
    expanded.push_back(replacement);
  }
  if (pending_backslash) expanded.push_back('\\');
  return expanded;
}
// Parses a comma separated list of integer literals (decimal, optionally
// negative, or 0x-prefixed hexadecimal) from the argument text of an assembly
// directive and returns them as values of type T. Each literal is parsed using
// the type selected by AtoIType<T> and then cast to T.
//
// Parsing stops at the first position where the text no longer matches a
// literal followed by a comma or the end of the text; whatever was parsed up
// to that point is returned.
// NOTE(review): `resolver` is accepted but not used by this overload, and any
// text left over after the last match is silently ignored - confirm this is
// intended.
template <typename T>
absl::StatusOr<std::vector<T>> GetValues(
    absl::string_view remainder, ResolverInterface* resolver = nullptr) {
  using ParseType = typename AtoIType<T>::type;
  std::vector<T> values;
  static RE2 value_re("\\s*(0x[0-9a-fA-F]+|-?[0-9]+)\\s*(?:,|$)");
  std::string token;
  while (RE2::Consume(&remainder, value_re, &token)) {
    auto parsed = SimpleTextToInt<ParseType>(token);
    if (!parsed.ok()) return parsed.status();
    values.push_back(static_cast<T>(parsed.value()));
  }
  return values;
}
// Specialization for char: parses a comma separated list of single quoted
// character literals such as 'a' or '\n'. The text between the quotes is one
// or two characters long and must expand (see ExpandEscapes) to exactly one
// character, otherwise an "Invalid character" error is returned. `resolver`
// is not used.
template <>
absl::StatusOr<std::vector<char>> GetValues<char>(absl::string_view remainder,
                                                  ResolverInterface* resolver) {
  std::vector<char> values;
  static RE2 value_re("\\s*'(.{1,2})'\\s*(?:,|$)");
  std::string quoted;
  while (RE2::Consume(&remainder, value_re, &quoted)) {
    const std::string expanded = ExpandEscapes(quoted);
    if (expanded.size() == 1) {
      values.push_back(expanded[0]);
      continue;
    }
    return absl::InvalidArgumentError(
        absl::StrCat("Invalid character: '", quoted, "'"));
  }
  return values;
}
// Specialization for std::string: parses a comma separated list of double
// quoted strings and returns their contents with escape sequences expanded
// (see ExpandEscapes). The text between the quotes may not itself contain a
// double quote character. `resolver` is not used.
template <>
absl::StatusOr<std::vector<std::string>> GetValues<std::string>(
    absl::string_view remainder, ResolverInterface* resolver) {
  static RE2 value_re("\\s*\"([^\"]*)\"\\s*(?:,|$)");
  std::vector<std::string> strings;
  std::string quoted;
  for (;;) {
    if (!RE2::Consume(&remainder, value_re, &quoted)) break;
    strings.push_back(ExpandEscapes(quoted));
  }
  return strings;
}
// Parses a comma separated list of labels (unquoted identifiers made of
// letters, digits and underscores, not starting with a digit) from the
// argument text of a directive. Parsing stops at the first position that does
// not match; the labels collected so far are returned.
absl::StatusOr<std::vector<std::string>> GetLabels(
    absl::string_view remainder) {
  static RE2 label_re("\\s*([a-zA-Z_][a-zA-Z0-9_]*)\\s*(?:,|$)");
  std::vector<std::string> labels;
  std::string identifier;
  for (;;) {
    if (!RE2::Consume(&remainder, label_re, &identifier)) break;
    labels.push_back(identifier);
  }
  return labels;
}
// Appends the object representation of every element of `values` to `bytes`.
// Each element contributes sizeof(T) bytes, in the host's byte order, in the
// order the elements appear in `values`. Existing contents of `bytes` are
// preserved.
//
// The bytes are obtained with std::memcpy rather than by reading an inactive
// union member, which is undefined behavior in C++.
template <typename T>
inline void ConvertToBytes(const std::vector<T>& values,
                           std::vector<uint8_t>& bytes) {
  for (const T& value : values) {
    uint8_t raw[sizeof(T)];
    std::memcpy(raw, &value, sizeof(T));
    bytes.insert(bytes.end(), raw, raw + sizeof(T));
  }
}
} // namespace
// Constructs an assembler that writes a little-endian ELF file of the given
// class.
//
//   comment             - text that introduces a comment in the source. It is
//                         spliced into comment_re_ verbatim.
//                         NOTE(review): regex metacharacters in `comment` are
//                         therefore interpreted as regex syntax - confirm that
//                         callers pass a regex-safe (or pre-escaped) string.
//   elf_file_class      - ELF class; compared against ELFIO::ELFCLASS64 and
//                         ELFIO::ELFCLASS32 elsewhere in this file.
//   opcode_assembler_if - target specific encoder used by ParseAsmStatement.
//                         The pointer is stored, not owned.
//
// The constructor creates the .symtab and .strtab sections and their
// accessors, then the .text, .data and .bss sections, and finally clears the
// current section.
SimpleAssembler::SimpleAssembler(absl::string_view comment, int elf_file_class,
OpcodeAssemblerInterface* opcode_assembler_if)
: elf_file_class_(elf_file_class),
opcode_assembler_if_(opcode_assembler_if),
// Group 1: the text before the comment (if any). Group 2: an optional
// backslash at the very end of the line.
comment_re_(absl::StrCat("^(.*?)(?:", comment, ".*?)?(\\\\)?$")),
// Group 1: an optional label (the non-space text before a ':'). Group 2: the
// rest of the line, i.e., the statement or directive.
asm_line_re_("^(?:(?:(\\S+)\\s*:)?|\\s)\\s*(.*)\\s*$"),
// Group 1: the directive name (without the leading '.'). Group 2: the
// optional argument text. Note that "type" is accepted by this pattern but
// has no handler in ParseAsmDirective.
directive_re_(
"^\\.(align|bss|bytes|char|cstring|data|global|long|sect"
"|short|space|string|type|text|uchar|ulong|ushort|uword|word)(?:\\s+("
".*)"
")?\\s*"
"$") {
// Configure the ELF file writer.
writer_.create(elf_file_class_, ELFIO::ELFDATA2LSB);
writer_.set_os_abi(ELFIO::ELFOSABI_NONE);
writer_.set_machine(ELFIO::EM_NONE);
// Create the symbol table section.
symtab_ = writer_.sections.add(".symtab");
section_index_map_.insert({symtab_->get_index(), symtab_});
symtab_->set_type(ELFIO::SHT_SYMTAB);
symtab_->set_addr_align(0x8);
// The entry size depends on the ELF class.
symtab_->set_entry_size(elf_file_class_ == ELFIO::ELFCLASS64
? sizeof(ELFIO::Elf64_Sym)
: sizeof(ELFIO::Elf32_Sym));
// Create the string table section.
strtab_ = writer_.sections.add(".strtab");
section_index_map_.insert({strtab_->get_index(), strtab_});
strtab_->set_type(ELFIO::SHT_STRTAB);
strtab_->set_addr_align(0x1);
// Link the symbol table to the string table.
symtab_->set_link(strtab_->get_index());
// Create the symbol and string table accessors. They are released in the
// destructor.
symbol_accessor_ = new ELFIO::symbol_section_accessor(writer_, symtab_);
string_accessor_ =
new ELFIO::string_section_accessor(writer_.sections[".strtab"]);
// Create .text, .data. and .bss sections. Each call also adds a section
// symbol and makes the new section the current one.
SetTextSection(".text");
SetDataSection(".data");
SetBssSection(".bss");
// Clear the current section, so that the source has to select a section
// before anything is placed in one.
current_section_ = nullptr;
}
// Releases the symbol and string table accessors allocated by the constructor
// and clears the member pointers.
SimpleAssembler::~SimpleAssembler() {
  delete symbol_accessor_;
  delete string_accessor_;
  symbol_accessor_ = nullptr;
  string_accessor_ = nullptr;
}
// First pass over the assembly source read from `is`.
//
// Every logical line is stripped of comments and split into an optional label
// and a statement. Statements are run through ParseAsmDirective or
// ParseAsmStatement so that symbols are added to the symbol table and the
// per-section location counters in section_address_map_ advance. The
// statements (without labels) are saved in lines_ for the second pass. A line
// consisting of only a label adds that label as a symbol in the current
// section.
//
//   is            - the input stream with the assembly source.
//   zero_resolver - resolver used for symbol references during this pass. If
//                   null, a ZeroResolver that records referenced names via
//                   SimpleAddSymbol is created for the duration of the call.
//
// Returns OkStatus on success. Returns an error if a line cannot be parsed, a
// directive or statement fails, a symbol cannot be added, or the stream ends
// in a state other than end-of-file.
absl::Status SimpleAssembler::Parse(std::istream& is,
                                    ResolverInterface* zero_resolver) {
  // Own the internally created resolver so that it is released on every
  // return path, including the early error returns inside the loop.
  std::unique_ptr<ResolverInterface> owned_resolver;
  if (zero_resolver == nullptr) {
    owned_resolver = std::make_unique<ZeroResolver>(
        absl::bind_front(&SimpleAssembler::SimpleAddSymbol, this));
    zero_resolver = owned_resolver.get();
  }
  // First pass of parsing the input stream. This will add symbols to the symbol
  // table and compute the sizes of all instructions and the sections. The
  // section_address_map_ will keep track of the current location within each
  // section (i.e., the offset within the section of the next
  // instruction/object).
  std::string label;
  std::string statement;
  while (is.good() && !is.eof()) {
    // Assemble one logical line. A physical line whose last character (after
    // comment removal) is a backslash is joined with the following line.
    std::string line;
    while (true) {
      std::string tmp;
      if (!is.good() || is.eof()) break;
      std::getline(is, tmp);
      std::string prefix;
      std::string suffix;
      // Remove comments from the input line.
      if (!RE2::FullMatch(tmp, comment_re_, &prefix, &suffix)) {
        return absl::InternalError("Failed to parse comment");
      }
      tmp = absl::StrCat(prefix, suffix);
      if (!tmp.empty() && (tmp.back() == '\\')) {
        // Escaped newline: append the text followed by the newline that
        // getline removed, and keep reading.
        absl::StrAppend(&line, tmp, "\n");
        continue;
      }
      absl::StrAppend(&line, tmp);
      break;
    }
    if (line.empty()) continue;
    // Parse the line into a label and a statement. This is done to determine if
    // the line contains a label, only a label, or if the statement is directive
    // or not.
    if (!RE2::FullMatch(line, asm_line_re_, &label, &statement)) {
      return absl::AbortedError(absl::StrCat("Parse failure: '", line, "'"));
    }
    std::vector<uint8_t> byte_vector;
    std::vector<RelocationInfo> relo_vector;
    auto* section = current_section_;
    uint64_t address = (section == nullptr) ? 0 : section_address_map_[section];
    if (!statement.empty()) {
      absl::Status status;
      // Pass the full line into the parse functions, they are responsible
      // for handling the labels in pass one.
      if (statement[0] == '.') {
        status = ParseAsmDirective(line, address, zero_resolver, byte_vector,
                                   relo_vector);
      } else {
        status = ParseAsmStatement(line, address, zero_resolver, byte_vector,
                                   relo_vector);
      }
      if (!status.ok()) return status;
      // Save the statements for processing in pass two (labels are all
      // processed in pass one).
      lines_.push_back(statement);
    } else if (!label.empty()) {
      // This is just a single label definition. Add it to the symbol table.
      uint64_t symbol_address = address;
      if ((current_section_ == data_section_) ||
          (current_section_ == bss_section_)) {
        symbol_address = address / data_address_unit_;
      }
      auto status = AddSymbolToCurrentSection(label, symbol_address, 0,
                                              ELFIO::STT_NOTYPE, 0, 0);
      if (!status.ok()) return status;
    }
  }
  if (!is.eof()) {
    return absl::InternalError("Input stream entered bad state");
  }
  // Add undefined symbols to the symbol table.
  for (auto const& symbol : undefined_symbols_) {
    auto status = AddSymbol(symbol, 0, 0, ELFIO::STT_NOTYPE, 0, 0, nullptr);
    if (!status.ok()) {
      return absl::InternalError(absl::StrCat(
          "Failed to add undefined symbol '", symbol, "': ", status.message()));
    }
  }
  undefined_symbols_.clear();
  // The bss section holds no data, so its size is set from the location
  // counter.
  if (bss_section_ != nullptr) {
    bss_section_->set_size(section_address_map_[bss_section_]);
  }
  return absl::OkStatus();
}
// Convenience overload taking a numeric entry point. The value is formatted
// as decimal text and forwarded to the overload that takes the entry point as
// a string.
absl::Status SimpleAssembler::CreateExecutable(
    uint64_t base_address, uint64_t entry_point,
    ResolverInterface* symbol_resolver) {
  const std::string entry_point_text = absl::StrCat(entry_point);
  return CreateExecutable(base_address, entry_point_text, symbol_resolver);
}
// Rewrites the symbol table for an executable file. SymbolType is the ELF
// symbol entry type matching the file class (Elf32_Sym or Elf64_Sym).
//
// Every symbol whose name is in global_symbols_ gets global binding, and the
// value of every symbol defined in the text, data or bss section is offset by
// the start address of the segment that holds that section. Symbols in other
// sections keep their value.
template <typename SymbolType>
void SimpleAssembler::UpdateSymbolsForExecutable(uint64_t text_segment_start,
                                                 uint64_t data_segment_start,
                                                 uint64_t bss_segment_start) {
  const auto entry_count = symtab_->get_size() / sizeof(SymbolType);
  const auto byte_count = entry_count * sizeof(SymbolType);
  // Work on a private copy of the table and write it back when done.
  std::vector<SymbolType> table(entry_count);
  std::memcpy(table.data(), symtab_->get_data(), byte_count);
  for (SymbolType& entry : table) {
    const auto section_index = entry.st_shndx;
    std::string symbol_name = string_accessor_->get_string(entry.st_name);
    if (global_symbols_.contains(symbol_name)) {
      entry.st_info =
          ELF_ST_INFO(ELFIO::STB_GLOBAL, ELF_ST_TYPE(entry.st_info));
    }
    const bool in_text = (text_section_ != nullptr) &&
                         (section_index == text_section_->get_index());
    if (in_text) {
      entry.st_value += text_segment_start;
      continue;
    }
    const bool in_data = (data_section_ != nullptr) &&
                         (section_index == data_section_->get_index());
    if (in_data) {
      entry.st_value += data_segment_start;
      continue;
    }
    const bool in_bss = (bss_section_ != nullptr) &&
                        (section_index == bss_section_->get_index());
    if (in_bss) entry.st_value += bss_segment_start;
  }
  symtab_->set_data(reinterpret_cast<char*>(table.data()), byte_count);
}
// Rewrites the symbol table for a relocatable file: every symbol whose name
// is in global_symbols_ gets global binding. Symbol values are left as they
// are. SymbolType is the ELF symbol entry type matching the file class.
template <typename SymbolType>
void SimpleAssembler::UpdateSymbolsForRelocatable() {
  const auto entry_count = symtab_->get_size() / sizeof(SymbolType);
  const auto byte_count = entry_count * sizeof(SymbolType);
  // Work on a private copy of the table and write it back when done.
  std::vector<SymbolType> table(entry_count);
  std::memcpy(table.data(), symtab_->get_data(), byte_count);
  for (SymbolType& entry : table) {
    std::string symbol_name = string_accessor_->get_string(entry.st_name);
    if (!global_symbols_.contains(symbol_name)) continue;
    entry.st_info = ELF_ST_INFO(ELFIO::STB_GLOBAL, ELF_ST_TYPE(entry.st_info));
  }
  symtab_->set_data(reinterpret_cast<char*>(table.data()), byte_count);
}
// Lays out the sections for an executable ELF file, updates the symbol table
// accordingly, runs the second assembly pass and sets the entry point.
//
//   base_address    - requested start of the text segment; it is rounded down
//                     to a multiple of 4096.
//   entry_point     - entry point as text: a decimal or 0x-prefixed number, or
//                     a symbol name that is looked up with the resolver.
//   symbol_resolver - resolver used in the second pass. If null, a
//                     SymbolResolver over the symbol table is created for the
//                     duration of the call.
//
// Returns an error if there are undefined symbols, a segment cannot be
// created, the ELF class is unsupported, the second pass fails, or the entry
// point cannot be converted to a value.
absl::Status SimpleAssembler::CreateExecutable(
uint64_t base_address, const std::string& entry_point,
ResolverInterface* symbol_resolver) {
// An executable cannot contain unresolved references.
if (!undefined_symbols_.empty()) {
std::string message;
absl::StrAppend(
&message,
"Cannot create executable with the following undefined symbols: ");
for (auto const& symbol : undefined_symbols_) {
absl::StrAppend(&message, " ", symbol, "\n");
}
return absl::InvalidArgumentError(message);
}
writer_.set_type(ELFIO::ET_EXEC);
// Section sizes are now known. So let's compute the layout and update all
// the symbol values/addresses before the next pass.
// The layout is:
// text segment starting at base address + any alignment.
// data segment starting at the end of the text segment + any alignment.
// The bss section is added to the end of the data segment + any alignment.
ELFIO::segment* text_segment = nullptr;
uint64_t text_segment_start = 0;
if (text_section_ != nullptr) {
// Round the base address down to a 4096 byte boundary.
text_segment_start = base_address & ~4095ULL;
text_segment = writer_.segments.add();
if (text_segment == nullptr) {
return absl::InternalError("Failed to create elf segment for text");
}
text_segment->set_type(ELFIO::PT_LOAD);
text_segment->set_virtual_address(text_segment_start);
text_segment->set_physical_address(text_segment_start);
text_segment->set_flags(ELFIO::PF_X | ELFIO::PF_R);
text_segment->set_align(4096);
}
ELFIO::segment* data_segment = nullptr;
uint64_t data_segment_start = 0;
uint64_t bss_segment_start = 0;
if ((data_section_ != nullptr) || (bss_section_ != nullptr)) {
// A non-zero data_segment_start_ overrides the default placement, which is
// the end of the text rounded up to a 4096 byte boundary.
if (data_segment_start_ != 0) {
data_segment_start = data_segment_start_;
} else {
data_segment_start =
(text_segment_start + section_address_map_[text_section_] + 4095) &
~4095ULL;
}
data_segment = writer_.segments.add();
if (data_segment == nullptr) {
return absl::InternalError("Failed to create elf segment for data");
}
data_segment->set_type(ELFIO::PT_LOAD);
data_segment->set_virtual_address(data_segment_start);
data_segment->set_physical_address(data_segment_start);
data_segment->set_flags(ELFIO::PF_W | ELFIO::PF_R);
data_segment->set_align(4096);
// The bss section starts after the data, rounded up to the bss alignment.
// NOTE(review): bss_section_ and data_section_ are both used here without a
// null check, although the enclosing condition only requires one of them to
// be non-null. The constructor in this file creates both sections - confirm
// that they cannot be null at this point.
uint64_t bss_align = bss_section_->get_addr_align() - 1;
bss_segment_start =
(data_segment_start + section_address_map_[data_section_] + bss_align) &
~bss_align;
}
// Now we can update the symbol table based on the new section sizes.
// Different size symbol table entries for 32 and 64 bit ELF files.
if (elf_file_class_ == ELFIO::ELFCLASS64) {
UpdateSymbolsForExecutable<ELFIO::Elf64_Sym>(
text_segment_start, data_segment_start, bss_segment_start);
} else if (elf_file_class_ == ELFIO::ELFCLASS32) {
UpdateSymbolsForExecutable<ELFIO::Elf32_Sym>(
text_segment_start, data_segment_start, bss_segment_start);
} else {
return absl::InternalError(
absl::StrCat("Unsupported ELF file class: ", elf_file_class_));
}
// Update the section address map so that each section starts at the right
// address, i.e., it no longer tracks the offset within each section, but the
// absolute address.
section_address_map_[text_section_] = text_segment_start;
section_address_map_[data_section_] = data_segment_start;
section_address_map_[bss_section_] = bss_segment_start;
// Create a resolver over the symbol table if the caller did not supply one.
// `cleanup` releases it and is invoked on every return path below.
std::function<void()> cleanup = []() {};
if (symbol_resolver == nullptr) {
symbol_resolver =
new SymbolResolver(elf_file_class_, symtab_, symbol_indices_);
cleanup = [symbol_resolver]() { delete symbol_resolver; };
}
// Pass in the relocation vector to the second pass of parsing, but ignore
// the values, since we are creating an executable file, and all the symbols
// are resolved.
std::vector<RelocationInfo> relo_vector;
auto status = ParsePassTwo(relo_vector, symbol_resolver);
if (!status.ok()) {
cleanup();
return status;
}
// Add sections to the segments. First segment gets the text section. The
// second segment gets the data and bss sections.
if (text_segment != nullptr) {
text_segment->add_section_index(text_section_->get_index(),
text_section_->get_addr_align());
}
if (data_segment != nullptr) {
data_segment->add_section_index(data_section_->get_index(),
data_section_->get_addr_align());
data_segment->add_section_index(bss_section_->get_index(),
bss_section_->get_addr_align());
}
// Convert the entry point text to a value. This is done before the symbol
// table is rearranged below.
auto res = SimpleTextToInt<uint64_t>(entry_point, symbol_resolver);
if (!res.ok()) {
cleanup();
return res.status();
}
uint64_t entry_point_value = res.value();
symbol_accessor_->arrange_local_symbols();
writer_.set_entry(entry_point_value);
cleanup();
return absl::OkStatus();
}
namespace {
// Appends one relocation entry of type RelocaType (Elf32_Rela or Elf64_Rela)
// to `reloca_section` for every element of `relo_vector`. The symbol named by
// each element is translated to its symbol table index using
// `symbol_indices`. Returns an InvalidArgument error as soon as a symbol name
// is not present in the map; entries appended before that point remain in the
// section.
template <typename RelocaType>
absl::Status AddRelocationEntries(
    const std::vector<RelocationInfo>& relo_vector,
    absl::flat_hash_map<std::string, ELFIO::Elf_Word>& symbol_indices,
    ELFIO::section* reloca_section) {
  // The r_info encoding differs between the 32 and 64 bit entry types.
  const bool is_64_bit = sizeof(RelocaType) == sizeof(ELFIO::Elf64_Rela);
  for (const auto& info : relo_vector) {
    const auto found = symbol_indices.find(info.symbol);
    if (found == symbol_indices.end()) {
      return absl::InvalidArgumentError(
          absl::StrCat("Symbol '", info.symbol, "' not found"));
    }
    RelocaType entry;
    entry.r_offset = info.offset;
    entry.r_addend = info.addend;
    if (is_64_bit) {
      entry.r_info = ELF64_R_INFO(found->second, info.type);
    } else {
      entry.r_info = ELF32_R_INFO(found->second, info.type);
    }
    reloca_section->append_data(reinterpret_cast<const char*>(&entry),
                                sizeof(RelocaType));
  }
  return absl::OkStatus();
}
} // namespace
// Walks the symbol table, records the index of every symbol name in
// symbol_indices_ (a name that is already present keeps its index), and sets
// the info field of the symbol table section to one past the index of the
// last symbol with local binding. SymbolType is the ELF symbol entry type
// matching the file class.
template <typename SymbolType>
void SimpleAssembler::UpdateSymtabHeaderInfo() {
  const auto* entries =
      reinterpret_cast<const SymbolType*>(symtab_->get_data());
  const auto entry_count = symtab_->get_size() / sizeof(SymbolType);
  int last_local_index = 0;
  for (int index = 0; index < entry_count; ++index) {
    const SymbolType& entry = entries[index];
    auto symbol_name = string_accessor_->get_string(entry.st_name);
    symbol_indices_.insert({symbol_name, index});
    if (ELF_ST_BIND(entry.st_info) == ELFIO::STB_LOCAL) {
      last_local_index = index;
    }
  }
  symtab_->set_info(last_local_index + 1);
}
// Produces the contents of a relocatable ELF file: marks global symbols,
// rearranges the symbol table so that local symbols come first, runs the
// second assembly pass, and creates one .rela<section> section for every
// section that has relocations.
//
//   symbol_resolver - resolver used in the second pass. If null, a
//                     SymbolResolver over the symbol table is created for the
//                     duration of the call.
//
// Returns an error if the ELF class is unsupported, the second pass fails, a
// relocation refers to section index 0 or to an unknown section, or a
// relocation names a symbol that is not in the symbol table.
absl::Status SimpleAssembler::CreateRelocatable(
    ResolverInterface* symbol_resolver) {
  writer_.set_type(ELFIO::ET_REL);
  // Reset the section address map to zero since we are creating a relocatable
  // file.
  section_address_map_[text_section_] = 0;
  section_address_map_[data_section_] = 0;
  section_address_map_[bss_section_] = 0;
  // Since the symbols now are rearranged, we need to set global symbols flag
  // for those in the global_symbols_ set.
  // Different size symbol table entries for 32 and 64 bit ELF files.
  if (elf_file_class_ == ELFIO::ELFCLASS64) {
    UpdateSymbolsForRelocatable<ELFIO::Elf64_Sym>();
  } else if (elf_file_class_ == ELFIO::ELFCLASS32) {
    UpdateSymbolsForRelocatable<ELFIO::Elf32_Sym>();
  } else {
    return absl::InternalError(
        absl::StrCat("Unsupported ELF file class: ", elf_file_class_));
  }
  // Rearrange local symbols in the symbol table so that they are at the
  // beginning (ELF requirement).
  symbol_accessor_->arrange_local_symbols(nullptr);
  // Find the last local symbol and set the section header info for symbtab
  // to point to 1 past that. Update the symbol_indices_ map.
  symbol_indices_.clear();
  if (elf_file_class_ == ELFIO::ELFCLASS64) {
    UpdateSymtabHeaderInfo<ELFIO::Elf64_Sym>();
  } else {
    UpdateSymtabHeaderInfo<ELFIO::Elf32_Sym>();
  }
  // Own the internally created resolver so that it is released on every
  // return path.
  std::unique_ptr<ResolverInterface> owned_resolver;
  if (symbol_resolver == nullptr) {
    owned_resolver = std::make_unique<SymbolResolver>(elf_file_class_, symtab_,
                                                      symbol_indices_);
    symbol_resolver = owned_resolver.get();
  }
  // Parse the source again, collect relocations.
  std::vector<RelocationInfo> relo_vector;
  auto status = ParsePassTwo(relo_vector, symbol_resolver);
  if (!status.ok()) return status;
  // Nothing more to do if there are no relocations.
  if (relo_vector.empty()) return absl::OkStatus();
  // First scan through the entries relocation vector and group them by
  // the section in which the relocation is to be applied.
  absl::flat_hash_map<uint16_t, std::vector<RelocationInfo>> relo_map;
  for (auto const& relo : relo_vector) {
    relo_map[relo.section_index].push_back(relo);
  }
  for (auto const& [section_index, relo_vec] : relo_map) {
    if (section_index == 0) {
      return absl::InternalError(
          "Relocation entry with section index 0 not supported");
    }
    if (!section_index_map_.contains(section_index)) {
      return absl::InternalError(
          absl::StrCat("Section index not found: ", section_index));
    }
    // Now, create a relocation section for each key in the map.
    std::string name =
        absl::StrCat(".rela", section_index_map_[section_index]->get_name());
    auto* rela_section = writer_.sections.add(name);
    rela_section->set_type(ELFIO::SHT_RELA);
    rela_section->set_flags(ELFIO::SHF_INFO_LINK);
    rela_section->set_entry_size(elf_file_class_ == ELFIO::ELFCLASS64
                                     ? sizeof(ELFIO::Elf64_Rela)
                                     : sizeof(ELFIO::Elf32_Rela));
    rela_section->set_link(symtab_->get_index());
    // The info field of a relocation section holds the index of the section
    // the relocations apply to, which is the section these entries were
    // grouped under (not necessarily the text section).
    rela_section->set_info(section_index);
    rela_section->set_addr_align(8);
    // Process the relocation vector entries.
    absl::Status relo_status;
    if (elf_file_class_ == ELFIO::ELFCLASS64) {
      relo_status = AddRelocationEntries<ELFIO::Elf64_Rela>(
          relo_vec, symbol_indices_, rela_section);
    } else if (elf_file_class_ == ELFIO::ELFCLASS32) {
      relo_status = AddRelocationEntries<ELFIO::Elf32_Rela>(
          relo_vec, symbol_indices_, rela_section);
    } else {
      return absl::InternalError(
          absl::StrCat("Unsupported ELF file class: ", elf_file_class_));
    }
    if (!relo_status.ok()) return relo_status;
  }
  return absl::OkStatus();
}
// Second assembly pass. Re-parses every statement saved in lines_ during the
// first pass, this time using `symbol_resolver` for symbol values, and
// appends the generated bytes to the section that was current when the
// statement was parsed (no bytes are appended to the bss section).
//
// Relocations produced by a statement are appended to `relo_vector`; their
// section index and offset are set to the section and address of the
// statement that produced them.
//
// Returns the first error reported by ParseAsmDirective or ParseAsmStatement,
// or an Internal error if bytes or relocations are produced while there is no
// current section.
absl::Status SimpleAssembler::ParsePassTwo(
    std::vector<RelocationInfo>& relo_vector,
    ResolverInterface* symbol_resolver) {
  // Now fill in the sections. Parse each of the lines saved in the first
  // pass.
  for (auto const& line : lines_) {
    std::vector<uint8_t> byte_vector;
    auto* section = current_section_;
    const auto relo_size = relo_vector.size();
    const auto address = section_address_map_[section];
    // Use a single status variable, so that a failure in either branch is
    // seen by the check below.
    absl::Status status;
    if (line[0] == '.') {
      status = ParseAsmDirective(line, address, symbol_resolver, byte_vector,
                                 relo_vector);
    } else {
      status = ParseAsmStatement(line, address, symbol_resolver, byte_vector,
                                 relo_vector);
    }
    if (!status.ok()) return status;
    // Update section information in the relocation vector.
    if (relo_vector.size() > relo_size) {
      if (section == nullptr) {
        return absl::InternalError("Relocation is added to a null section");
      }
      for (size_t i = relo_size; i < relo_vector.size(); ++i) {
        relo_vector[i].section_index = section->get_index();
        relo_vector[i].offset = address;
      }
    }
    // Go to the next line if there is no data to add to the section.
    if (byte_vector.empty()) continue;
    // Add data to the section, but first make sure it's not bss.
    if (section != bss_section_) {
      if (section == nullptr) {
        return absl::InternalError("Data is added to a null section");
      }
      section->append_data(reinterpret_cast<const char*>(byte_vector.data()),
                           byte_vector.size());
    }
  }
  return absl::OkStatus();
}
// Top level function that writes the ELF file to the given output stream.
// Returns an Internal error if the ELF writer reports that saving failed.
absl::Status SimpleAssembler::Write(std::ostream& os) {
  if (!writer_.save(os)) {
    return absl::InternalError("Failed to write ELF file to output stream");
  }
  return absl::OkStatus();
}
// Parse and process an assembly directive. The line may start with a label
// followed by ':' and must otherwise consist of a period, a directive name
// and an optional argument. The argument is parsed using regular expressions.
//
//   line        - the source line (with or without a label).
//   address     - the address at the start of the directive; it becomes the
//                 value of the label, if there is one.
//   resolver    - forwarded to the value parsers of some directives.
//   byte_values - the bytes generated by data directives are appended here.
//   relocations - not used by directives.
//
// Section directives (.text, .data, .bss) switch the current section. Data
// directives append their bytes to `byte_values`. For every directive that
// occupies space, the location counter of the section that was current on
// entry is advanced by the size. A label is added as a local symbol in that
// same section.
//
// Returns InvalidArgument for malformed lines, unknown directives, bad
// arguments, and labels or .align outside of a section; Unimplemented for
// .sect; and the error from AddSymbol if the label cannot be added (e.g., it
// already exists).
absl::Status SimpleAssembler::ParseAsmDirective(
    absl::string_view line, uint64_t address, ResolverInterface* resolver,
    std::vector<uint8_t>& byte_values,
    std::vector<RelocationInfo>& relocations) {
  std::string match;
  std::string remainder;
  ELFIO::section* section = current_section_;
  uint64_t size = 0;
  std::string directive;
  std::string label;
  if (!RE2::FullMatch(line, asm_line_re_, &label, &directive)) {
    return absl::InvalidArgumentError(
        absl::StrCat("Invalid assembly line: '", line, "'"));
  }
  if (!RE2::FullMatch(directive, directive_re_, &match, &remainder)) {
    return absl::InvalidArgumentError(
        absl::StrCat("Invalid directive: '", directive, "'"));
  }
  if (match == "align") {
    // .align <n>
    if (section == nullptr) {
      return absl::InvalidArgumentError(
          absl::StrCat("No section for directive: '", directive, "'"));
    }
    auto res = SimpleTextToInt<uint64_t>(remainder);
    if (!res.ok()) return res.status();
    uint64_t align = res.value();
    // Verify that the alignment is a power of two. Zero has to be rejected
    // explicitly: it passes the bit test, but would make the size computed
    // below wrap around.
    if ((align == 0) || ((align & (align - 1)) != 0)) {
      return absl::InvalidArgumentError(
          absl::StrCat("Invalid alignment: '", directive, "'"));
    }
    uint64_t section_address = section_address_map_[section];
    size = ((section_address + align - 1) & ~(align - 1)) - section_address;
    // NOTE(review): no padding bytes are appended to byte_values here (nor
    // for .space below), so only the location counter advances - confirm that
    // section contents stay in step with the tracked addresses for sections
    // other than bss.
  } else if (match == "bss") {
    // .bss
    SetBssSection(".bss");
  } else if (match == "bytes") {
    // .bytes
    auto res = GetValues<uint8_t>(remainder, resolver);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size();
    for (auto const& value : values) byte_values.push_back(value);
  } else if (match == "char") {
    // .char
    auto res = GetValues<char>(remainder, resolver);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size();
    for (auto const& value : values) byte_values.push_back(value);
  } else if (match == "cstring") {
    // .cstring: each string is followed by a terminating nul byte.
    auto res = GetValues<std::string>(remainder, resolver);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = 0;
    for (auto const& value : values) {
      for (auto const& c : value) byte_values.push_back(c);
      byte_values.push_back('\0');
      size += value.size() + 1;
    }
  } else if (match == "data") {
    // .data
    SetDataSection(".data");
  } else if (match == "global") {
    // .global <name>
    auto res = GetLabels(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    for (auto const& value : values) {
      global_symbols_.insert(value);
    }
  } else if (match == "long") {
    // .long
    auto res = GetValues<int64_t>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size() * sizeof(int64_t);
    ConvertToBytes<int64_t>(values, byte_values);
  } else if (match == "sect") {
    // .section <name>,<type>
    // TODO(torerik): Implement.
    return absl::UnimplementedError("Section directive not implemented");
  } else if (match == "short") {
    // .short
    auto res = GetValues<int16_t>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size() * sizeof(int16_t);
    ConvertToBytes<int16_t>(values, byte_values);
  } else if (match == "space") {
    // .space <n>
    auto res = SimpleTextToInt<uint64_t>(remainder);
    if (!res.ok()) return res.status();
    size = res.value();
  } else if (match == "string") {
    // .string: like .cstring, but without the terminating nul byte.
    auto res = GetValues<std::string>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = 0;
    for (auto const& value : values) {
      for (auto const& c : value) byte_values.push_back(c);
      size += value.size();
    }
  } else if (match == "text") {
    // .text
    SetTextSection(".text");
  } else if (match == "uchar") {
    // .uchar
    auto res = GetValues<uint8_t>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size();
    for (auto const& value : values) byte_values.push_back(value);
  } else if (match == "ulong") {
    // .ulong
    auto res = GetValues<uint64_t>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size() * sizeof(uint64_t);
    ConvertToBytes<uint64_t>(values, byte_values);
  } else if (match == "ushort") {
    // .ushort
    auto res = GetValues<uint16_t>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size() * sizeof(uint16_t);
    ConvertToBytes<uint16_t>(values, byte_values);
  } else if (match == "uword") {
    // .uword
    auto res = GetValues<uint32_t>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size() * sizeof(uint32_t);
    ConvertToBytes<uint32_t>(values, byte_values);
  } else if (match == "word") {
    // .word
    auto res = GetValues<int32_t>(remainder);
    if (!res.ok()) return res.status();
    auto values = res.value();
    size = values.size() * sizeof(int32_t);
    ConvertToBytes<int32_t>(values, byte_values);
  } else {
    // Reached for names that directive_re_ accepts but that have no handler
    // above.
    return absl::InvalidArgumentError(
        absl::StrCat("Unsupported directive: '", directive, "'"));
  }
  // Advance the location counter of the section that was current on entry.
  if ((size > 0) && (section != nullptr)) {
    if (!section_address_map_.contains(section)) {
      return absl::InternalError(
          absl::StrCat("No address for section '", section->get_name(), "'"));
    }
    section_address_map_[section] += size;
  }
  if (!label.empty()) {
    // When initially adding symbols, the address is relative to the start
    // of the containing section. This will be corrected later.
    if (section == nullptr) {
      return absl::InvalidArgumentError(
          absl::StrCat("Label: '", label, "' defined outside of a section"));
    }
    // Report a failure to add the label (e.g., a duplicate definition), in
    // the same way as is done for lines that consist of only a label.
    auto status = AddSymbol(label, address, size, ELFIO::STT_NOTYPE,
                            ELFIO::STB_LOCAL, 0, section);
    if (!status.ok()) return status;
  }
  return absl::OkStatus();
}
// Encodes a single assembly statement with the target specific opcode
// assembler. The encoder appends the generated bytes to `byte_values` and any
// relocations to `relocations`, and is given a callback that adds symbols to
// the current section. On success, the location counter of the current
// section is advanced by the value returned from the encoder. On failure, the
// error from the encoder is returned.
absl::Status SimpleAssembler::ParseAsmStatement(
    absl::string_view line, uint64_t address, ResolverInterface* resolver,
    std::vector<uint8_t>& byte_values,
    std::vector<RelocationInfo>& relocations) {
  auto encoded = opcode_assembler_if_->Encode(
      address, line,
      absl::bind_front(&SimpleAssembler::AddSymbolToCurrentSection, this),
      resolver, byte_values, relocations);
  if (encoded.ok()) {
    section_address_map_[current_section_] += encoded.value();
    return absl::OkStatus();
  }
  return encoded.status();
}
void SimpleAssembler::SetTextSection(const std::string& name) {
// First check if the section already exists.
auto* section = writer_.sections[name];
if (section != nullptr) {
current_section_ = section;
return;
}
section = writer_.sections.add(name);
auto status =
AddSymbol(name, 0, 0, ELFIO::STT_SECTION, ELFIO::STB_LOCAL, 0, section);
if (!status.ok()) {
LOG(ERROR) << "Failed to add symbol for data section: " << status.message();
}
section->set_type(ELFIO::SHT_PROGBITS);
section->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_EXECINSTR);
section->set_addr_align(0x10);
// Should probably add the section symbol to the symbol table.
current_section_ = section;
text_section_ = section;
section_index_map_.insert({section->get_index(), text_section_});
}
void SimpleAssembler::SetDataSection(const std::string& name) {
// First check if the section already exists.
auto* section = writer_.sections[name];
if (section != nullptr) {
current_section_ = section;
return;
}
section = writer_.sections.add(name);
auto status =
AddSymbol(name, 0, 0, ELFIO::STT_SECTION, ELFIO::STB_LOCAL, 0, section);
if (!status.ok()) {
LOG(ERROR) << "Failed to add symbol for data section: " << status.message();
}
section->set_type(ELFIO::SHT_PROGBITS);
section->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_WRITE);
section->set_addr_align(0x10);
// Should probably add the section symbol to the symbol table.
current_section_ = section;
data_section_ = section;
section_index_map_.insert({section->get_index(), data_section_});
}
void SimpleAssembler::SetBssSection(const std::string& name) {
// First check if the section already exists.
auto* section = writer_.sections[name];
if (section != nullptr) {
current_section_ = section;
return;
}
section = writer_.sections.add(name);
auto status =
AddSymbol(name, 0, 0, ELFIO::STT_SECTION, ELFIO::STB_LOCAL, 0, section);
if (!status.ok()) {
LOG(ERROR) << "Failed to add symbol for bss section: " << status.message();
}
section->set_type(ELFIO::SHT_NOBITS);
section->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_WRITE);
section->set_addr_align(0x10);
// Should probably add the section symbol to the symbol table.
current_section_ = section;
bss_section_ = section;
section_index_map_.insert({section->get_index(), bss_section_});
}
// Adds a symbol to whichever section is current at the time of the call (the
// symbol is added as undefined when there is no current section). See
// AddSymbol for the meaning of the arguments and the possible errors.
absl::Status SimpleAssembler::AddSymbolToCurrentSection(
    const std::string& name, ELFIO::Elf64_Addr value, ELFIO::Elf_Xword size,
    uint8_t type, uint8_t binding, uint8_t other) {
  ELFIO::section* const target_section = current_section_;
  return AddSymbol(name, value, size, type, binding, other, target_section);
}
// Adds a symbol to the section with the given name. An empty section name
// adds the symbol without a section (i.e., as undefined). Returns an
// InvalidArgument error if a non-empty section name does not match any
// section; otherwise returns the result of the section pointer overload.
absl::Status SimpleAssembler::AddSymbol(const std::string& name,
                                        ELFIO::Elf64_Addr value,
                                        ELFIO::Elf_Xword size, uint8_t type,
                                        uint8_t binding, uint8_t other,
                                        const std::string& section_name) {
  ELFIO::section* target_section = nullptr;
  const bool section_requested = !section_name.empty();
  if (section_requested) {
    target_section = writer_.sections[section_name];
  }
  if (section_requested && (target_section == nullptr)) {
    return absl::InvalidArgumentError(
        absl::StrCat("Section '", section_name, "' not found"));
  }
  return AddSymbol(name, value, size, type, binding, other, target_section);
}
// Adds a symbol to the symbol table and records its index in symbol_indices_.
// A null `section` adds the symbol with section index SHN_UNDEF. When the
// symbol is defined in a section, its name is also removed from the set of
// undefined symbols. Returns an AlreadyExists error, without modifying
// anything, if a symbol with that name has been recorded before.
absl::Status SimpleAssembler::AddSymbol(const std::string& name,
                                        ELFIO::Elf64_Addr value,
                                        ELFIO::Elf_Xword size, uint8_t type,
                                        uint8_t binding, uint8_t other,
                                        ELFIO::section* section) {
  if (symbol_indices_.contains(name)) {
    return absl::AlreadyExistsError(
        absl::StrCat("Symbol '", name, "' already exists"));
  }
  const bool is_defined = (section != nullptr);
  auto symbol_index = symbol_accessor_->add_symbol(
      *string_accessor_, name.c_str(), value, size, binding, type, other,
      is_defined ? section->get_index() : ELFIO::SHN_UNDEF);
  symbol_indices_.insert({name, symbol_index});
  // A definition resolves any pending undefined reference to the same name.
  if (is_defined) {
    undefined_symbols_.erase(name);
  }
  return absl::OkStatus();
}
// Records `name` as an undefined symbol, unless a symbol with that name has
// already been added to the symbol table or is already recorded as undefined.
void SimpleAssembler::SimpleAddSymbol(absl::string_view name) {
  const bool already_known =
      symbol_indices_.contains(name) || undefined_symbols_.contains(name);
  if (already_known) return;
  undefined_symbols_.insert(std::string(name));
}
// Appends `size` bytes starting at `data` to the current section. Returns a
// FailedPrecondition error if there is no current section.
absl::Status SimpleAssembler::AppendData(const char* data, size_t size) {
  ELFIO::section* const target_section = current_section_;
  if (target_section != nullptr) {
    target_section->append_data(data, size);
    return absl::OkStatus();
  }
  return absl::FailedPreconditionError("No current section");
}
} // namespace assembler
} // namespace util
} // namespace sim
} // namespace mpact