Initial commit for adding capability to generate a simple assembler from the .isa and .bin_fmt instruction set and encoding description files. PiperOrigin-RevId: 708362970 Change-Id: I6943199a81c5a22053a5ed2bb4cc888d87d49fc2
diff --git a/mpact/sim/decoder/BUILD b/mpact/sim/decoder/BUILD index d84909f..75511a5 100644 --- a/mpact/sim/decoder/BUILD +++ b/mpact/sim/decoder/BUILD
@@ -120,6 +120,7 @@ "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log", "@com_google_absl//absl/memory", "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/status",
diff --git a/mpact/sim/decoder/bin_encoding_info.h b/mpact/sim/decoder/bin_encoding_info.h index 0709a89..918198c 100644 --- a/mpact/sim/decoder/bin_encoding_info.h +++ b/mpact/sim/decoder/bin_encoding_info.h
@@ -75,6 +75,7 @@ return include_files_; } BinDecoder *decoder() const { return decoder_; } + std::string opcode_enum() const { return opcode_enum_; } private: std::string opcode_enum_;
diff --git a/mpact/sim/decoder/bin_format_visitor.cc b/mpact/sim/decoder/bin_format_visitor.cc index ccd01d6..060e102 100644 --- a/mpact/sim/decoder/bin_format_visitor.cc +++ b/mpact/sim/decoder/bin_format_visitor.cc
@@ -22,6 +22,7 @@ #include <list> #include <memory> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -175,24 +176,43 @@ ProcessSpecializations(encoding_info.get()); // Create output streams for .h and .cc files. - std::string dot_h_name = absl::StrCat(prefix, "_bin_decoder.h"); - std::string dot_cc_name = absl::StrCat(prefix, "_bin_decoder.cc"); - std::ofstream dot_h_file(absl::StrCat(directory, "/", dot_h_name)); - std::ofstream dot_cc_file(absl::StrCat(directory, "/", dot_cc_name)); + std::string dec_dot_h_name = absl::StrCat(prefix, "_bin_decoder.h"); + std::string dec_dot_cc_name = absl::StrCat(prefix, "_bin_decoder.cc"); + std::string enc_dot_h_name = absl::StrCat(prefix, "_bin_encoder.h"); + std::string enc_dot_cc_name = absl::StrCat(prefix, "_bin_encoder.cc"); + std::ofstream dec_dot_h_file(absl::StrCat(directory, "/", dec_dot_h_name)); + std::ofstream dec_dot_cc_file(absl::StrCat(directory, "/", dec_dot_cc_name)); + std::ofstream enc_dot_h_file(absl::StrCat(directory, "/", enc_dot_h_name)); + std::ofstream enc_dot_cc_file(absl::StrCat(directory, "/", enc_dot_cc_name)); - auto [h_output, cc_output] = EmitFilePrefix(dot_h_name, encoding_info.get()); - dot_h_file << h_output; - dot_cc_file << cc_output; + auto [dec_h_output, dec_cc_output] = + EmitDecoderFilePrefix(dec_dot_h_name, encoding_info.get()); + dec_dot_h_file << dec_h_output; + dec_dot_cc_file << dec_cc_output; + auto [enc_h_output, enc_cc_output] = + EmitEncoderFilePrefix(enc_dot_h_name, encoding_info.get()); + enc_dot_h_file << enc_h_output; + enc_dot_cc_file << enc_cc_output; // Output file prefix is the input file name. 
- auto [h_output2, cc_output2] = EmitCode(encoding_info.get()); - dot_h_file << h_output2; - dot_cc_file << cc_output2; - auto [h_output3, cc_output3] = - EmitFileSuffix(dot_h_name, encoding_info.get()); - dot_h_file << h_output3; - dot_cc_file << cc_output3; - dot_h_file.close(); - dot_cc_file.close(); + auto [dec_h_output2, dec_cc_output2] = EmitDecoderCode(encoding_info.get()); + dec_dot_h_file << dec_h_output2; + dec_dot_cc_file << dec_cc_output2; + auto [dec_h_output3, dec_cc_output3] = + EmitFileSuffix(dec_dot_h_name, encoding_info.get()); + dec_dot_h_file << dec_h_output3; + dec_dot_cc_file << dec_cc_output3; + auto [enc_h_output2, enc_cc_output2] = EmitEncoderCode(encoding_info.get()); + enc_dot_h_file << enc_h_output2; + enc_dot_cc_file << enc_cc_output2; + auto [enc_h_output3, enc_cc_output3] = + EmitFileSuffix(enc_dot_h_name, encoding_info.get()); + enc_dot_h_file << enc_h_output3; + enc_dot_cc_file << enc_cc_output3; + + dec_dot_h_file.close(); + dec_dot_cc_file.close(); + enc_dot_h_file.close(); + enc_dot_cc_file.close(); return absl::OkStatus(); } @@ -200,8 +220,8 @@ encoding->decoder()->CheckEncodings(); } -BinFormatVisitor::StringPair BinFormatVisitor::EmitFilePrefix( - const std::string &dot_h_name, BinEncodingInfo *encoding_info) { +BinFormatVisitor::StringPair BinFormatVisitor::EmitDecoderFilePrefix( + const std::string &dot_h_name, BinEncodingInfo *encoding_info) const { std::string h_string; std::string cc_string; @@ -262,7 +282,7 @@ return {h_string, cc_string}; } -BinFormatVisitor::StringPair BinFormatVisitor::EmitCode( +BinFormatVisitor::StringPair BinFormatVisitor::EmitDecoderCode( BinEncodingInfo *encoding) { std::string h_string; std::string cc_string; @@ -276,11 +296,10 @@ absl::StrAppend(&extractor_class, classes); } absl::StrAppend(&h_string, extractor_class, "};\n\n"); - absl::flat_hash_set<std::string> groups; auto *decoder = encoding->decoder(); // Generate the code for decoders. 
for (auto *group : decoder->instruction_group_vec()) { - auto [h_decoder, cc_decoder] = group->EmitCode(); + auto [h_decoder, cc_decoder] = group->EmitDecoderCode(); absl::StrAppend(&h_string, h_decoder); absl::StrAppend(&cc_string, cc_decoder); // Write out some summary information about the instruction encodings. @@ -291,6 +310,77 @@ return {h_string, cc_string}; } +// Emits the opening of the encoder .h and .cc files: the header guard, the +// include directives, and the opening of each namespace. +std::tuple<std::string, std::string> BinFormatVisitor::EmitEncoderFilePrefix( + const std::string &dot_h_name, BinEncodingInfo *encoding_info) const { + std::string h_string; + std::string cc_string; + + std::string guard_name = ToHeaderGuard(dot_h_name); + // The generated header declares a table of std::tuple, so include <tuple>. + absl::StrAppend(&h_string, "#ifndef ", guard_name, + "\n" + "#define ", + guard_name, + "\n" + "\n" + "#include <cstdint>\n" + "#include <iostream>\n" + "#include <tuple>\n\n"); + absl::StrAppend(&cc_string, "#include \"", dot_h_name, + "\"\n\n" + "#include <cstdint>\n\n"); + for (auto &name_space : encoding_info->decoder()->namespaces()) { + auto name_space_str = absl::StrCat("namespace ", name_space, " {\n"); + absl::StrAppend(&cc_string, name_space_str); + absl::StrAppend(&h_string, name_space_str); + } + absl::StrAppend(&h_string, "\n"); + absl::StrAppend(&cc_string, "\n"); + return std::tie(h_string, cc_string); +} + +// Emits the encoder code: the inserter functions (in the .h file) and the +// table of opcode encodings (declared in the .h, defined in the .cc file). +std::tuple<std::string, std::string> BinFormatVisitor::EmitEncoderCode( + BinEncodingInfo *encoding) { + std::string h_string; + std::string cc_string; + // Write out the inline functions for bitfield and overlay encoding. + absl::StrAppend(&h_string, "struct Encoder {\n\n"); + for (auto &[unused, format_ptr] : encoding->format_map()) { + auto functions = format_ptr->GenerateInserters(); + absl::StrAppend(&h_string, functions); + } + absl::StrAppend(&h_string, "}; // struct Encoder\n\n"); + auto *decoder = encoding->decoder(); + // Collect the encoding value and bit width of each opcode from all the + // instruction groups. 
+ absl::btree_map<std::string, std::tuple<uint64_t, int>> encodings; + for (auto *group : decoder->instruction_group_vec()) { + group->GetInstructionEncodings(encodings); + } + std::string opcode_enum = encoding->opcode_enum(); + absl::StrAppend(&h_string, + "extern const std::tuple<uint64_t, int> kOpcodeEncodings[", + encodings.size() + 1, "];\n"); + absl::StrAppend(&cc_string, + "const std::tuple<uint64_t, int> kOpcodeEncodings[", + encodings.size() + 1, "] = {\n"); + absl::StrAppend(&cc_string, " /* ", opcode_enum, + "::kNone = */ {0x0ULL, 0},\n"); + for (auto &[name, pair] : encodings) { + auto [value, width] = pair; + std::string enum_name = + absl::StrCat(opcode_enum, "::k", ToPascalCase(name)); + absl::StrAppend(&cc_string, " /* ", enum_name, " = */ {0x", + absl::Hex(value), "ULL, ", width, "},\n"); + } + absl::StrAppend(&cc_string, "};\n"); + return std::tie(h_string, cc_string); +} + // Parse the range and convert to a BitRange. BitRange BinFormatVisitor::GetBitIndexRange(BitIndexRangeCtx *ctx) { int start = ConvertToInt(ctx->number(0));
diff --git a/mpact/sim/decoder/bin_format_visitor.h b/mpact/sim/decoder/bin_format_visitor.h index f10455a..c8aec08 100644 --- a/mpact/sim/decoder/bin_format_visitor.h +++ b/mpact/sim/decoder/bin_format_visitor.h
@@ -20,6 +20,7 @@ #include <list> #include <memory> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -93,9 +94,16 @@ void PerformEncodingChecks(BinEncodingInfo *encoding); // Called to generate and emit code for the decoder according to the parsed // input file. - StringPair EmitCode(BinEncodingInfo *encoding); - StringPair EmitFilePrefix(const std::string &dot_h_name, - BinEncodingInfo *encoding_info); + StringPair EmitDecoderCode(BinEncodingInfo *encoding); + StringPair EmitDecoderFilePrefix(const std::string &dot_h_name, + BinEncodingInfo *encoding_info) const; + // Called to generate and emit code for the encoder according to the parsed + // input file. + std::tuple<std::string, std::string> EmitEncoderCode( + BinEncodingInfo *encoding); + std::tuple<std::string, std::string> EmitEncoderFilePrefix( + const std::string &dot_h_name, BinEncodingInfo *encoding_info) const; + // Generate the file suffixes (namespace closing etc.) StringPair EmitFileSuffix(const std::string &dot_h_name, BinEncodingInfo *encoding_info); // Utility methods to parse certain nodes.
diff --git a/mpact/sim/decoder/bundle.h b/mpact/sim/decoder/bundle.h index a149ebc..ae58095 100644 --- a/mpact/sim/decoder/bundle.h +++ b/mpact/sim/decoder/bundle.h
@@ -15,15 +15,12 @@ #ifndef LMPACT_SIM_DECODER_BUNDLE_H_ #define LMPACT_SIM_DECODER_BUNDLE_H_ -#include <iostream> #include <string> #include <utility> #include <vector> -#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "mpact/sim/decoder/instruction_set_contexts.h" -#include "mpact/sim/decoder/opcode.h" #include "mpact/sim/decoder/slot.h" namespace mpact {
diff --git a/mpact/sim/decoder/format.cc b/mpact/sim/decoder/format.cc index 3997dae..c107833 100644 --- a/mpact/sim/decoder/format.cc +++ b/mpact/sim/decoder/format.cc
@@ -388,6 +388,116 @@ return h_output; } +// This method generates the C++ code for field inserters for the current +// format. That is, the generated code will take the value of a field and insert +// it into the right place in the instruction word. +std::string Format::GenerateFieldInserter(const Field *field) const { + std::string h_output; + absl::StrAppend(&h_output, "static inline uint64_t Insert", + ToPascalCase(field->name), + "(uint64_t value, uint64_t inst_word) {\n"); + if (declared_width_ <= 64) { + // A shift by 64 is undefined, so a full-width field gets an all-ones mask. + uint64_t mask = field->width >= 64 ? ~0ULL : (1ULL << field->width) - 1; + mask <<= field->low; + std::string shift; + if (field->low != 0) shift = absl::StrCat(" << ", field->low); + absl::StrAppend(&h_output, " inst_word = (inst_word & ~0x", + absl::Hex(mask), "ULL)", " | ((value", shift, ") & 0x", + absl::Hex(mask), "ULL);\n"); + } else { + absl::StrAppend(&h_output, + " #error Support for formats > 64 bits not implemented - " + "yet.\n"); + } + absl::StrAppend(&h_output, + " return inst_word;\n" + "}\n"); + return h_output; +} + +// This method generates the C++ code for overlay inserters for the current +// format. That is, the generated code will take the value of an overlay and +// insert its components into the right places in the instruction word. +std::string Format::GenerateOverlayInserter(Overlay *overlay) const { + std::string h_output; + absl::StrAppend(&h_output, "static inline uint64_t Insert", + ToPascalCase(overlay->name()), + "(uint64_t value, uint64_t inst_word) {\n"); + // Mark error if either the overlay or the format is > 64 bits. + if (overlay->declared_width() > 64) { + absl::StrAppend(&h_output, + " #error Support for overlays > 64 bits not implemented - " + "yet.\n}\n"); + return h_output; + } + if (computed_width_ > 64) { + absl::StrAppend(&h_output, + " #error Support for formats > 64 bits not implemented - " + "yet.\n}\n"); + return h_output; + } + absl::StrAppend(&h_output, " uint64_t tmp;\n"); + // Track the leftmost bit in the overlay. 
+ int left = overlay->declared_width(); + for (auto &bits_or_field : overlay->component_vec()) { + int width = bits_or_field->width(); + // Ignore the bit fields in the overlay. + if (bits_or_field->high() < 0) { + left -= width; + continue; + } + uint64_t mask = width >= 64 ? ~0ULL : ((1ULL << width) - 1); + std::string shift; + if (left - width > 0) { + shift = absl::StrCat(" >> ", left - width); + } + // Extract the bits from the overlay value for the current component. + absl::StrAppend(&h_output, " tmp = (value ", shift, ") & 0x", + absl::Hex(mask), "ULL;\n"); + shift.clear(); + if (bits_or_field->low() != 0) { + shift = absl::StrCat(" << ", bits_or_field->low()); + } + // OR the component into place. The generated return statement is emitted + // once, after all the components have been inserted. + absl::StrAppend(&h_output, " inst_word |= (tmp ", shift, ");\n"); + left -= width; + } + absl::StrAppend(&h_output, " return inst_word;\n}\n"); + return h_output; +} + +// This method generates the C++ code for format inserters for the current +// format. That is, the generated code will take the value of a format and +// insert it into the right place in the instruction word. 
+std::string Format::GenerateFormatInserter(std::string_view format_alias, + const Format *format, int high, + int size) const { + std::string h_output; + // Note: `size` is currently unused; the width is taken from `format`. + absl::StrAppend(&h_output, "static inline uint64_t Insert", + ToPascalCase(format_alias), + "(uint64_t value, uint64_t inst_word) {\n"); + if (declared_width_ > 64) { + absl::StrAppend(&h_output, + " #error Support for formats > 64 bits not implemented - " + "yet.\n}\n"); + return h_output; + } + int width = format->declared_width(); + int low = high - width + 1; + // Mask covering bits [high:low]; a shift by 64 is undefined, so special-case. + uint64_t mask = width >= 64 ? ~0ULL : (1ULL << width) - 1; + mask <<= low; + std::string shift; + if (low != 0) shift = absl::StrCat(" << ", low); + absl::StrAppend(&h_output, " return (inst_word & (~0x", absl::Hex(mask), + "ULL))", " | ((value ", shift, ") & 0x", absl::Hex(mask), + "ULL);\n}\n"); + return h_output; +} + // This method generates the format extractors for the current format (for when // a format contains other formats). std::string Format::GenerateFormatExtractor(absl::string_view format_alias, @@ -485,8 +595,37 @@ return h_output; } +// Top level function called to generate all the inserters for this format. +std::string Format::GenerateInserters() const { + std::string class_output; + std::string h_output; + if (extractors_.empty() && overlay_extractors_.empty()) { + return h_output; + } + absl::StrAppend(&h_output, "struct ", ToPascalCase(name()), " {\n\n"); + // First fields and formats. + for (auto &[unused, field_or_format_ptr] : extractors_) { + if (field_or_format_ptr->is_field()) { + auto inserter = GenerateFieldInserter(field_or_format_ptr->field()); + absl::StrAppend(&h_output, inserter); + } else { + auto inserter = GenerateFormatInserter( + field_or_format_ptr->format_alias(), field_or_format_ptr->format(), + field_or_format_ptr->high(), field_or_format_ptr->size()); + absl::StrAppend(&h_output, inserter); + } + } + // Next the overlays. 
+ for (auto &[unused, overlay_ptr] : overlay_extractors_) { + auto inserter = GenerateOverlayInserter(overlay_ptr); + absl::StrAppend(&h_output, inserter); + } + absl::StrAppend(&h_output, "}; // struct ", ToPascalCase(name()), "\n\n"); + return h_output; +} + // Top level function called to generate all the extractors for this format. -std::tuple<std::string, std::string> Format::GenerateExtractors() { +std::tuple<std::string, std::string> Format::GenerateExtractors() const { std::string class_output; std::string h_output; if (extractors_.empty() && overlay_extractors_.empty()) {
diff --git a/mpact/sim/decoder/format.h b/mpact/sim/decoder/format.h index 76cc38f..1ebfbbe 100644 --- a/mpact/sim/decoder/format.h +++ b/mpact/sim/decoder/format.h
@@ -15,7 +15,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include <map> #include <string> #include <tuple> #include <vector> @@ -140,7 +139,9 @@ void PropagateExtractorsUp(); void PropagateExtractorsDown(); // Generates definitions of the field and overlay extractors in the format. - std::tuple<std::string, std::string> GenerateExtractors(); + std::tuple<std::string, std::string> GenerateExtractors() const; + // Generates definitions of the field and overlay inserters in the format. + std::string GenerateInserters() const; // True if the current format is a descendent of format. bool IsDerivedFrom(const Format *format); @@ -165,6 +166,12 @@ const Format *format, int high, int size) const; std::string GenerateOverlayExtractor(Overlay *overlay) const; + // Inserters. + std::string GenerateFieldInserter(const Field *field) const; + std::string GenerateFormatInserter(std::string_view format_alias, + const Format *format, int high, + int size) const; + std::string GenerateOverlayInserter(Overlay *overlay) const; // Return string representation of the int type that contains bitwidth bits. std::string GetIntType(int bitwidth) const; int GetIntTypeBitWidth(int bitwidth) const;
diff --git a/mpact/sim/decoder/instruction_group.cc b/mpact/sim/decoder/instruction_group.cc index 4b4e394..527c0f4 100644 --- a/mpact/sim/decoder/instruction_group.cc +++ b/mpact/sim/decoder/instruction_group.cc
@@ -21,6 +21,7 @@ #include <tuple> #include <utility> +#include "absl/container/btree_map.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "antlr4-runtime/Token.h" @@ -28,6 +29,7 @@ #include "mpact/sim/decoder/decoder_error_listener.h" #include "mpact/sim/decoder/encoding_group.h" #include "mpact/sim/decoder/extract.h" +#include "mpact/sim/decoder/format_name.h" #include "mpact/sim/decoder/instruction_encoding.h" namespace mpact { @@ -35,6 +37,8 @@ namespace decoder { namespace bin_format { +using ::mpact::sim::machine_description::instruction_set::ToPascalCase; + InstructionGroup::InstructionGroup(std::string name, int width, std::string format_name, std::string opcode_enum, @@ -174,68 +178,77 @@ } // Emit the code in the form of two strings that are returned in a tuple. -std::tuple<std::string, std::string> InstructionGroup::EmitCode() { +std::tuple<std::string, std::string> InstructionGroup::EmitDecoderCode() { std::string h_string; std::string cc_string; + if (encoding_group_vec_.empty()) return std::make_tuple(h_string, cc_string); + // First sort the encoding group vector according to the value of the // discriminator bits. std::sort(encoding_group_vec_.begin(), encoding_group_vec_.end(), &InstructionGroupLess); - if (!encoding_group_vec_.empty()) { - std::string initializers; - // The signature for the top level decode function for this instruction - // group. - std::string signature = - absl::StrCat(opcode_enum_, " Decode", this->name(), "(", - format_->uint_type_name(), " inst_word)"); - std::string w_format_signature = absl::StrCat( - "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), - "WithFormat(", format_->uint_type_name(), " inst_word)"); - // First part of the definition of the top level decoder function. 
- std::string top_level_decoder = absl::StrCat(signature, " {\n"); - std::string w_format_top_level_decoder = - absl::StrCat(w_format_signature, " {\n"); - std::string declarations = - absl::StrCat("std::pair<", opcode_enum_, ", FormatEnum> Decode", - this->name(), "None(", format_->uint_type_name(), ");\n"); - std::string definitions = absl::StrCat( - "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), - "None(", format_->uint_type_name(), ") {\n return std::make_pair(", - opcode_enum_, "::kNone, FormatEnum::kNone);\n}\n\n"); - for (size_t i = 0; i < encoding_group_vec_.size(); i++) { - auto *grp = encoding_group_vec_[i]; - std::string name = absl::StrCat(this->name(), "_", absl::Hex(i)); - grp->EmitInitializers(name, &initializers, opcode_enum_); - grp->EmitDecoders(name, &declarations, &definitions, opcode_enum_); - absl::StrAppend(&top_level_decoder, " auto opcode = Decode", name, - "(inst_word).first;\n"); - absl::StrAppend(&w_format_top_level_decoder, - " auto opcode_format = Decode", name, "(inst_word);\n"); - if (encoding_group_vec_.size() > 1) { - absl::StrAppend(&top_level_decoder, " if (opcode != ", opcode_enum_, - "::kNone) return opcode;\n"); - absl::StrAppend(&w_format_top_level_decoder, - " if (opcode_format.first != ", opcode_enum_, - "::kNone) return opcode_format;\n"); - } - } - // Last part of the definition of the top level decoder function. - absl::StrAppend(&top_level_decoder, - " return opcode;\n" - "}\n"); + std::string initializers; + // The signature for the top level decode function for this instruction + // group. + std::string signature = + absl::StrCat(opcode_enum_, " Decode", this->name(), "(", + format_->uint_type_name(), " inst_word)"); + std::string w_format_signature = absl::StrCat( + "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), + "WithFormat(", format_->uint_type_name(), " inst_word)"); + // First part of the definition of the top level decoder function. 
+ std::string top_level_decoder = absl::StrCat(signature, " {\n"); + std::string w_format_top_level_decoder = + absl::StrCat(w_format_signature, " {\n"); + std::string declarations = + absl::StrCat("std::pair<", opcode_enum_, ", FormatEnum> Decode", + this->name(), "None(", format_->uint_type_name(), ");\n"); + std::string definitions = absl::StrCat( + "std::pair<", opcode_enum_, ", FormatEnum> Decode", this->name(), "None(", + format_->uint_type_name(), ") {\n return std::make_pair(", opcode_enum_, + "::kNone, FormatEnum::kNone);\n}\n\n"); + for (size_t i = 0; i < encoding_group_vec_.size(); i++) { + auto *grp = encoding_group_vec_[i]; + std::string name = absl::StrCat(this->name(), "_", absl::Hex(i)); + grp->EmitInitializers(name, &initializers, opcode_enum_); + grp->EmitDecoders(name, &declarations, &definitions, opcode_enum_); + absl::StrAppend(&top_level_decoder, " auto opcode = Decode", name, + "(inst_word).first;\n"); absl::StrAppend(&w_format_top_level_decoder, - " return opcode_format;\n" - "}\n"); - // String the different strings together in order and return. - absl::StrAppend(&cc_string, declarations, initializers, definitions, - top_level_decoder, w_format_top_level_decoder); - absl::StrAppend(&h_string, signature, ";\n", w_format_signature, ";\n"); + " auto opcode_format = Decode", name, "(inst_word);\n"); + if (encoding_group_vec_.size() > 1) { + absl::StrAppend(&top_level_decoder, " if (opcode != ", opcode_enum_, + "::kNone) return opcode;\n"); + absl::StrAppend(&w_format_top_level_decoder, + " if (opcode_format.first != ", opcode_enum_, + "::kNone) return opcode_format;\n"); + } } + // Last part of the definition of the top level decoder function. + absl::StrAppend(&top_level_decoder, + " return opcode;\n" + "}\n"); + absl::StrAppend(&w_format_top_level_decoder, + " return opcode_format;\n" + "}\n"); + // String the different strings together in order and return. 
+ absl::StrAppend(&cc_string, declarations, initializers, definitions, + top_level_decoder, w_format_top_level_decoder); + absl::StrAppend(&h_string, signature, ";\n", w_format_signature, ";\n"); return std::make_tuple(h_string, cc_string); } +// Collect the encoding value and width for each instruction in the group. +void InstructionGroup::GetInstructionEncodings( + absl::btree_map<std::string, std::tuple<uint64_t, int>> &encodings) { + for (auto *enc : encoding_vec_) { + encodings.insert(std::make_pair(ToPascalCase(enc->name()), + std::make_tuple(enc->GetValue(), width()))); + } +} + // Write out instruction group information. std::string InstructionGroup::WriteGroup() { std::string output;
diff --git a/mpact/sim/decoder/instruction_group.h b/mpact/sim/decoder/instruction_group.h index 40459e2..f235daa 100644 --- a/mpact/sim/decoder/instruction_group.h +++ b/mpact/sim/decoder/instruction_group.h
@@ -56,7 +56,10 @@ // Check encodings for duplicates etc. void CheckEncodings(); // Generate and emit code for decoding this instruction group. - std::tuple<std::string, std::string> EmitCode(); + std::tuple<std::string, std::string> EmitDecoderCode(); + // Collect the encodings for these instructions. + void GetInstructionEncodings( + absl::btree_map<std::string, std::tuple<uint64_t, int>> &encodings); // Return a string containing information about this instruction group and // how it has been partitioned across encoding groups. std::string WriteGroup();
diff --git a/mpact/sim/decoder/instruction_set.cc b/mpact/sim/decoder/instruction_set.cc index 924a639..35641c5 100644 --- a/mpact/sim/decoder/instruction_set.cc +++ b/mpact/sim/decoder/instruction_set.cc
@@ -16,6 +16,7 @@ #include <memory> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -456,7 +457,7 @@ } InstructionSet::StringPair InstructionSet::GenerateEnums( - absl::string_view file_name) const { + absl::string_view file_name) { std::string h_output; std::string cc_output; @@ -516,6 +517,7 @@ int pred_count = 0; absl::StrAppend(&h_output, " kNone = ", pred_count++, ",\n"); for (auto const &pred_name : predicate_operands) { + pred_op_map_.insert({pred_name, pred_count}); absl::StrAppend(&h_output, " k", pred_name, " = ", pred_count++, ",\n"); } absl::StrAppend(&h_output, " kPastMaxValue = ", pred_count, @@ -526,6 +528,7 @@ int src_count = 0; absl::StrAppend(&h_output, " kNone = ", src_count++, ",\n"); for (auto const &source_name : source_operands) { + source_op_map_.insert({source_name, src_count}); absl::StrAppend(&h_output, " k", source_name, " = ", src_count++, ",\n"); } absl::StrAppend(&h_output, " kPastMaxValue = ", src_count, @@ -536,6 +539,7 @@ int list_src_count = 0; absl::StrAppend(&h_output, " kNone = ", list_src_count++, ",\n"); for (auto const &source_name : list_source_operands) { + list_source_op_map_.insert({source_name, list_src_count}); absl::StrAppend(&h_output, " k", source_name, " = ", list_src_count++, ",\n"); } @@ -547,6 +551,7 @@ int dst_count = 0; absl::StrAppend(&h_output, " kNone = ", dst_count++, ",\n"); for (auto const &dest_name : dest_operands) { + dest_op_map_.insert({dest_name, dst_count}); absl::StrAppend(&h_output, " k", dest_name, " = ", dst_count++, ",\n"); } absl::StrAppend(&h_output, " kPastMaxValue = ", dst_count, @@ -557,6 +562,7 @@ int list_dst_count = 0; absl::StrAppend(&h_output, " kNone = ", list_dst_count++, ",\n"); for (auto const &dest_name : list_dest_operands) { + list_dest_op_map_.insert({dest_name, list_dst_count}); absl::StrAppend(&h_output, " k", dest_name, " = ", list_dst_count++, ",\n"); } @@ -671,6 +677,185 @@ return {h_output, cc_output}; } +std::string 
InstructionSet::GenerateOperandEncoder( + int position, absl::string_view op_name, const OperandLocator &locator, + const Opcode *opcode) const { + std::string output; + switch (locator.type) { + case OperandLocator::kPredicate: { + std::string pred_op = + absl::StrCat("PredOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Predicate operand ", op_name, "\n"); + absl::StrAppend( + &output, " result = encoder->GetPredOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + pred_op, ");\n"); + break; + } + case OperandLocator::kSource: { + std::string source_op = + absl::StrCat("SourceOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Source operand ", op_name, "\n"); + absl::StrAppend(&output, + " result = encoder->GetSrcOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + source_op, ", ", locator.instance, ");\n"); + break; + } + case OperandLocator::kSourceArray: { + std::string list_source_op = + absl::StrCat("ListSourceOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Source array operand ", op_name, "\n"); + absl::StrAppend( + &output, + " result = encoder->GetListSourceOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + list_source_op, ", ", locator.instance, ");\n"); + break; + } + case OperandLocator::kDestination: { + std::string dest_op = + absl::StrCat("DestOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Destination operand ", op_name, "\n"); + absl::StrAppend( + &output, " result = encoder->GetDestOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + dest_op, ", ", locator.instance, ");\n"); + break; + } + case OperandLocator::kDestinationArray: { + std::string list_dest_op = + absl::StrCat("ListDestOpEnum::k", ToPascalCase(op_name)); + absl::StrAppend(&output, " // Destination array operand ", op_name, + "\n"); + absl::StrAppend( + &output, + " result = 
encoder->GetListDestOpEncoding(address, operands[", + position, + "], slot, " + "entry, opcode, ", + list_dest_op, ", ", locator.instance, ");\n"); + break; + } + default: + absl::StrAppend(&output, " #error Unknown operand type ", op_name, "\n"); + break; + } + absl::StrAppend(&output, + " if (!result.ok()) return result.status();\n" + " encoding |= result.value();\n"); + return output; +} + +std::tuple<std::string, std::string> InstructionSet::GenerateEncClasses( + absl::string_view file_name, absl::string_view opcode_file_name, + absl::string_view encoder_type) const { + std::string h_output; + std::string cc_output; + std::string encoder = absl::StrCat(pascal_name(), "EncoderInterfaceBase"); + // Generate the bin encoder base class. + absl::StrAppend(&h_output, "class ", encoder, + " {\n" + " public:\n" + " virtual ~", + encoder, + "() = default;\n" + R"( + // Returns the opcode encoding and size (in bits) of the opcode. + virtual absl::StatusOr<std::tuple<uint64_t, int>> GetOpcodeEncoding( + SlotEnum slot, int entry, OpcodeEnum opcode) = 0; + virtual absl::StatusOr<uint64_t> GetSrcOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + SourceOpEnum source_op, int source_num) = 0; + virtual absl::StatusOr<uint64_t> GetDestOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + DestOpEnum dest_op, int dest_num) = 0; + virtual absl::StatusOr<uint64_t> GetListDestOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + ListDestOpEnum dest_op, int dest_num) = 0; + virtual absl::StatusOr<uint64_t> GetListSourceOpEncoding( uint64_t address, + absl::string_view text,SlotEnum slot, int entry, OpcodeEnum opcode, + ListSourceOpEnum source_op, int source_num) = 0; + virtual absl::StatusOr<uint64_t> GetPredOpEncoding(uint64_t address, + absl::string_view text, SlotEnum slot, int entry, OpcodeEnum opcode, + PredOpEnum pred_op) = 0; +}; + 
+)"); + + absl::StrAppend(&cc_output, + "namespace {\n\n" + "absl::StatusOr<std::tuple<uint64_t, int>> EncodeNone(", + encoder, + "*, SlotEnum, int, OpcodeEnum, uint64_t, const " + "std::vector<std::string> &) {\n" + " return absl::NotFoundError(\"No such opcode\");\n" + "}\n\n"); + std::string array; + absl::StrAppend( + &array, + "using EncodeFcn = absl::StatusOr<std::tuple<uint64_t, int>> (*)(", + encoder, + "*, SlotEnum, int, OpcodeEnum, uint64_t, const " + "std::vector<std::string> " + "&);\n" + "EncodeFcn encode_fcns[] = {\n" + " EncodeNone,\n"); + for (auto &[name, inst_ptr] : instruction_map_) { + auto *opcode = inst_ptr->opcode(); + absl::StrAppend(&array, " Encode", opcode->pascal_name(), ",\n"); + absl::StrAppend( + &cc_output, "absl::StatusOr<std::tuple<uint64_t, int>> Encode", + opcode->pascal_name(), "(", encoder, + " *encoder, SlotEnum slot, int entry,\n" + " OpcodeEnum opcode, uint64_t address, const " + "std::vector<std::string> &operands) " + "{\n" + " auto res_opcode = encoder->GetOpcodeEncoding(slot, entry, opcode);\n" + " if (!res_opcode.ok()) return res_opcode.status();\n" + " auto [encoding, bit_size] = res_opcode.value();\n" + " absl::StatusOr<uint64_t> result;\n"); + int position = 0; + for (auto const *disasm_format : inst_ptr->disasm_format_vec()) { + for (auto const *format_info : disasm_format->format_info_vec) { + if (format_info->op_name.empty()) continue; + auto iter = opcode->op_locator_map().find(format_info->op_name); + if (iter == opcode->op_locator_map().end()) { + absl::StrAppend(&cc_output, " #error ", format_info->op_name, + " not found in instruction opcodes\n"); + continue; + } + auto locator = iter->second; + absl::StrAppend(&cc_output, + GenerateOperandEncoder(position++, format_info->op_name, + locator, opcode)); + } + } + absl::StrAppend(&cc_output, + " return std::make_tuple(encoding, bit_size);\n" + "}\n\n"); + } + absl::StrAppend(&array, "};\n\n"); + absl::StrAppend(&cc_output, array, "\n} // namespace\n\n"); + + // 
Generate the regex matchers for each slot. + for (auto *slot : slot_order_) { + if (!slot->is_referenced()) continue; + auto [h_slot, cc_slot] = slot->GenerateAsmRegexMatcher(); + absl::StrAppend(&h_output, h_slot); + absl::StrAppend(&cc_output, cc_slot); + } + return {h_output, cc_output}; +} + } // namespace instruction_set } // namespace machine_description } // namespace sim
diff --git a/mpact/sim/decoder/instruction_set.h b/mpact/sim/decoder/instruction_set.h index e767325..0db705c 100644 --- a/mpact/sim/decoder/instruction_set.h +++ b/mpact/sim/decoder/instruction_set.h
@@ -17,13 +17,16 @@ #include <memory> #include <string> +#include <tuple> #include <vector> +#include "absl/container/btree_map.h" #include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "mpact/sim/decoder/bundle.h" +#include "mpact/sim/decoder/instruction.h" #include "mpact/sim/decoder/opcode.h" #include "mpact/sim/decoder/resource.h" #include "mpact/sim/decoder/slot.h" @@ -70,12 +73,20 @@ absl::string_view encoding_type) const; std::string GenerateClassDefinitions(absl::string_view include_file, absl::string_view encoding_type) const; + // Generates the encoder code. Returns {header text, source text}. + std::tuple<std::string, std::string> GenerateEncClasses( + absl::string_view file_name, absl::string_view opcode_file_name, + absl::string_view encoder_type) const; // This method is static, as it considers all the instruction sets that were // defined. - StringPair GenerateEnums(absl::string_view file_name) const; + StringPair GenerateEnums(absl::string_view file_name); static void AddAttributeName(const std::string &name); + // Registers inst under the name of its opcode. If an instruction with that + // name is already registered, the existing entry is kept. + void AddInstruction(Instruction *inst) { + // emplace() leaves an existing entry untouched, so no prior lookup is + // needed. + instruction_map_.emplace(inst->opcode()->name(), inst); + } + // Getters and setters. std::vector<std::string> &namespaces() { return namespaces_; } const std::string &name() const { return name_; } @@ -93,7 +104,25 @@ } absl::flat_hash_map<std::string, Slot *> &slot_map() { return slot_map_; } + // Maps from operand names to enum values. 
+ absl::flat_hash_map<std::string, int> &pred_op_map() { return pred_op_map_; } + absl::flat_hash_map<std::string, int> &source_op_map() { + return source_op_map_; + } + absl::flat_hash_map<std::string, int> &list_source_op_map() { + return list_source_op_map_; + } + absl::flat_hash_map<std::string, int> &dest_op_map() { return dest_op_map_; } + absl::flat_hash_map<std::string, int> &list_dest_op_map() { + return list_dest_op_map_; + } + + std::string GenerateEncodingFunctions() const; + private: + // Returns generated code for encoding the operand op_name of opcode. + std::string GenerateOperandEncoder(int position, absl::string_view op_name, + const OperandLocator &locator, + const Opcode *opcode) const; // Add bundle and slot to list of classes that need to be generated. void AddToBundleOrder(Bundle *); void AddToSlotOrder(Slot *); @@ -107,11 +136,19 @@ // Name in PascalCase. std::string pascal_name_; Bundle *bundle_ = nullptr; + // Map from instruction name to pointer. + absl::btree_map<std::string, Instruction *> instruction_map_; // Maps from names to bundle/slot pointers. absl::flat_hash_map<std::string, Bundle *> bundle_map_; absl::flat_hash_map<std::string, Slot *> slot_map_; // Attribute name list - shared across all the isas. static absl::btree_set<std::string> *attribute_names_; + // Maps from operand names to enum values. + absl::flat_hash_map<std::string, int> pred_op_map_; + absl::flat_hash_map<std::string, int> source_op_map_; + absl::flat_hash_map<std::string, int> list_source_op_map_; + absl::flat_hash_map<std::string, int> dest_op_map_; + absl::flat_hash_map<std::string, int> list_dest_op_map_; }; } // namespace instruction_set
diff --git a/mpact/sim/decoder/instruction_set_visitor.cc b/mpact/sim/decoder/instruction_set_visitor.cc index e9f7f37..05a7d3f 100644 --- a/mpact/sim/decoder/instruction_set_visitor.cc +++ b/mpact/sim/decoder/instruction_set_visitor.cc
@@ -23,6 +23,7 @@ #include <memory> #include <optional> #include <string> +#include <tuple> #include <utility> #include <variant> #include <vector> @@ -166,43 +167,69 @@ absl::StrCat(ToPascalCase(isa_name), "EncodingBase"); // Create output streams for .h and .cc files. - std::string dot_h_name = absl::StrCat(isa_prefix, "_decoder.h"); - std::string dot_cc_name = absl::StrCat(isa_prefix, "_decoder.cc"); + std::string dec_dot_h_name = absl::StrCat(isa_prefix, "_decoder.h"); + std::string dec_dot_cc_name = absl::StrCat(isa_prefix, "_decoder.cc"); + std::string enc_dot_h_name = absl::StrCat(isa_prefix, "_encoder.h"); + std::string enc_dot_cc_name = absl::StrCat(isa_prefix, "_encoder.cc"); std::string enum_h_name = absl::StrCat(isa_prefix, "_enums.h"); std::string enum_cc_name = absl::StrCat(isa_prefix, "_enums.cc"); - std::ofstream dot_h_file(absl::StrCat(directory, "/", dot_h_name)); - std::ofstream dot_cc_file(absl::StrCat(directory, "/", dot_cc_name)); + std::ofstream dec_dot_h_file(absl::StrCat(directory, "/", dec_dot_h_name)); + std::ofstream dec_dot_cc_file(absl::StrCat(directory, "/", dec_dot_cc_name)); + std::ofstream enc_dot_h_file(absl::StrCat(directory, "/", enc_dot_h_name)); + std::ofstream enc_dot_cc_file(absl::StrCat(directory, "/", enc_dot_cc_name)); std::ofstream enum_h_file(absl::StrCat(directory, "/", enum_h_name)); std::ofstream enum_cc_file(absl::StrCat(directory, "/", enum_cc_name)); // Generate the code, close the files and return. 
- std::string guard_name = ToHeaderGuard(dot_h_name); - dot_h_file << GenerateHdrFileProlog(dot_h_name, enum_h_name, guard_name, - encoding_type_name, - instruction_set->namespaces()); - dot_h_file << instruction_set->GenerateClassDeclarations( - dot_h_name, enum_h_name, encoding_type_name); - dot_h_file << GenerateHdrFileEpilog(guard_name, - instruction_set->namespaces()); - dot_cc_file << GenerateCcFileProlog(dot_h_name, - instruction_set->namespaces()); - dot_cc_file << instruction_set->GenerateClassDefinitions(dot_h_name, - encoding_type_name); - dot_cc_file << GenerateNamespaceEpilog(instruction_set->namespaces()); - enum_cc_file << GenerateCcFileProlog(enum_h_name, - instruction_set->namespaces()); + std::string guard_name = ToHeaderGuard(dec_dot_h_name); + // Decoder .h file. + dec_dot_h_file << GenerateHdrFileProlog(dec_dot_h_name, enum_h_name, + guard_name, encoding_type_name, + instruction_set->namespaces()); + dec_dot_h_file << instruction_set->GenerateClassDeclarations( + dec_dot_h_name, enum_h_name, encoding_type_name); + dec_dot_h_file << GenerateHdrFileEpilog(guard_name, + instruction_set->namespaces()); + dec_dot_h_file.close(); + // Decoder .cc file. + dec_dot_cc_file << GenerateCcFileProlog(dec_dot_h_name, + instruction_set->namespaces()); + dec_dot_cc_file << instruction_set->GenerateClassDefinitions( + dec_dot_h_name, encoding_type_name); + dec_dot_cc_file << GenerateNamespaceEpilog(instruction_set->namespaces()); + dec_dot_cc_file.close(); + + // Enum files. 
enum_h_file << GenerateSimpleHdrProlog(ToHeaderGuard(enum_h_name), instruction_set->namespaces()); + enum_cc_file << GenerateCcFileProlog(enum_h_name, + instruction_set->namespaces()); auto [h_output, cc_output] = instruction_set->GenerateEnums(enum_h_name); enum_h_file << h_output; enum_cc_file << cc_output; enum_h_file << GenerateHdrFileEpilog(ToHeaderGuard(enum_h_name), instruction_set->namespaces()); enum_cc_file << GenerateNamespaceEpilog(instruction_set->namespaces()); - dot_h_file.close(); - dot_cc_file.close(); enum_h_file.close(); enum_cc_file.close(); + // Encoder .h and .cc files. + guard_name = ToHeaderGuard(enc_dot_h_name); + auto [enc_dot_h_prolog, enc_dot_cc_prolog] = + GenerateEncFilePrologs(enc_dot_h_name, guard_name, enum_h_name, + encoding_type_name, instruction_set->namespaces()); + enc_dot_h_file << enc_dot_h_prolog; + enc_dot_cc_file << enc_dot_cc_prolog; + auto [h_enc, cc_enc] = instruction_set->GenerateEncClasses( + enc_dot_h_name, enum_h_name, encoding_type_name); + enc_dot_h_file << h_enc; + enc_dot_cc_file << cc_enc; + enc_dot_h_file << GenerateHdrFileEpilog(guard_name, + instruction_set->namespaces()); + enc_dot_cc_file << GenerateNamespaceEpilog(instruction_set->namespaces()); + enc_dot_h_file.close(); + enc_dot_cc_file.close(); + return absl::OkStatus(); } @@ -1515,6 +1542,7 @@ Opcode *top = result.value(); auto inst = new Instruction(top, slot); + slot->instruction_set()->AddInstruction(inst); // Get the size of the instruction if specified, otherwise use default size. if (opcode_ctx->size_spec() != nullptr) { @@ -1596,8 +1624,9 @@ is_array = true; } child->opcode()->AppendSourceOp(name, is_array); - parent->opcode()->op_locator_map().insert( - std::make_pair(name, OperandLocator(op_spec_number, 's', instance))); + parent->opcode()->op_locator_map().insert(std::make_pair( + name, 
+ OperandLocator(op_spec_number, + is_array ? OperandLocator::kSourceArray + : OperandLocator::kSource, + instance))); instance++; } } @@ -1630,8 +1659,9 @@ } else { child->opcode()->AppendDestOp(ident, is_array, new TemplateConstant(1)); } - parent->opcode()->op_locator_map().insert( - std::make_pair(ident, OperandLocator(op_spec_number, 'd', instance))); + parent->opcode()->op_locator_map().insert(std::make_pair( + ident, + OperandLocator(op_spec_number, + is_array ? OperandLocator::kDestinationArray + : OperandLocator::kDestination, + instance))); instance++; } } @@ -2248,6 +2278,58 @@ return output; } +// Builds the prologs of the generated encoder files and returns them as +// {header prolog, source prolog}. The header prolog opens the include guard +// 'guard_name'; the source prolog includes 'file_name'. Both include +// 'opcode_file_name' and open every namespace in 'namespaces'. +// 'encoding_type_name' is not used. +std::tuple<std::string, std::string> +InstructionSetVisitor::GenerateEncFilePrologs( + absl::string_view file_name, absl::string_view guard_name, + absl::string_view opcode_file_name, absl::string_view encoding_type_name, + const std::vector<std::string> &namespaces) { + std::string h_output; + std::string cc_output; + // <cstdint> and <tuple> are listed because the generated code uses uint64_t + // and std::tuple. 
+ absl::StrAppend(&h_output, "#ifndef ", guard_name, + "\n" + "#define ", + guard_name, + "\n" + "\n" + "#include <array>\n" + "#include <cstdint>\n" + "#include <string>\n" + "#include <tuple>\n" + "#include <vector>\n" + "\n" + "#include \"absl/status/status.h\"\n" + "#include \"absl/status/statusor.h\"\n" + "#include \"absl/strings/string_view.h\"\n" + "#include \"re2/re2.h\"\n" + "#include \"re2/set.h\"\n" + "#include \"", + opcode_file_name, + "\"\n" + "\n"); + // The generated source also calls absl::StrCat and absl::StrAppend. + absl::StrAppend(&cc_output, "#include \"", file_name, + "\"\n" + "\n" + "#include <array>\n" + "#include <cstdint>\n" + "#include <string>\n" + "#include <tuple>\n" + "#include <vector>\n" + "\n" + "#include \"absl/status/status.h\"\n" + "#include \"absl/status/statusor.h\"\n" + "#include \"absl/strings/str_cat.h\"\n" + "#include \"absl/strings/string_view.h\"\n" + "#include \"re2/re2.h\"\n" + "#include \"re2/set.h\"\n" + "#include \"", + opcode_file_name, + "\"\n" + "\n"); + + for (const auto &namespace_name : namespaces) { + absl::StrAppend(&h_output, "namespace ", namespace_name, " {\n"); + absl::StrAppend(&cc_output, "namespace ", namespace_name, " {\n"); + } + absl::StrAppend(&h_output, "\n"); + absl::StrAppend(&cc_output, "\n"); + return {h_output, cc_output}; +} + std::string InstructionSetVisitor::GenerateHdrFileEpilog( absl::string_view guard_name, const 
std::vector<std::string> &namespaces) { std::string output;
diff --git a/mpact/sim/decoder/instruction_set_visitor.h b/mpact/sim/decoder/instruction_set_visitor.h index 5889a21..e0f19b7 100644 --- a/mpact/sim/decoder/instruction_set_visitor.h +++ b/mpact/sim/decoder/instruction_set_visitor.h
@@ -21,6 +21,7 @@ #include <memory> #include <optional> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -179,6 +180,10 @@ absl::string_view guard_name, absl::string_view encoding_base_name, const std::vector<std::string> &namespaces); + // Generates the prologs (include guard, #include lines and namespace + // openers) of the encoder .h and .cc files. Returns {h prolog, cc prolog}. + std::tuple<std::string, std::string> GenerateEncFilePrologs( + absl::string_view file_name, absl::string_view guard_name, + absl::string_view opcode_file_name, absl::string_view encoding_type_name, + const std::vector<std::string> &namespaces); std::string GenerateHdrFileEpilog(absl::string_view guard_name, const std::vector<std::string> &namespaces); std::string GenerateCcFileProlog(absl::string_view hdr_file_name,
diff --git a/mpact/sim/decoder/mpact_sim_isa.bzl b/mpact/sim/decoder/mpact_sim_isa.bzl index 668817b..02fee77 100644 --- a/mpact/sim/decoder/mpact_sim_isa.bzl +++ b/mpact/sim/decoder/mpact_sim_isa.bzl
@@ -86,6 +86,8 @@ "%s_decoder.cc" % base_file_prefix, "%s_enums.h" % base_file_prefix, "%s_enums.cc" % base_file_prefix, + "%s_encoder.h" % base_file_prefix, + "%s_encoder.cc" % base_file_prefix, ] # The command to generate the files. @@ -115,6 +117,14 @@ lib_deps.append("@com_google_mpact-sim//mpact/sim/generic:arch_state") if "@com_google_mpact-sim//mpact/sim/generic:instruction" not in deps: lib_deps.append("@com_google_mpact-sim//mpact/sim/generic:instruction") + # The generated encoder sources include RE2 and the absl status and strings + # headers. + if "@com_googlesource_code_re2//:re2" not in deps: + lib_deps.append("@com_googlesource_code_re2//:re2") + if "@com_google_absl//absl/status" not in deps: + lib_deps.append("@com_google_absl//absl/status") + if "@com_google_absl//absl/status:statusor" not in deps: + lib_deps.append("@com_google_absl//absl/status:statusor") + if "@com_google_absl//absl/strings" not in deps: + lib_deps.append("@com_google_absl//absl/strings") native.cc_library( name = name, srcs = [f for f in out_files if f.endswith(".cc")], @@ -154,6 +164,8 @@ out_files = [ "%s_bin_decoder.h" % base_file_prefix, "%s_bin_decoder.cc" % base_file_prefix, + "%s_bin_encoder.h" % base_file_prefix, + "%s_bin_encoder.cc" % base_file_prefix, ] # The command to generate the files.
diff --git a/mpact/sim/decoder/opcode.h b/mpact/sim/decoder/opcode.h index ad327aa..4b77fc3 100644 --- a/mpact/sim/decoder/opcode.h +++ b/mpact/sim/decoder/opcode.h
@@ -108,10 +108,16 @@ // This struct is used to specify the location of an operand within an // instruction. It specifies which instruction (or child instruction) number. In // this case, 0 is the top level instruction, 1 is the first child instruction -// etc. The type is 'p' for predicate operand, 's' for source operand, and 'd' -// for destination operand. The instance number specifies the entry index in the +// etc. The type is 'p' for predicate operand, 's' for source operand, 't' for +// source array operand, 'd' for destination operand, and 'e' for destination +// array operand. The instance number specifies the entry index in the // source or destination operand vector. struct OperandLocator { + // Named values for the 'type' field described above. + static constexpr char kPredicate = 'p'; + static constexpr char kSource = 's'; + static constexpr char kSourceArray = 't'; + static constexpr char kDestination = 'd'; + static constexpr char kDestinationArray = 'e'; int op_spec_number; char type; int instance;
diff --git a/mpact/sim/decoder/slot.cc b/mpact/sim/decoder/slot.cc index d8ca513..dab1ac2 100644 --- a/mpact/sim/decoder/slot.cc +++ b/mpact/sim/decoder/slot.cc
@@ -14,12 +14,14 @@ #include "mpact/sim/decoder/slot.h" +#include <algorithm> #include <cctype> #include <cstddef> #include <cstdlib> #include <map> #include <stack> #include <string> +#include <tuple> #include <utility> #include <variant> #include <vector> @@ -58,9 +60,9 @@ } if (locator.type == 'p') { absl::StrAppend(&code, "Predicate()"); - } else if (locator.type == 's') { + } else if (locator.type == 's' || locator.type == 't') { absl::StrAppend(&code, "Source(", locator.instance, ")"); - } else if (locator.type == 'd') { + } else if (locator.type == 'd' || locator.type == 'e') { absl::StrAppend(&code, "Destination(", locator.instance, ")"); } else { return absl::InternalError(absl::StrCat("Unknown locator type '", @@ -284,6 +286,301 @@ return iter->second; } +namespace { + +std::string EscapeRegexCharacters(const std::string &str) { + std::string output; + if (str.empty()) return output; + auto pos = str.find_last_not_of(' '); + if (pos == std::string::npos) { + return "\\s+"; + } + std::string input(str.substr(pos)); + bool in_space = false; + char p; + for (auto c : str) { + if (isspace(c)) { + if (!in_space) { + if (ispunct(p)) { + absl::StrAppend(&output, "\\s*"); + } else { + absl::StrAppend(&output, "\\s+"); + } + } + in_space = true; + continue; + } + p = c; + in_space = false; + switch (c) { + case '.': + absl::StrAppend(&output, "\\."); + break; + case '(': + absl::StrAppend(&output, "\\("); + break; + case ')': + absl::StrAppend(&output, "\\)"); + break; + case '[': + absl::StrAppend(&output, "\\["); + break; + case ']': + absl::StrAppend(&output, "\\]"); + break; + case '*': + absl::StrAppend(&output, "\\*"); + break; + case '+': + absl::StrAppend(&output, "\\+"); + break; + case '?': + absl::StrAppend(&output, "\\?"); + break; + case '|': + absl::StrAppend(&output, "\\|"); + break; + case '{': + absl::StrAppend(&output, "\\{"); + break; + case '}': + absl::StrAppend(&output, "\\}"); + break; + case '^': + absl::StrAppend(&output, "\\^"); + break; 
+ case '$': + absl::StrAppend(&output, "\\$"); + break; + case '!': + absl::StrAppend(&output, "\\!"); + break; + case '\\': + absl::StrAppend(&output, "\\\\"); + break; + default: + absl::StrAppend(&output, std::string(1, c)); + break; + } + } + return output; +} + +} // namespace + +std::tuple<std::string, std::vector<OperandLocator>> Slot::GenerateRegEx( + const Instruction *inst, std::vector<std::string> &formats) const { + std::string output = "R\"("; + std::string sep = "^\\s*"; + int args = 0; + std::vector<OperandLocator> opnd_locators; + // Iterate over the vector of disasm formats. These will end up concatenated + // with \s+ separators. + for (auto const *disasm_fmt : inst->disasm_format_vec()) { + absl::StrAppend(&output, sep); + sep = "\\s+"; + auto fragment_iter = disasm_fmt->format_fragment_vec.begin(); + auto fragment_end = disasm_fmt->format_fragment_vec.end(); + auto format_iter = disasm_fmt->format_info_vec.begin(); + auto format_end = disasm_fmt->format_info_vec.end(); + char prev = '\0'; + // Iterate over the format fragments. + while (fragment_iter != fragment_end) { + auto fragment = *fragment_iter; + if (!fragment.empty()) { + auto str = EscapeRegexCharacters(fragment); + absl::StrAppend(&output, str); + prev = str.back(); + } else { + prev = '\0'; + } + fragment_iter++; + if (format_iter != format_end) { + // If the previous character is punctuation, but not '.' or '_', add a + // space separator. + if ((prev != '\0') && + !(isalnum(prev) || (prev == '_') || (prev == '.'))) { + absl::StrAppend(&output, "\\s*"); + } + args++; + std::string op_name = (*format_iter)->op_name; + absl::StrAppend(&output, "(?<", op_name, ">\\S*?)"); + opnd_locators.push_back(inst->opcode()->op_locator_map().at(op_name)); + if ((fragment_iter != fragment_end) && (!(*fragment_iter).empty())) { + char c = (*fragment_iter)[0]; + // If the next fragment is not alnum or underscore, add a space + // separator. 
+ if (!isalnum(c) || (c != '_')) { + absl::StrAppend(&output, "\\s*"); + } + } + format_iter++; + } + } + } + absl::StrAppend(&output, "\\s*(#.*)?$)\""); + return {output, opnd_locators}; +} + +std::string GenerateEncodingFunctions(const std::string &encoder, + InstructionSet instruction_set) { + std::string output; + absl::StrAppend(&output, "namespace {\n\n"); + absl::StrAppend( + &output, "absl::StatusOr<std::tuple<uint64_t, int>> EncodeNone(", encoder, + "*, SlotEnum, int, OpcodeEnum, uint64_t, const " + "std::vector<std::string> &) {\n" + " return absl::NotFoundError(\"No such opcode\");\n" + "}\n\n"); + return output; +} +// Generate a regex to match the assembly string for the instructions. +std::tuple<std::string, std::string> Slot::GenerateAsmRegexMatcher() const { + std::string h_output; + std::string cc_output; + std::string class_name = pascal_name() + "SlotMatcher"; + size_t max_args = 0; + + // Generate the encoder function for each instruction. + std::string encoder = + absl::StrCat(instruction_set_->pascal_name(), "EncoderInterfaceBase"); + + // Generate the matcher class. 
+ absl::StrAppend( + &h_output, + "// Assembly matcher.\n" + "class ", + class_name, + " {\n" + " public:\n" + " ", + class_name, "(", instruction_set_->pascal_name(), + "EncoderInterfaceBase *encoder);\n" + " ~", + class_name, + "();\n" + " absl::Status Initialize();\n" + " bool Match(absl::string_view text, std::vector<int> &matches);\n" + " bool Extract(absl::string_view text, int index, " + "std::vector<std::string> &values);\n" + "absl::StatusOr<std::tuple<uint64_t, int>> " + "Encode(uint64_t address, absl::string_view text, int entry);\n" + " private:\n" + " ", + encoder, + " *encoder_;\n" + " std::vector<RE2 *> regex_vec_;\n" + " RE2::Set regex_set_;\n"); + absl::StrAppend(&cc_output, class_name, "::", class_name, "(", + instruction_set_->pascal_name(), + "EncoderInterfaceBase *encoder) :\n" + " encoder_(encoder),\n" + " regex_set_(RE2::Options(), RE2::ANCHOR_BOTH) {}\n" + "\n", + class_name, "::~", class_name, + "() {\n" + " for (int i = 0; i < re2_args.size(); ++i) {\n" + " delete re2_args[i];\n" + " }\n" + "}\n\n" + "absl::Status ", + class_name, + "::Initialize() {\n" + " int index;\n" + " std::string error;\n" + " index = regex_set_.Add(\"^$\", &error);\n" + " regex_vec_.push_back(new RE2(\"^$\"));\n"); + std::vector<std::string> formats; + for (auto const &[name, inst_ptr] : instruction_map_) { + auto [regex, opnd_locators] = GenerateRegEx(inst_ptr, formats); + max_args = std::max(max_args, opnd_locators.size()); + absl::StrAppend(&cc_output, " regex_vec_.push_back(new RE2(", regex, + "));\n" + " index = regex_set_.Add(", + regex, + ", &error);\n" + " if (index == -1) return absl::InternalError(error);\n"); + } + absl::StrAppend(&h_output, " std::string args[", max_args, + "];\n" + " std::array<RE2::Arg*, ", + max_args, "> re2_args = {"); + for (int i = 0; i < max_args; ++i) absl::StrAppend(&h_output, "nullptr, "); + absl::StrAppend(&h_output, " };\n"); + // Construct the RE2::Arg objects. 
+ absl::StrAppend(&cc_output, + " auto ok = regex_set_.Compile();\n" + " if (!ok) return absl::InternalError(\"Failed to compile " + "regex set\");\n" + " for (int i = 0; i < ", + max_args, + "; ++i) {\n" + " re2_args[i] = new RE2::Arg(&args[i]);\n" + " }\n"); + absl::StrAppend( + &cc_output, + " return absl::OkStatus();\n" + "}\n\n" + "bool ", + class_name, + "::Match(absl::string_view text, std::vector<int> &matches) {\n" + " return regex_set_.Match(text, &matches);\n" + "}\n\n" + "bool ", + class_name, + "::Extract(absl::string_view text, int index, " + "std::vector<std::string> &values) {\n" + " auto ®ex = regex_vec_.at(index);\n" + " int arg_count = regex->NumberOfCapturingGroups();\n" + " if (!regex_vec_.at(index)->FullMatchN(text, *regex, " + "re2_args.data(), " + "arg_count))\n" + " return false;\n" + " for (int i = 0; i < arg_count; ++i) {\n" + " values.push_back(args[i]);\n" + " }\n" + " return true;\n" + "}\n\n" + "absl::StatusOr<std::tuple<uint64_t, int>> ", + pascal_name(), + "SlotMatcher::Encode(\n" + R"( + uint64_t address, absl::string_view text, int entry) { + std::vector<int> matches; + std::string error_message = absl::StrCat("Failed to encode '", text, "':"); + if (!Match(text, matches) || (matches.size() == 0)) { + return absl::NotFoundError(error_message); + } + std::vector<std::tuple<uint64_t, int>> encodings; + for (auto index : matches) { + std::vector<std::string> values; + if (!Extract(text, index, values)) continue; +)", + " auto result = encode_fcns[index](encoder_, SlotEnum::k", + pascal_name(), + ", entry, \n" + " " + "static_cast<OpcodeEnum>(index), address, values);\n", + R"( + if (!result.status().ok()) { + absl::StrAppend(&error_message, "\n ", result.status().message()); + continue; + } + encodings.push_back(result.value()); + } + if (encodings.empty()) return absl::NotFoundError(error_message); + if (encodings.size() > 1) { + return absl::NotFoundError( + absl::StrCat("Failed to encode '", text, "': ambiguous")); + } + return 
encodings[0]; +} + +)"); + absl::StrAppend(&h_output, "};\n\n"); + return {h_output, cc_output}; +} + // Generate a function that will set the disassembly string for the given // instruction. std::string Slot::GenerateDisasmSetterFcn(absl::string_view name, @@ -328,8 +625,8 @@ in_strcat.push(true); } // Generate the strings from the format fragments and the format info. + std::string next_sep; for (auto const &frag : disasm_fmt->format_fragment_vec) { - std::string next_sep; if (!frag.empty()) { absl::StrAppend(&output, inner_sep, indent_string(indent), "\"", frag, "\""); @@ -369,6 +666,7 @@ } } } + next_sep = ", "; index++; if (inner_sep.empty()) inner_sep = ",\n"; } @@ -416,8 +714,58 @@ return absl::StrCat(iter->second); } -// Generate a string that is a unique identifier from the resources to determine -// which instructions can share resource setter functions. +// Generate the assembler function for the given instruction. +std::string Slot::GenerateAssemblerFcn(const Instruction *inst, + absl::string_view encoder_type) const { + std::string output; + int num_values = inst->opcode()->source_op_vec().size() + + inst->opcode()->dest_op_vec().size(); + absl::StrAppend( + &output, "absl::StatusOr<std::tuple<int, uint64_t>> ", pascal_name(), + "Slot", "Assemble", inst->opcode()->pascal_name(), "(", encoder_type, + " *enc, const std::vector<std::string> &values, SlotEnum " + "slot, int entry) {\n", + " if (values.size() != ", num_values, + ")\n" + " return absl::InvalidArgumentError(\"Wrong number of values\");\n" + " constexpr OpcodeEnum opcode = OpcodeEnum::k", + inst->opcode()->pascal_name(), + ";\n" + "auto [inst_word, num_bits] = enc->GetOpEncoding(opcode, slot, " + "entry);\n", + " absl::Status status;\n"); + auto const &source_op_vec = inst->opcode()->source_op_vec(); + for (int i = 0; i < source_op_vec.size(); ++i) { + std::string op_name = ToPascalCase(source_op_vec[i].name); + absl::StrAppend(&output, " status = enc->SetSrcEncoding(values.at(", i, + "), 
slot, entry,\n" + "SourceOpEnum::k", + op_name, ", ", i, + ", opcode);\n" + " if (!stats.ok()) return status;\n"); + } + auto const &dest_op_vec = inst->opcode()->dest_op_vec(); + for (int i = 0; i < dest_op_vec.size(); ++i) { + absl::StrAppend(&output, " status = enc->SetDestEncoding(values.at(", i, + "), slot, entry,\n" + "DestOpEnum::k", + dest_op_vec[i]->pascal_case_name(), ", ", i, + ", opcode);\n" + " if (!stats.ok()) return status;\n"); + } + absl::StrAppend( + &output, + " auto ok = enc->ValidateEncoding(opcode, slot, entry, inst_word);\n" + " if (!ok) return absl::InvalidArgumentError(\"Invalid " + "encoding\");\n"); + absl::StrAppend(&output, + "return std::tie(num_bits, inst_word);\n" + "}\n\n"); + return output; +} + +// Generate a string that is a unique identifier from the resources to +// determine which instructions can share resource setter functions. std::string Slot::CreateResourceKey( const std::vector<const ResourceReference *> &refs) const { std::string key; @@ -480,8 +828,8 @@ return key; } -// Generate a resource setter function call for the resource "key" of the given -// instruction. If a matching one does not exist, call to create such a +// Generate a resource setter function call for the resource "key" of the +// given instruction. If a matching one does not exist, call to create such a // function. std::string Slot::GenerateResourceSetter(const Instruction *inst, absl::string_view encoding_type) { @@ -590,8 +938,8 @@ } } - // Get all the simple resources that need to be reserved, then all the complex - // resources that need to be reserved when issuing this instruction. + // Get all the simple resources that need to be reserved, then all the + // complex resources that need to be reserved when issuing this instruction. 
complex_refs.clear(); simple_refs.clear(); for (auto const *ref : inst->resource_acquire_vec()) { @@ -701,8 +1049,8 @@ return output; } -// Generates a string that is a unique identifier from the operands to determine -// which instructions can share operand getter functions. +// Generates a string that is a unique identifier from the operands to +// determine which instructions can share operand getter functions. std::string Slot::CreateOperandLookupKey(const Opcode *opcode) const { std::string key; // Generate identifier for the predicate operand, if the opcode has one. @@ -850,9 +1198,10 @@ if (instruction_map_.empty()) return output; std::string class_name = pascal_name() + "Slot"; // For each instruction create two lambda functions. One that is used to - // obtain the semantic function object for the instruction, the other a lambda - // that sets the predicate, source and target operands. Both lambdas use calls - // to virtual functions declared in the current class or a base class thereof. + // obtain the semantic function object for the instruction, the other a + // lambda that sets the predicate, source and target operands. Both lambdas + // use calls to virtual functions declared in the current class or a base + // class thereof. std::string signature = absl::StrCat("(Instruction *inst, ", encoding_type, " *enc, OpcodeEnum opcode, SlotEnum slot, int entry)"); @@ -884,8 +1233,8 @@ // Construct operand getter lookup key. std::string key = CreateOperandLookupKey(inst->opcode()); auto iter = operand_setter_name_map_.find(key); - // If the key is not found, create a new getter function, otherwise reuse - // the existing one. + // If the key is not found, create a new getter function, otherwise + // reuse the existing one. if (iter == operand_setter_name_map_.end()) { auto index = operand_setter_name_map_.size(); std::string setter_name =
diff --git a/mpact/sim/decoder/slot.h b/mpact/sim/decoder/slot.h index 79cb6ea..cc1df51 100644 --- a/mpact/sim/decoder/slot.h +++ b/mpact/sim/decoder/slot.h
@@ -17,6 +17,7 @@ #include <limits> #include <string> +#include <tuple> #include <vector> #include "absl/container/btree_map.h" @@ -91,6 +92,19 @@ absl::Status AddTemplateFormal(const std::string &name); TemplateFormal *GetTemplateFormal(const std::string &name) const; + // Generate the calls to encode the given operand. + std::string GenerateOperandEncoder(int position, absl::string_view op_name, + const OperandLocator &locator, + const Opcode *opcode) const; + // Generate regex for a given instruction. Returns {regex text, operand + // locators in capture group order}. + std::tuple<std::string, std::vector<OperandLocator>> GenerateRegEx( + const Instruction *inst, std::vector<std::string> &formats) const; + // Generate regexes to match the assembly string for the instructions. + // Returns {header text, source text} of the generated slot matcher class. + std::tuple<std::string, std::string> GenerateAsmRegexMatcher() const; + // Generate assembler function for the given instruction. + std::string GenerateAssemblerFcn(const Instruction *inst, + absl::string_view encoder_type) const; + // Resources Resource *GetOrInsertResource(const std::string &name);
diff --git a/mpact/sim/generic/literal_operand.h b/mpact/sim/generic/literal_operand.h index f7d584d..3256071 100644 --- a/mpact/sim/generic/literal_operand.h +++ b/mpact/sim/generic/literal_operand.h
@@ -54,8 +54,12 @@ template <bool literal> class BoolLiteralOperand : public SourceOperandInterface { public: - BoolLiteralOperand() = default; - explicit BoolLiteralOperand(const std::vector<int> &shape) : shape_(shape) {} + // By default AsString() returns the literal formatted with absl::StrCat. + BoolLiteralOperand() : as_string_(absl::StrCat(literal)) {} + // Uses as_string as the value returned by AsString(). + BoolLiteralOperand(absl::string_view as_string) : as_string_(as_string) {} + BoolLiteralOperand(const std::vector<int> &shape, absl::string_view as_string) + : shape_(shape), as_string_(as_string) {} + explicit BoolLiteralOperand(const std::vector<int> &shape) + : BoolLiteralOperand(shape, absl::StrCat(literal)) {} // Methods for accessing the literal value. Always returns the same // value regardless of the index parameter. @@ -78,7 +82,7 @@ // 128 element vector quantity. std::vector<int> shape() const override { return shape_; } - std::string AsString() const override { return absl::StrCat(literal); } + std::string AsString() const override { return as_string_; } private: std::string as_string_; @@ -89,7 +93,8 @@ template <int literal> class IntLiteralOperand : public SourceOperandInterface { public: - IntLiteralOperand() = default; + // By default AsString() returns the literal formatted with absl::StrCat. + IntLiteralOperand() : as_string_(absl::StrCat(literal)) {} + // Uses as_string as the value returned by AsString(). + IntLiteralOperand(absl::string_view as_string) : as_string_(as_string) {} IntLiteralOperand(const std::vector<int> &shape, absl::string_view as_string) : shape_(shape), as_string_(as_string) {} explicit IntLiteralOperand(const std::vector<int> &shape) @@ -116,7 +121,7 @@ // 128 element vector quantity. std::vector<int> shape() const override { return shape_; } - std::string AsString() const override { return absl::StrCat(literal); } + std::string AsString() const override { return as_string_; } private: std::vector<int> shape_;