mpact/sim/decoder/InstructionSet.g4 - mpact-sim - Git at Google

 // Copyright 2023 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.


 // This grammar is used to specify the structure of the instruction decoder and
 // is used by the parser to generate the encoding independent part of the basic
 // instruction decoder. This grammar does not address the actual encoding of the
 // instruction as that may be expressed in multiple different forms, such as
 // traditional binary, protobuf, etc. The code generated from this grammar
 // will declare virtual methods to obtain opcodes and operands from the
 // encoding that need to be overridden with methods cognizant of the actual
 // instruction encoding used.
 //
 // The grammar specifies the instruction word structure of an ISA, i.e.,
 // the grouping and structure of instruction words. At its simplest, the ISA
 // consists of a single slot, where any opcode is valid. This is the case for
 // most traditional architectures, where the ISA is really sequential (though
 // the implementation may do parallel issue), such as the x86 and Arm ISAs.
 //
 // A VLIW based ISA supports specifying multiple instructions in a large
 // instruction word, or bundle, that will be issued in parallel. Each
 // instruction within the large instruction word occupies a slot. In the case
 // of a binary instruction encoding, a slot refers to a specific bit range
 // (or bit ranges if the slot is replicated) within the instruction word. In
 // case of a protobuf based encoding, a slot refers to one or more message
 // instances. The individual slots within a bundle may support the same set of
 // opcodes, or the opcodes may be restricted by the slot instance. The latter
 // allows for different instruction slots to have different layout and widths,
 // optimizing for the specific opcodes (e.g., load/store vs alu) assigned to
 // each slot.
 //
 // A yet more complex ISA may divide a top level bundle into more than one
 // "sub-bundles". The idea here is that while the top level bundle is fetched
 // and "issued" as a unit, the sub-bundles are then separated and issued
 // separately, possibly in different cycles.
 //
 // The top level of the grammar is the specification of the ISA name and the
 // name of the class that provides access to the opcode that is being decoded.
 // The details of that "encoding" class are not used by the generated code,
 // instead a pointer to that class is passed to pure virtual methods that
 // the overall decoder will have to provide overriding implementations for
 // to have a working decoder.
 //
 // The ISA is then broken down into one or more bundles and/or slots. A bundle
 // typically contains a group of slots that correspond to the set of
 // instructions that are fetched and issued together as part of a long
 // instruction word. However, a bundle may also contain other bundles that
 // are issued as separate groups of instructions.
 //
 // Each slot contains one or more opcodes that represent the set of valid
 // instructions for that slot. Since the same opcode may be valid in multiple
 // slots, it is possible to define slots that aren't used directly in any
 // bundle, but are used as "base classes" for those that are, allowing common
 // opcodes to be factored out for ease of expression and maintenance.
 //
 // An opcode represents an instruction and contains an optional predicate
 // operand name, an optional list of source operand names, and an optional
 // list of destination operand names. The opcode name is used to generate an
 // enumeration type used by the decoder. The operand names are used to
 // declare virtual getter methods in the slot class for creating source and
 // destination operands. The operand names are intended to correspond to
 // specific operand fields in the instruction encoding.

 grammar InstructionSet;

 // Entry rule: zero or more declarations followed by end of input.
 top_level
   : declaration* EOF
   ;

 // Structurally identical to top_level.
 // NOTE(review): presumably the entry rule used when parsing a file that was
 // pulled in through an include_file directive -- confirm against the visitor.
 include_top_level
   : declaration* EOF
   ;

 // A single item at global scope. Each alternative refers to the rule of the
 // same name defined elsewhere in this grammar.
 declaration
   : include_file
   | include_file_list
   | isa_declaration
   | bundle_declaration
   | slot_declaration
   | disasm_widths
   | constant_def
   ;

 // The include_file_list lists files to include in the generated source. This
 // may be specified at the global scope, or within each slot. Slot-local include
 // files are only added to the generated code if that slot is used. This is
 // to make it possible to avoid adding include files that will not be used
 // in the final isa decoder. If there are multiple global include file lists,
 // their contents are merged.
 // The braces may be empty: include_file* accepts zero entries.

 include_file_list
   : INCLUDE_FILES '{' include_file* '}'
   ;

 // Defines a named constant: <type> <name> = <expression>;
 // The type is template_parameter_type, which currently only admits 'int'.
 // Reachable both at global scope (declaration) and inside a slot body
 // (const_and_default_decl).
 constant_def
   : template_parameter_type ident '=' expression ';'
   ;

 // This rule specifies the disassembler field widths and alignments. A
 // disassembly string is specified by a number of fragments, typically an
 // opcode fragment followed by a fragment for the operands. This declaration
 // specifies the field width for each fragment in order left to right, and
 // whether the fragment is left justified (negative number) or right
 // justified (positive number) within that field.
 // The brace-enclosed list may be empty.
 disasm_widths
   : DISASM WIDTHS '=' '{' (expression (',' expression) *)? '}' ';'
   ;

 // This rule defines the name of an ISA description and the namespace in which
 // its code is generated. The ISA instance contains either a list of
 // instruction bundles or a list of instruction slots (exactly one of the two).
 // There is either one ISA description, or if there are more than one, only one
 // can be selected for code generation.
 // NOTE(review): earlier wording said this rule also names the type that wraps
 // the instruction encoding; no such element appears in the rule below.
 isa_declaration
   : ISA instruction_set_name=IDENT '{' namespace_decl (bundle_list | slot_list) '}'
   ;

 // The namespace_decl rule specifies the namespace in which to generate the
 // code for the isa. Nested namespaces are written with '::' separators, e.g.
 //   namespace a::b::c;
 namespace_decl
   : NAMESPACE namespace_ident ('::' namespace_ident) * ';'
   ;

 // Matches #include "<path to file>". Only the double-quoted form is accepted,
 // since the operand must be a STRING_LITERAL.

 include_file
   : INCLUDE STRING_LITERAL
   ;

 // A bundle_declaration has a name and specifies the set of bundles and/or
 // slots contained within. At least one slot or bundle has to be specified.
 // When both lists are present, the bundle list must come first.
 // NOTE(review): both lists are optional in the rule itself, so an empty body
 // parses; the "at least one" requirement has to be enforced outside the
 // grammar.

 bundle_declaration
   : BUNDLE bundle_name=IDENT '{' bundle_list? slot_list? '}'
   ;

 // A bundle list is a non-empty list of bundle identifiers, each terminated
 // by ';'.
 // NOTE(review): as written the rule also accepts zero entries and a single
 // stray ',' before the closing brace; non-emptiness is not enforced here.

 bundle_list
   : BUNDLES '{' (bundle_spec ';')* ','? '}'
   ;

 // A reference to a bundle by name.
 bundle_spec
   : IDENT
   ;

 // A slot list is a non-empty list of slot specifiers, each terminated by ';'.
 // NOTE(review): as written the rule also accepts zero entries and a single
 // stray ',' before the closing brace; non-emptiness is not enforced here.

 slot_list
   : SLOTS '{' (slot_spec ';')* ','? '}'
   ;

 // A slot specifier is a slot name with an optional range specification
 // to specify which instances are being used when the slot may occur multiple
 // times in a bundle or across multiple bundles. See below for slot declaration.

 slot_spec
   : IDENT array_spec?
   ;

 // The list of ranges of slot instances used: one or more comma separated
 // range_spec entries inside square brackets.

 array_spec
   : '[' range_spec (',' range_spec)* ']'
   ;

 // A single index, or range of indices written as start..end. Both bounds are
 // NUMBER literals; expressions are not accepted here.

 range_spec
   : range_start=NUMBER (DOTDOT range_end=NUMBER)?
   ;

 // Declares a slot with an optional size spec ([size]) indicating that it has
 // multiple instances. It may optionally inherit from one or more other slots
 // (see base_item_list). Note, it is an error for a base slot that isn't used
 // directly in the ISA (i.e., only by inheritance) to have a size
 // specification. It is also an error if not all slot instances are referenced
 // in the isa (either at the top level, from within a bundle, or in an
 // inheritance specification).
 // A template slot may not have a size specification; the grammar enforces
 // this by omitting size_spec from the template alternative.
 // The slot body is the same in both forms: any number of
 // const_and_default_decl items followed by an optional opcode list.

 slot_declaration
   // Template slot.
   : template_decl SLOT slot_name=IDENT
     (':' base_item_list )? '{' const_and_default_decl* opcode_list? '}'
   // Plain slot.
   | SLOT slot_name=IDENT size_spec? (':' base_item_list )?
     '{' const_and_default_decl* opcode_list? '}'
   ;

 // Template header of a template slot: 'template' followed by one or more
 // comma separated parameter declarations in angle brackets.
 template_decl
   : TEMPLATE '<' template_parameter_decl (',' template_parameter_decl)* '>'
   ;

 // A single template parameter: type followed by the parameter name.
 template_parameter_decl
   :  template_parameter_type IDENT
   ;

 // Only integer valued template parameters are allowed for now. This rule is
 // also used as the type in constant_def.

 template_parameter_type
   : INT
   ;

 // Can inherit from slots or templated slots. The list holds one or more
 // comma separated base items.

 base_item_list
   : base_item (',' base_item)*
   ;

 // A base slot name, followed by template arguments when the base is a
 // templated slot.
 base_item
   : IDENT template_spec?
   ;

 // Template argument list: one or more comma separated expressions in angle
 // brackets.
 template_spec
   : '<' expression (',' expression) * '>'
   ;

 // Integer expressions. Operands are integer literals (NUMBER) or names
 // (IDENT), such as template parameter names. Supported forms are unary minus,
 // '*' and '/', '+' and '-', function calls with zero or more arguments, and
 // parenthesized sub-expressions.
 // In an ANTLR4 left-recursive rule, alternative order sets precedence:
 // unary minus binds tightest, then mulop, then addop. The binary operators are
 // left associative (the ANTLR4 default).

 expression
   : negop expr=expression
   | lhs=expression mulop rhs=expression
   | lhs=expression addop rhs=expression
   | func=IDENT '(' (expression (',' expression)* )? ')'
   | '(' paren_expr=expression ')'
   | NUMBER
   | IDENT
   ;

 // Unary negation operator.
 negop
   : '-'
   ;

 // Multiplicative operators.
 mulop
   : '*' | '/'
   ;

 // Additive operators.
 addop
   : '+' | '-'
   ;

 // Number of instances, written as a NUMBER literal in square brackets.
 // Used by slot_declaration and opcode_spec.

 size_spec
   : '[' NUMBER ']'
   ;

 // Declarations allowed at the start of a slot body, before the opcode list:
 //   default latency = <expression>;
 //   default size = <NUMBER>;           (a literal, not an expression)
 //   default opcode = <opcode attribute list>;
 //   default attributes = { <instruction attributes> };
 //   <constant_def>
 //   resources <name> = <resource_details>;
 //   includes { ... }
 const_and_default_decl
   : DEFAULT LATENCY '=' expression ';'
   | DEFAULT SIZE '=' NUMBER ';'
   | DEFAULT OPCODE '=' opcode_attribute_list ';'
   | DEFAULT ATTRIBUTES '=' instruction_attribute_list ';'
   | constant_def
   | RESOURCES ident '=' resource_details ';'
   | include_file_list
   ;

 // List of opcode specifications for the slot in question. Each specification
 // is terminated by ';'. The list may be empty.

 opcode_list
   : OPCODES '{' (opcode_spec ';')* '}'
   ;

 // An opcode has a name, an optional predicate operand name, followed by
 // optional lists of source and destination operand names. Each is separated
 // by a colon. The colon between the predicate operand name and the source
 // operand name list is mandatory even if there is no predicate operand name.
 // The colon between the source and destination operand name lists is only
 // required if there is a destination operand list. An opcode name is required
 // to be unique. An opcode that would otherwise be inherited can be deleted
 // from the derived slot. This means that a derived slot isn't necessarily a
 // true superset of the base slot.
 //
 // Three forms are accepted after the opcode name:
 //   name = delete
 //   name [size]? { operands } (, attribute list)?
 //   name = override, attribute list

 opcode_spec
   : name=IDENT
     (
         '=' deleted=DELETE
       | size_spec? '{' operand_spec '}' (',' opcode_attribute_list)?
       | '=' overridden=OVERRIDE ',' opcode_attribute_list
     )
   ;


 // Operands of an opcode: either a single operand set, or a comma separated
 // list of parenthesized operand sets.
 operand_spec
   : opcode_operands
   | opcode_operands_list
   ;

 // One or more operand sets, each wrapped in parentheses.
 // NOTE(review): presumably one set per (child) instruction of the opcode --
 // confirm against the visitor.
 opcode_operands_list
   : '(' opcode_operands ')' (',' '(' opcode_operands ')' )*
   ;

 // [pred] : [sources] : [destinations]
 // Every part is optional in the rule, so an empty operand set also parses.
 opcode_operands
   : pred=IDENT? (':' source=ident_list? ( ':' dest_list? )? )?
   ;

 // Destination operands may include a latency. The list holds one or more
 // comma separated destination operands.

 dest_list
   : dest_operand (',' dest_operand)*
   ;

 // A destination operand name, optionally followed by a parenthesized latency
 // that is either an expression or the wildcard '*'.
 dest_operand
   : dest=IDENT ( '(' (expression | wildcard='*' ) ')' )?
   ;

 // An opcode attribute list is a comma separated list with at least one member.

 opcode_attribute_list
   :  opcode_attribute (',' opcode_attribute)*
   ;

 // An opcode attribute is one of: a disassembly specifier, a semfunc
 // specifier, a resource specifier, or an instruction attribute specifier.

 opcode_attribute
   : disasm_spec | semfunc_spec | resource_spec | instruction_attribute_spec
   ;

 // The disassembly specifier lists a sequence of one or more format strings.
 // Each formatted string is printed within a field of the width and
 // justification specified in the global "disasm widths" declaration. If no
 // widths are specified, or fewer widths are specified than there are format
 // strings, the "extra" formatted strings are concatenated with no explicit
 // width or justification applied.
 disasm_spec
   : DISASM ':' STRING_LITERAL ( ',' STRING_LITERAL )*
   ;

 // The semantic function specifier lists a sequence of strings that in C++ can
 // be assigned to a C++ callable with signature void(Instruction *). These
 // will be used when dispatching the instruction. There will be one string
 // for the instruction itself, plus one for each child instruction.
 // E.g.,
 // Given the following function definitions:
 //
 // void MyCFunction(const Instruction *);
 // void MyOtherFcn(int num_regs, Instruction *);
 // void MyThirdFcn(Instruction *, int width);
 //
 // The strings should be:
 //
 // "&MyCFunction"
 // "absl::bind_front(&MyOtherFcn, /*num_regs*/ 8)"
 // "std::bind(&MyThirdFcn, std::placeholders::_1, /*width*/ 32)"
 //
 semfunc_spec
   : SEMFUNC ':' STRING_LITERAL ( ',' STRING_LITERAL )*
   ;

 // The resource specifier lists the resource uses of the instruction.
 resource_spec
   : RESOURCES ':' resource_details
   ;

 // The instruction attribute specifier attaches a brace-enclosed attribute
 // list to the opcode.
 instruction_attribute_spec
   : ATTRIBUTES ':' instruction_attribute_list
   ;

 // Resource details are either given inline as up to three colon separated
 // lists inside braces,
 //   { use_list : acquire_list : hold_list }
 // where every list and the trailing colon sections are optional, or given as
 // a single identifier.
 // NOTE(review): the identifier form presumably names a resource set declared
 // with "resources <name> = ...;" in const_and_default_decl -- confirm.
 resource_details
   : '{' use_list=resource_item_list?
      (':' acquire_list=resource_item_list?
      (':' hold_list=resource_item_list? )? )? '}'
   | ident
   ;

 // One or more comma separated resource items.
 resource_item_list
   : resource_item (',' resource_item)*
   ;

 // The resource will be acquired from begin_cycle to end_cycle. If omitted,
 // end_cycle is the result latency of the instruction. If omitted, begin_cycle
 // is cycle 0 (when the instruction issues).
 // Examples:
 // x[1..3]: x is acquired starting the cycle after issue through cycle 3.
 // x[..3]:  x is acquired starting at issue through cycle 3.
 // x[] or x: x is acquired starting at issue through the instruction latency.
 // x[2]: x is acquired starting at cycle 2 through the instruction latency.
 // The rule also accepts a begin cycle followed by '..' with no end cycle,
 // e.g. x[2..]. Both cycles are general expressions, not just literals.

 resource_item
   : name=IDENT
         ('[' (begin_cycle=expression)? ('..' end_cycle=expression? )? ']')?
   ;

 // Instruction attributes are a list of attribute names that are assigned
 // a value such as: { priv=0, branch=1.. } etc. Values can be omitted, in
 // which case the value is 1. Attributes that are not named are implicitly
 // defined to have value 0. All attribute names in an isa are listed in an
 // enum class and are used as the index into the instruction attribute
 // array.
 // The list requires at least one attribute; "{}" does not parse.

 instruction_attribute_list
   : '{' instruction_attribute (',' instruction_attribute)* '}'
   ;

 // An attribute name with an optional "= expression" value.
 instruction_attribute
   : IDENT ('=' expression)?
   ;

 // Comma separated list of one or more identifiers. Used for the source
 // operand names in opcode_operands.

 ident_list
   : IDENT (',' IDENT)*
   ;

 // Don't have to exclude all the reserved words from the permissible namespace
 // identifiers, just the obvious C++ ones. Every word that is reserved in this
 // grammar but is an ordinary identifier in C++ is therefore listed explicitly
 // next to IDENT, since the lexer never produces IDENT for those words.
 // 'attributes', 'resources' and 'widths' are included for that reason; the
 // reserved words that are also C++ keywords ('default', 'delete', 'int',
 // 'namespace', 'template') stay excluded.

 namespace_ident
   : IDENT | 'latency' | 'size' | 'includes' | 'isa' | 'bundle'
   | 'bundles' | 'slot' | 'slots' | 'opcode' | 'opcodes' | 'disasm' | 'semfunc'
   | 'attributes' | 'resources' | 'widths'
   ;

 // Parser-level wrapper around a single IDENT token. Used for the names in
 // constant_def, the "resources <name> = ..." declaration and resource_details.
 ident
   : IDENT
   ;

 // Lexer specification

 // Reserved words.
 // These rules are listed before IDENT. When a keyword and IDENT match the same
 // text, ANTLR picks the rule defined first, so the keyword token wins. A
 // longer identifier that merely starts with a keyword (e.g. "slotx") still
 // lexes as IDENT because the longest match is preferred.
 ATTRIBUTES : 'attributes';
 BUNDLE : 'bundle';
 BUNDLES : 'bundles';
 DEFAULT : 'default';
 DELETE : 'delete';
 DISASM : 'disasm';
 WIDTHS : 'widths';
 SIZE : 'size';
 INCLUDE : '#include';
 INCLUDE_FILES : 'includes';
 INT : 'int';
 ISA : 'isa';
 LATENCY : 'latency';
 OPCODE : 'opcode';
 OPCODES : 'opcodes';
 OVERRIDE : 'override';
 NAMESPACE : 'namespace';
 RESOURCES: 'resources';
 SEMFUNC : 'semfunc';
 SLOT : 'slot';
 SLOTS : 'slots';
 TEMPLATE : 'template';

 // Other tokens.
 // STRING_LITERAL is a double-quoted string on a single line. Inside the
 // string a backslash escapes the character that follows it, so an escaped
 // quote does not end the literal.
 // UNTERMINATED_STRING_LITERAL is the same text without the closing quote. It
 // is a token in its own right, so a string missing its closing quote lexes as
 // this token instead.
 // IDENT is a letter or underscore followed by letters, digits or underscores.
 STRING_LITERAL : UNTERMINATED_STRING_LITERAL '"';
 UNTERMINATED_STRING_LITERAL : '"' (~["\\\r\n] | '\\' (. | EOF))*;
 IDENT : [_a-zA-Z][_a-zA-Z0-9]*;
 // Integer literals: hexadecimal (0x...), octal (leading 0), decimal and
 // binary (0b...). A single quote is accepted as a digit separator after the
 // first digit. The 0x and 0b prefixes are lower case only.
 // NUMBER is defined before its component rules, so when a component rule
 // matches the same text the token is reported as NUMBER (ANTLR prefers the
 // earliest rule on equal-length matches).
 // NOTE(review): the component rules are not declared as fragments, so each
 // of them is also a token type of its own in the generated lexer.
 NUMBER: HEX_NUMBER | OCT_NUMBER | DEC_NUMBER | BIN_NUMBER;
 HEX_NUMBER: '0x' HEX_DIGIT (HEX_DIGIT | '\'')*;
 HEX_DIGIT: [0-9a-fA-F];
 OCT_NUMBER: '0'(OCT_DIGIT | '\'')*;
 OCT_DIGIT: [0-7];
 DEC_NUMBER: ('0' | [1-9] ([0-9] | '\'')*);
 BIN_NUMBER: '0b' [0-1] ([0-1] | '\'')*;
 // Range separator used by range_spec and resource_item.
 DOTDOT : '..' ;

 // Comments and whitespace are routed to the hidden channel, so the parser
 // never sees them but they remain available in the token stream.
 // Block comments use a non-greedy match and therefore do not nest.
 BLOCK_COMMENT : '/*' .*? '*/' -> channel(HIDDEN);
 LINE_COMMENT : '//' ~[\n\r]* -> channel(HIDDEN);
 // Match a whole run of whitespace as one token instead of one token per
 // character.
 WS : [ \t\r\n]+ -> channel(HIDDEN) ;
	// Copyright 2023 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.


	// This grammar is used to specify the structure of the instruction decoder and
	// is used by the parser to generate the encoding independent part of the basic
	// instruction decoder. This grammar does not address the actual encoding of the
	// instruction as that may be expressed in multiple different forms, such as
	// traditional binary, protobuf, etc. The code generated from this grammar
	// will declare virtual methods to obtain opcodes and operands from the
	// encoding that need to be overridden with methods cognizant of the actual
	// instruction encoding used.
	//
	// The grammar specifies the instruction word structure of an ISA, i.e.,
	// the grouping and structure of instruction words. At its simplest, the ISA
	// consists of a single slot, where any opcode is valid. This is the case for
	// most traditional architectures, where the ISA is really sequential (though
	// the implementation may do parallel issue), such as the x86 and Arm ISAs.
	//
	// A VLIW based ISA supports specifying multiple instructions in a large
	// instruction word, or bundle, that will be issued in parallel. Each
	// instruction within the large instruction word occupies a slot. In the case
	// of a binary instruction encoding, a slot refers to a specific bit range
	// (or bit ranges if the slot is replicated) within the instruction word. In
	// case of a protobuf based encoding, a slot refers to one or more message
	// instances. The individual slots within a bundle may support the same set of
	// opcodes, or the opcodes may be restricted by the slot instance. The latter
	// allows for different instruction slots to have different layout and widths,
	// optimizing for the specific opcodes (e.g., load/store vs alu) assigned to
	// each slot.
	//
	// A yet more complex ISA may divide a top level bundle into more than one
	// "sub-bundles". The idea here is that while the top level bundle is fetched
	// and "issued" as a unit, the sub-bundles are then separated and issued
	// separately, possibly in different cycles.
	//
	// The top level of the grammar is the specification of the ISA name and the
	// name of the class that provides access to the opcode that is being decoded.
	// The details of that "encoding" class are not used by the generated code,
	// instead a pointer to that class is passed to pure virtual methods that
	// the overall decoder will have to provide overriding implementations for
	// to have a working decoder.
	//
	// The ISA is then broken down into one or more bundles and/or slots. A bundle
	// typically contains a group of slots that correspond to the set of
	// instructions that are fetched and issued together as part of a long
	// instruction word. However, a bundle may also contain other bundles that
	// are issued as separate groups of instructions.
	//
	// Each slot contains one or more opcodes that represent the set of valid
	// instructions for that slot. Since the same opcode may be valid in multiple
	// slots, it is possible to define slots that aren't used directly in any
	// bundle, but are used as "base classes" for those that are, allowing common
	// opcodes to be factored out for ease of expression and maintenance.
	//
	// An opcode represents an instruction and contains an optional predicate
	// operand name, an optional list of source operand names, and an optional
	// list of destination operand names. The opcode name is used to generate an
	// enumeration type used by the decoder. The operand names are used to
	// declare virtual getter methods in the slot class for creating source and
	// destination operands. The operand names are intended to correspond to
	// specific operand fields in the instruction encoding.

	grammar InstructionSet;

	// Entry rule: zero or more declarations followed by end of input.
	top_level
	  : declaration* EOF
	  ;

	// Structurally identical to top_level.
	// NOTE(review): presumably the entry rule used when parsing an included
	// file -- confirm against the visitor.
	include_top_level
	  : declaration* EOF
	  ;

	// A single item at global scope.
	declaration
	  : include_file
	  | include_file_list
	  | isa_declaration
	  | bundle_declaration
	  | slot_declaration
	  | disasm_widths
	  | constant_def
	  ;

	// The include_file_list lists files to include in the generated source. This
	// may be specified at the global scope, or within each slot. Slot-local include
	// files are only added to the generated code if that slot is used. This is
	// to make it possible to avoid adding include files that will not be used
	// in the final isa decoder. If there are multiple global include file lists,
	// their contents are merged.
	// The braces may be empty: include_file* accepts zero entries.

	include_file_list
	: INCLUDE_FILES '{' include_file* '}'
	;

	// Defines a named constant: <type> <name> = <expression>;
	constant_def
	: template_parameter_type ident '=' expression ';'
	;

	// This rule specifies the disassembler field widths and alignments. A
	// disassembly string is specified by a number of fragments, typically an
	// opcode fragment followed by a fragment for the operands. This declaration
	// specifies the field width for each fragment in order left to right, and
	// whether the fragment is left justified (negative number) or right
	// justified (positive number) within that field.
	// The brace-enclosed list may be empty.
	disasm_widths
	: DISASM WIDTHS '=' '{' (expression (',' expression) *)? '}' ';'
	;

	// This rule defines the name of an ISA description and the namespace in which
	// its code is generated. The ISA instance contains either a list of
	// instruction bundles or a list of instruction slots (exactly one of the two).
	// There is either one ISA description, or if there are more than one, only one
	// can be selected for code generation.
	// NOTE(review): earlier wording said this rule also names the type that wraps
	// the instruction encoding; no such element appears in the rule below.
	isa_declaration
	  : ISA instruction_set_name=IDENT '{' namespace_decl (bundle_list | slot_list) '}'
	  ;

	// The namespace_decl rule specifies the namespace in which to generate the
	// code for the isa. Nested namespaces are written with '::' separators.
	namespace_decl
	: NAMESPACE namespace_ident ('::' namespace_ident) * ';'
	;

	// Matches #include "<path to file>".

	include_file
	: INCLUDE STRING_LITERAL
	;

	// A bundle_declaration has a name and specifies the set of bundles and/or
	// slots contained within. At least one slot or bundle has to be specified.
	// NOTE(review): both lists are optional in the rule itself, so the "at least
	// one" requirement has to be enforced outside the grammar.

	bundle_declaration
	: BUNDLE bundle_name=IDENT '{' bundle_list? slot_list? '}'
	;

	// A bundle list is a non-empty list of bundle identifiers, each terminated
	// by ';'.
	// NOTE(review): as written the rule also accepts zero entries and a single
	// stray ',' before the closing brace.

	bundle_list
	: BUNDLES '{' (bundle_spec ';')* ','? '}'
	;

	// A reference to a bundle by name.
	bundle_spec
	: IDENT
	;

	// A slot list is a non-empty list of slot specifiers, each terminated by ';'.
	// NOTE(review): as written the rule also accepts zero entries and a single
	// stray ',' before the closing brace.

	slot_list
	: SLOTS '{' (slot_spec ';')* ','? '}'
	;

	// A slot specifier is a slot name with an optional range specification
	// to specify which instances are being used when the slot may occur multiple
	// times in a bundle or across multiple bundles. See below for slot declaration.

	slot_spec
	: IDENT array_spec?
	;

	// The list of ranges of slot instances used: one or more comma separated
	// range_spec entries inside square brackets.

	array_spec
	: '[' range_spec (',' range_spec)* ']'
	;

	// A single index, or range of indices written as start..end. Both bounds are
	// NUMBER literals.

	range_spec
	: range_start=NUMBER (DOTDOT range_end=NUMBER)?
	;

	// Declares a slot with an optional size spec ([size]) indicating that it has
	// multiple instances. It may optionally inherit from one or more other slots.
	// Note, it is an error for a base slot that isn't used directly in the ISA
	// (i.e., only by inheritance) to have a size specification. It is also an
	// error if not all slot instances are referenced in the isa (either at the
	// top level, from within a bundle, or in an inheritance specification).
	// A template slot may not have a size specification; the grammar enforces
	// this by omitting size_spec from the template alternative.

	slot_declaration
	  // Template slot.
	  : template_decl SLOT slot_name=IDENT
	    (':' base_item_list )? '{' const_and_default_decl* opcode_list? '}'
	  // Plain slot.
	  | SLOT slot_name=IDENT size_spec? (':' base_item_list )?
	    '{' const_and_default_decl* opcode_list? '}'
	  ;

	// Template header of a template slot: 'template' followed by one or more
	// comma separated parameter declarations in angle brackets.
	template_decl
	: TEMPLATE '<' template_parameter_decl (',' template_parameter_decl)* '>'
	;

	// A single template parameter: type followed by the parameter name.
	template_parameter_decl
	: template_parameter_type IDENT
	;

	// Only integer valued template parameters are allowed for now.

	template_parameter_type
	: INT
	;

	// Can inherit from slots or templated slots. The list holds one or more
	// comma separated base items.

	base_item_list
	: base_item (',' base_item)*
	;

	// A base slot name, followed by template arguments when the base is a
	// templated slot.
	base_item
	: IDENT template_spec?
	;

	// Template argument list: one or more comma separated expressions.
	template_spec
	: '<' expression (',' expression) * '>'
	;

	// Integer expressions. Operands are integer literals (NUMBER) or names
	// (IDENT), such as template parameter names. Supported forms are unary minus,
	// '*' and '/', '+' and '-', function calls with zero or more arguments, and
	// parenthesized sub-expressions.
	// In an ANTLR4 left-recursive rule, alternative order sets precedence:
	// unary minus binds tightest, then mulop, then addop.

	expression
	  : negop expr=expression
	  | lhs=expression mulop rhs=expression
	  | lhs=expression addop rhs=expression
	  | func=IDENT '(' (expression (',' expression)* )? ')'
	  | '(' paren_expr=expression ')'
	  | NUMBER
	  | IDENT
	  ;

	// Unary negation operator.
	negop
	  : '-'
	  ;

	// Multiplicative operators.
	mulop
	  : '*' | '/'
	  ;

	// Additive operators.
	addop
	  : '+' | '-'
	  ;

	// Number of instances, written as a NUMBER literal in square brackets.

	size_spec
	  : '[' NUMBER ']'
	  ;

	// Declarations allowed at the start of a slot body, before the opcode list:
	// slot defaults (latency, size, opcode attributes, instruction attributes),
	// constants, named resource definitions and slot-local include lists.
	// Note that the default size takes a NUMBER literal, not an expression.
	const_and_default_decl
	  : DEFAULT LATENCY '=' expression ';'
	  | DEFAULT SIZE '=' NUMBER ';'
	  | DEFAULT OPCODE '=' opcode_attribute_list ';'
	  | DEFAULT ATTRIBUTES '=' instruction_attribute_list ';'
	  | constant_def
	  | RESOURCES ident '=' resource_details ';'
	  | include_file_list
	  ;

	// List of opcode specifications for the slot in question. Each specification
	// is terminated by ';'. The list may be empty.

	opcode_list
	  : OPCODES '{' (opcode_spec ';')* '}'
	  ;

	// An opcode has a name, an optional predicate operand name, followed by
	// optional lists of source and destination operand names. Each is separated
	// by a colon. The colon between the predicate operand name and the source
	// operand name list is mandatory even if there is no predicate operand name.
	// The colon between the source and destination operand name lists is only
	// required if there is a destination operand list. An opcode name is required
	// to be unique. An opcode that would otherwise be inherited can be deleted
	// from the derived slot. This means that a derived slot isn't necessarily a
	// true superset of the base slot.
	//
	// Three forms are accepted after the opcode name:
	//   name = delete
	//   name [size]? { operands } (, attribute list)?
	//   name = override, attribute list

	opcode_spec
	  : name=IDENT
	    (
	        '=' deleted=DELETE
	      | size_spec? '{' operand_spec '}' (',' opcode_attribute_list)?
	      | '=' overridden=OVERRIDE ',' opcode_attribute_list
	    )
	  ;


	// Operands of an opcode: either a single operand set, or a comma separated
	// list of parenthesized operand sets.
	operand_spec
	  : opcode_operands
	  | opcode_operands_list
	  ;

	// One or more operand sets, each wrapped in parentheses.
	opcode_operands_list
	  : '(' opcode_operands ')' (',' '(' opcode_operands ')' )*
	  ;

	// [pred] : [sources] : [destinations]
	// Every part is optional in the rule, so an empty operand set also parses.
	opcode_operands
	  : pred=IDENT? (':' source=ident_list? ( ':' dest_list? )? )?
	  ;

	// Destination operands may include a latency.

	dest_list
	  : dest_operand (',' dest_operand)*
	  ;

	// A destination operand name, optionally followed by a parenthesized latency
	// that is either an expression or the wildcard '*'.
	dest_operand
	  : dest=IDENT ( '(' (expression | wildcard='*' ) ')' )?
	  ;

	// An opcode attribute list is a comma separated list with at least one member.

	opcode_attribute_list
	  : opcode_attribute (',' opcode_attribute)*
	  ;

	// An opcode attribute is one of: a disassembly specifier, a semfunc
	// specifier, a resource specifier, or an instruction attribute specifier.

	opcode_attribute
	  : disasm_spec | semfunc_spec | resource_spec | instruction_attribute_spec
	  ;

	// The disassembly specifier lists a sequence of one or more format strings.
	// Each formatted string is printed within a field of the width and
	// justification specified in the global "disasm widths" declaration. If no
	// widths are specified, or fewer widths are specified than there are format
	// strings, the "extra" formatted strings are concatenated with no explicit
	// width or justification applied.
	disasm_spec
	: DISASM ':' STRING_LITERAL ( ',' STRING_LITERAL )*
	;

	// The semantic function specifier lists a sequence of strings that in C++ can
	// be assigned to a C++ callable with signature void(Instruction *). These
	// will be used when dispatching the instruction. There will be one string
	// for the instruction itself, plus one for each child instruction.
	// E.g.,
	// Given the following function definitions:
	//
	// void MyCFunction(const Instruction *);
	// void MyOtherFcn(int num_regs, Instruction *);
	// void MyThirdFcn(Instruction *, int width);
	//
	// The strings should be:
	//
	// "&MyCFunction"
	// "absl::bind_front(&MyOtherFcn, /*num_regs*/ 8)"
	// "std::bind(&MyThirdFcn, std::placeholders::_1, /*width*/ 32)"
	//
	semfunc_spec
	: SEMFUNC ':' STRING_LITERAL ( ',' STRING_LITERAL )*
	;

	// The resource specifier lists the resource uses of the instruction.
	resource_spec
	  : RESOURCES ':' resource_details
	  ;

	// The instruction attribute specifier attaches a brace-enclosed attribute
	// list to the opcode.
	instruction_attribute_spec
	  : ATTRIBUTES ':' instruction_attribute_list
	  ;

	// Resource details are either given inline as up to three colon separated
	// lists inside braces,
	//   { use_list : acquire_list : hold_list }
	// where every list and the trailing colon sections are optional, or given as
	// a single identifier.
	resource_details
	  : '{' use_list=resource_item_list?
	     (':' acquire_list=resource_item_list?
	     (':' hold_list=resource_item_list? )? )? '}'
	  | ident
	  ;

	// One or more comma separated resource items.
	resource_item_list
	  : resource_item (',' resource_item)*
	  ;

	// The resource will be acquired from begin_cycle to end_cycle. If omitted,
	// end_cycle is the result latency of the instruction. If omitted, begin_cycle
	// is cycle 0 (when the instruction issues).
	// Examples:
	// x[1..3]: x is acquired starting the cycle after issue through cycle 3.
	// x[..3]: x is acquired starting at issue through cycle 3.
	// x[] or x: x is acquired starting at issue through the instruction latency.
	// x[2]: x is acquired starting at cycle 2 through the instruction latency.
	// Both cycles are general expressions, not just literals.

	resource_item
	: name=IDENT
	('[' (begin_cycle=expression)? ('..' end_cycle=expression? )? ']')?
	;

	// Instruction attributes are a list of attribute names that are assigned
	// a value such as: { priv=0, branch=1.. } etc. Values can be omitted, in
	// which case the value is 1. Attributes that are not named are implicitly
	// defined to have value 0. All attribute names in an isa are listed in an
	// enum class and are used as the index into the instruction attribute
	// array.
	// The list requires at least one attribute; "{}" does not parse.

	instruction_attribute_list
	: '{' instruction_attribute (',' instruction_attribute)* '}'
	;

	// An attribute name with an optional "= expression" value.
	instruction_attribute
	: IDENT ('=' expression)?
	;

	// Comma separated list of one or more identifiers.

	ident_list
	: IDENT (',' IDENT)*
	;

	// Don't have to exclude all the reserved words from the permissible namespace
	// identifiers, just the obvious C++ ones. Every word that is reserved in this
	// grammar but is an ordinary identifier in C++ is therefore listed explicitly
	// next to IDENT, since the lexer never produces IDENT for those words.
	// 'attributes', 'resources' and 'widths' are included for that reason.

	namespace_ident
	  : IDENT | 'latency' | 'size' | 'includes' | 'isa' | 'bundle'
	  | 'bundles' | 'slot' | 'slots' | 'opcode' | 'opcodes' | 'disasm' | 'semfunc'
	  | 'attributes' | 'resources' | 'widths'
	  ;

	// Parser-level wrapper around a single IDENT token.
	ident
	  : IDENT
	  ;

	// Lexer specification

	// Reserved words.
	// These rules are listed before IDENT. When a keyword and IDENT match the same
	// text, ANTLR picks the rule defined first, so the keyword token wins.
	ATTRIBUTES : 'attributes';
	BUNDLE : 'bundle';
	BUNDLES : 'bundles';
	DEFAULT : 'default';
	DELETE : 'delete';
	DISASM : 'disasm';
	WIDTHS : 'widths';
	SIZE : 'size';
	INCLUDE : '#include';
	INCLUDE_FILES : 'includes';
	INT : 'int';
	ISA : 'isa';
	LATENCY : 'latency';
	OPCODE : 'opcode';
	OPCODES : 'opcodes';
	OVERRIDE : 'override';
	NAMESPACE : 'namespace';
	RESOURCES: 'resources';
	SEMFUNC : 'semfunc';
	SLOT : 'slot';
	SLOTS : 'slots';
	TEMPLATE : 'template';

	// Other tokens.
	// STRING_LITERAL is a double-quoted string on a single line; a backslash
	// escapes the character that follows it. UNTERMINATED_STRING_LITERAL is the
	// same text without the closing quote.
	STRING_LITERAL : UNTERMINATED_STRING_LITERAL '"';
	UNTERMINATED_STRING_LITERAL : '"' (~["\\\r\n] | '\\' (. | EOF))*;
	IDENT : [_a-zA-Z][_a-zA-Z0-9]*;
	// Integer literals: hexadecimal (0x...), octal (leading 0), decimal and
	// binary (0b...). A single quote is accepted as a digit separator after the
	// first digit. NUMBER is defined before its component rules, so it is the
	// token type reported when a component rule matches the same text.
	NUMBER: HEX_NUMBER | OCT_NUMBER | DEC_NUMBER | BIN_NUMBER;
	HEX_NUMBER: '0x' HEX_DIGIT (HEX_DIGIT | '\'')*;
	HEX_DIGIT: [0-9a-fA-F];
	OCT_NUMBER: '0'(OCT_DIGIT | '\'')*;
	OCT_DIGIT: [0-7];
	DEC_NUMBER: ('0' | [1-9] ([0-9] | '\'')*);
	BIN_NUMBER: '0b' [0-1] ([0-1] | '\'')*;
	// Range separator used by range_spec and resource_item.
	DOTDOT : '..' ;

	// Comments and whitespace are routed to the hidden channel, so the parser
	// never sees them but they remain available in the token stream.
	// Block comments use a non-greedy match and therefore do not nest.
	BLOCK_COMMENT : '/*' .*? '*/' -> channel(HIDDEN);
	LINE_COMMENT : '//' ~[\n\r]* -> channel(HIDDEN);
	// Match a whole run of whitespace as one token instead of one token per
	// character.
	WS : [ \t\r\n]+ -> channel(HIDDEN) ;