| // Copyright 2023 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef THIRD_PARTY_MPACT_RISCV_RISCV_RISCV_VECTOR_INSTRUCTION_HELPERS_H_ |
| #define THIRD_PARTY_MPACT_RISCV_RISCV_RISCV_VECTOR_INSTRUCTION_HELPERS_H_ |
| |
| #include <algorithm> |
| #include <cstdint> |
| #include <functional> |
| #include <limits> |
| #include <optional> |
| #include <tuple> |
| |
| #include "absl/log/log.h" |
| #include "absl/strings/str_cat.h" |
| #include "cheriot/cheriot_vector_state.h" |
| #include "mpact/sim/generic/instruction.h" |
| #include "mpact/sim/generic/type_helpers.h" |
| #include "riscv//riscv_fp_host.h" |
| #include "riscv//riscv_fp_info.h" |
| #include "riscv//riscv_register.h" |
| #include "riscv//riscv_state.h" |
| |
| namespace mpact { |
| namespace sim { |
| namespace cheriot { |
| |
| using ::mpact::sim::cheriot::CheriotVectorState; |
| using ::mpact::sim::generic::FPTypeInfo; |
| using ::mpact::sim::generic::GetInstructionSource; |
| using ::mpact::sim::generic::Instruction; |
| using ::mpact::sim::riscv::FPExceptions; |
| using ::mpact::sim::riscv::RV32VectorDestinationOperand; |
| using ::mpact::sim::riscv::RV32VectorSourceOperand; |
| using ::mpact::sim::riscv::ScopedFPStatus; |
| using ::mpact::sim::riscv::VectorLoadContext; |
| |
| // This helper function handles the case of instructions that target a vector |
| // mask. |
| // It clears the masked bit and uses the mask value in the |
| // instruction, such as carry generation from add with carry. |
| // Note that this function will modify masked bits no matter what the mask |
| // value is. |
| template <typename Vs2, typename Vs1> |
| void RiscVSetMaskBinaryVectorMaskOp(CheriotVectorState *rv_vector, |
| const Instruction *inst, |
| std::function<bool(Vs2, Vs1, bool)> op) { |
| if (rv_vector->vector_exception()) return; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| const int num_elements = rv_vector->vector_length(); |
| const int vector_index = rv_vector->vstart(); |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(); |
| auto dest_span = dest_db->Get<uint8_t>(); |
| // Determine if it's vector-vector or vector-scalar. |
| const bool vector_scalar = inst->Source(1)->shape()[0] == 1; |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2)); |
| bool vm_unmasked_bit = false; |
| if (inst->SourcesSize() > 3) { |
| vm_unmasked_bit = GetInstructionSource<bool>(inst, 3); |
| } |
| const bool mask_used = !vm_unmasked_bit; |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| for (int i = vector_index; i < num_elements; i++) { |
| const int mask_index = i >> 3; |
| const int mask_offset = i & 0b111; |
| const bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| const Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, i); |
| const Vs1 vs1 = GetInstructionSource<Vs1>(inst, 1, vector_scalar ? 0 : i); |
| |
| // Clear the masked register bit. |
| dest_span[mask_index] &= ~(1 << mask_offset); |
| |
| // Mask value is used only when `vm_unmasked_bit` is 0. |
| dest_span[mask_index] |= |
| (op(vs2, vs1, mask_used & mask_value) << mask_offset); |
| } |
| // Submit the destination db . |
| dest_db->Submit(); |
| rv_vector->clear_vstart(); |
| } |
| |
| // This helper function handles the case of instructions that target a vector |
| // mask and uses the mask value in the instruction, such as carry generation |
| // from add with carry. |
| template <typename Vs2, typename Vs1> |
| void RiscVMaskBinaryVectorMaskOp(CheriotVectorState *rv_vector, |
| const Instruction *inst, |
| std::function<bool(Vs2, Vs1, bool)> op) { |
| if (rv_vector->vector_exception()) return; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| int num_elements = rv_vector->vector_length(); |
| int vector_index = rv_vector->vstart(); |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(); |
| auto dest_span = dest_db->Get<uint8_t>(); |
| // Determine if it's vector-vector or vector-scalar. |
| bool vector_scalar = inst->Source(1)->shape()[0] == 1; |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2)); |
| bool vm_unmasked_bit = false; |
| if (inst->SourcesSize() > 3) { |
| vm_unmasked_bit = GetInstructionSource<bool>(inst, 3); |
| } |
| const bool mask_used = !vm_unmasked_bit; |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| for (int i = vector_index; i < num_elements; i++) { |
| int mask_index = i >> 3; |
| int mask_offset = i & 0b111; |
| bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| if (mask_used && !mask_value) { |
| continue; |
| } |
| |
| Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, i); |
| Vs1 vs1 = GetInstructionSource<Vs1>(inst, 1, vector_scalar ? 0 : i); |
| |
| // Clear the masked register bit. |
| dest_span[mask_index] &= ~(1 << mask_offset); |
| |
| // Mask value is used only when `vm_unmasked_bit` is 0. |
| dest_span[mask_index] |= |
| (op(vs2, vs1, mask_used & mask_value) << mask_offset); |
| } |
| // Submit the destination db . |
| dest_db->Submit(); |
| rv_vector->clear_vstart(); |
| } |
| |
| // This helper function handles the case of vector mask |
| // operations. |
| template <typename Vs2, typename Vs1> |
| void RiscVBinaryVectorMaskOp(CheriotVectorState *rv_vector, |
| const Instruction *inst, |
| std::function<bool(Vs2, Vs1)> op) { |
| RiscVMaskBinaryVectorMaskOp<Vs2, Vs1>( |
| rv_vector, inst, [op](Vs2 vs2, Vs1 vs1, bool mask_value) -> bool { |
| if (mask_value) { |
| return op(vs2, vs1); |
| } |
| return false; |
| }); |
| } |
| |
| // This helper function handles the case of nullary vector |
| // operations. It implements all the checking necessary for both widening and |
| // narrowing operations. |
| template <typename Vd> |
| void RiscVMaskNullaryVectorOp(CheriotVectorState *rv_vector, |
| const Instruction *inst, |
| std::function<Vd(bool)> op) { |
| if (rv_vector->vector_exception()) return; |
| int num_elements = rv_vector->vector_length(); |
| int elements_per_vector = |
| rv_vector->vector_register_byte_length() / sizeof(Vd); |
| int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Verify that there are enough registers in the destination operand. |
| if (dest_op->size() < max_regs) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << absl::StrCat( |
| "Vector destination '", dest_op->AsString(), "' has fewer registers (", |
| dest_op->size(), ") than required by the operation (", max_regs, ")"); |
| return; |
| } |
| // There 2 types of instruction with different number of source operands. |
| // 1. inst vd, vs2, vmask (viota instruction) |
| // 2. inst vd, vmask (vid instruction) |
| RV32VectorSourceOperand *vs2_op = nullptr; |
| RV32VectorSourceOperand *mask_op = nullptr; |
| if (inst->SourcesSize() > 1) { |
| vs2_op = static_cast<RV32VectorSourceOperand *>(inst->Source(0)); |
| mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(1)); |
| } else { |
| mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(0)); |
| } |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| int vector_index = rv_vector->vstart(); |
| int start_reg = vector_index / elements_per_vector; |
| int item_index = vector_index % elements_per_vector; |
| // Iterate over the number of registers to write. |
| for (int reg = start_reg; (reg < max_regs) && (vector_index < num_elements); |
| reg++) { |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(reg); |
| auto dest_span = dest_db->Get<Vd>(); |
| // Write data into register subject to masking. |
| int element_count = std::min(elements_per_vector, num_elements); |
| for (int i = item_index; |
| (i < element_count) && (vector_index < num_elements); i++) { |
| // Get the mask value. |
| int mask_index = vector_index >> 3; |
| int mask_offset = vector_index & 0b111; |
| bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| bool operation_mask = mask_value; |
| // Instruction with rs2 operand checks vs2 bit value. |
| if (vs2_op != nullptr) { |
| const auto rs2_span = |
| vs2_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| const bool rs2_value = ((rs2_span[mask_index] >> mask_offset) & 0b1); |
| // If rs2 is set, then the operation is performed. |
| operation_mask &= rs2_value; |
| } |
| |
| auto result = op(operation_mask); |
| if (mask_value) { |
| dest_span[i] = result; |
| } |
| vector_index++; |
| } |
| // Submit the destination db . |
| dest_db->Submit(); |
| item_index = 0; |
| } |
| rv_vector->clear_vstart(); |
| } |
| |
| // This helper function handles the case of unary vector |
| // operations. It implements all the checking necessary for both widening and |
| // narrowing operations. |
| template <typename Vd, typename Vs2> |
| void RiscVUnaryVectorOp(CheriotVectorState *rv_vector, const Instruction *inst, |
| std::function<Vd(Vs2)> op) { |
| if (rv_vector->vector_exception()) return; |
| int num_elements = rv_vector->vector_length(); |
| int lmul = rv_vector->vector_length_multiplier(); |
| int sew = rv_vector->selected_element_width(); |
| int lmul_vd = lmul * sizeof(Vd) / sew; |
| int lmul_vs2 = lmul * sizeof(Vs2) / sew; |
| if (lmul_vd > 64 || lmul_vd == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul value vd (" << lmul_vd << ")"; |
| return; |
| } |
| if (lmul_vs2 > 64 || lmul_vs2 == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul_value vs2 (" << lmul_vs2 << ")"; |
| return; |
| } |
| int elements_per_vector = |
| rv_vector->vector_register_byte_length() / sizeof(Vd); |
| int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Verify that there are enough registers in the destination operand. |
| if (dest_op->size() < max_regs) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << absl::StrCat( |
| "Vector destination '", dest_op->AsString(), "' has fewer registers (", |
| dest_op->size(), ") than required by the operation (", max_regs, ")"); |
| return; |
| } |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(1)); |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| int vector_index = rv_vector->vstart(); |
| int start_reg = vector_index / elements_per_vector; |
| int item_index = vector_index % elements_per_vector; |
| // Iterate over the number of registers to write. |
| for (int reg = start_reg; (reg < max_regs) && (vector_index < num_elements); |
| reg++) { |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(reg); |
| auto dest_span = dest_db->Get<Vd>(); |
| // Write data into register subject to masking. |
| int element_count = std::min(elements_per_vector, num_elements); |
| for (int i = item_index; |
| (i < element_count) && (vector_index < num_elements); i++) { |
| // Get the mask value. |
| int mask_index = vector_index >> 3; |
| int mask_offset = vector_index & 0b111; |
| bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| if (mask_value) { |
| // Compute result. |
| Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, vector_index); |
| dest_span[i] = op(vs2); |
| } |
| vector_index++; |
| } |
| // Submit the destination db . |
| dest_db->Submit(); |
| item_index = 0; |
| } |
| rv_vector->clear_vstart(); |
| } |
| |
| // This helper function handles the case of unary vector operations that set |
| // fflags. It implements all the checking necessary for both widening and |
| // narrowing operations. |
| template <typename Vd, typename Vs2> |
| void RiscVUnaryVectorOpWithFflags( |
| CheriotVectorState *rv_vector, const Instruction *inst, |
| std::function<std::tuple<Vd, uint32_t>(Vs2)> op) { |
| if (rv_vector->vector_exception()) return; |
| int num_elements = rv_vector->vector_length(); |
| int lmul = rv_vector->vector_length_multiplier(); |
| int sew = rv_vector->selected_element_width(); |
| int lmul_vd = lmul * sizeof(Vd) / sew; |
| int lmul_vs2 = lmul * sizeof(Vs2) / sew; |
| if (lmul_vd > 64 || lmul_vd == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul value vd (" << lmul_vd << ")"; |
| return; |
| } |
| if (lmul_vs2 > 64 || lmul_vs2 == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul_value vs2 (" << lmul_vs2 << ")"; |
| return; |
| } |
| int elements_per_vector = |
| rv_vector->vector_register_byte_length() / sizeof(Vd); |
| int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Verify that there are enough registers in the destination operand. |
| if (dest_op->size() < max_regs) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << absl::StrCat( |
| "Vector destination '", dest_op->AsString(), "' has fewer registers (", |
| dest_op->size(), ") than required by the operation (", max_regs, ")"); |
| return; |
| } |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(1)); |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| int vector_index = rv_vector->vstart(); |
| int start_reg = vector_index / elements_per_vector; |
| int item_index = vector_index % elements_per_vector; |
| // Iterate over the number of registers to write. |
| uint32_t fflags = 0; |
| for (int reg = start_reg; (reg < max_regs) && (vector_index < num_elements); |
| reg++) { |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(reg); |
| auto dest_span = dest_db->Get<Vd>(); |
| // Write data into register subject to masking. |
| int element_count = std::min(elements_per_vector, num_elements); |
| for (int i = item_index; |
| (i < element_count) && (vector_index < num_elements); i++) { |
| // Get the mask value. |
| int mask_index = vector_index >> 3; |
| int mask_offset = vector_index & 0b111; |
| bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| if (mask_value) { |
| // Compute result. |
| Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, vector_index); |
| auto [value, flag] = op(vs2); |
| dest_span[i] = value; |
| fflags |= flag; |
| } |
| vector_index++; |
| } |
| // Submit the destination db . |
| dest_db->Submit(); |
| item_index = 0; |
| } |
| auto *flag_db = inst->Destination(1)->AllocateDataBuffer(); |
| flag_db->Set<uint32_t>(0, fflags); |
| flag_db->Submit(); |
| rv_vector->clear_vstart(); |
| } |
| |
| // This helper function handles the case of mask + two source operand vector |
| // operations. It implements all the checking necessary for both widening and |
| // narrowing operations. |
| template <typename Vd, typename Vs2, typename Vs1> |
| void RiscVMaskBinaryVectorOp( |
| CheriotVectorState *rv_vector, const Instruction *inst, |
| std::function<std::optional<Vd>(Vs2, Vs1, bool)> op) { |
| if (rv_vector->vector_exception()) return; |
| int num_elements = rv_vector->vector_length(); |
| int lmul = rv_vector->vector_length_multiplier(); |
| int sew = rv_vector->selected_element_width(); |
| int lmul_vd = lmul * sizeof(Vd) / sew; |
| int lmul_vs2 = lmul * sizeof(Vs2) / sew; |
| int lmul_vs1 = lmul * sizeof(Vs1) / sew; |
| if (lmul_vd > 64 || lmul_vs2 > 64 || lmul_vs1 > 64) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul value"; |
| return; |
| } |
| if (lmul_vd == 0 || lmul_vs2 == 0 || lmul_vs1 == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul_value"; |
| return; |
| } |
| int elements_per_vector = |
| rv_vector->vector_register_byte_length() / sizeof(Vd); |
| int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Verify that there are enough registers in the destination operand. |
| if (dest_op->size() < max_regs) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << absl::StrCat( |
| "Vector destination '", dest_op->AsString(), "' has fewer registers (", |
| dest_op->size(), ") than required by the operation (", max_regs, ")"); |
| return; |
| } |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2)); |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| int vector_index = rv_vector->vstart(); |
| int start_reg = vector_index / elements_per_vector; |
| int item_index = vector_index % elements_per_vector; |
| // Determine if it's vector-vector or vector-scalar. |
| bool vector_scalar = inst->Source(1)->shape()[0] == 1; |
| // Iterate over the number of registers to write. |
| bool exception = false; |
| for (int reg = start_reg; |
| !exception && (reg < max_regs) && (vector_index < num_elements); reg++) { |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(reg); |
| auto dest_span = dest_db->Get<Vd>(); |
| // Write data into register subject to masking. |
| int element_count = std::min(elements_per_vector, num_elements); |
| for (int i = item_index; |
| (i < element_count) && (vector_index < num_elements); i++) { |
| // Get the mask value. |
| int mask_index = vector_index >> 3; |
| int mask_offset = vector_index & 0b111; |
| bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| // Compute result. |
| Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, vector_index); |
| Vs1 vs1 = GetInstructionSource<Vs1>(inst, 1, |
| (vector_scalar ? 0 : vector_index)); |
| auto value = op(vs2, vs1, mask_value); |
| if (value.has_value()) { |
| dest_span[i] = value.value(); |
| } else if (mask_value) { |
| // If there is no value returned, but the mask_value is true, check |
| // to see if there was an exception. |
| if (rv_vector->vector_exception()) { |
| rv_vector->set_vstart(vector_index); |
| exception = true; |
| break; |
| } |
| } |
| vector_index++; |
| } |
| // Submit the destination db . |
| dest_db->Submit(); |
| item_index = 0; |
| } |
| rv_vector->clear_vstart(); |
| } |
| |
| // This helper function handles the case of two source operand vector |
| // operations. It implements all the checking necessary for both widening and |
| // narrowing operations. |
| template <typename Vd, typename Vs2, typename Vs1> |
| void RiscVBinaryVectorOp(CheriotVectorState *rv_vector, const Instruction *inst, |
| std::function<Vd(Vs2, Vs1)> op) { |
| RiscVMaskBinaryVectorOp<Vd, Vs2, Vs1>( |
| rv_vector, inst, |
| [op](Vs2 vs2, Vs1 vs1, bool mask_value) -> std::optional<Vd> { |
| if (mask_value) { |
| return op(vs2, vs1); |
| } |
| return std::nullopt; |
| }); |
| } |
| |
| template <typename Vd, typename Vs2, typename Vs1> |
| void RiscVBinaryVectorOpWithFflags( |
| CheriotVectorState *rv_vector, const Instruction *inst, |
| std::function<std::tuple<Vd, uint32_t>(Vs2, Vs1)> op) { |
| if (rv_vector->vector_exception()) return; |
| int num_elements = rv_vector->vector_length(); |
| int lmul = rv_vector->vector_length_multiplier(); |
| int sew = rv_vector->selected_element_width(); |
| int lmul_vd = lmul * sizeof(Vd) / sew; |
| int lmul_vs2 = lmul * sizeof(Vs2) / sew; |
| int lmul_vs1 = lmul * sizeof(Vs1) / sew; |
| if (lmul_vd > 64 || lmul_vs2 > 64 || lmul_vs1 > 64) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul value"; |
| return; |
| } |
| if (lmul_vd == 0 || lmul_vs2 == 0 || lmul_vs1 == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul_value"; |
| return; |
| } |
| int elements_per_vector = |
| rv_vector->vector_register_byte_length() / sizeof(Vd); |
| int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Verify that there are enough registers in the destination operand. |
| if (dest_op->size() < max_regs) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << absl::StrCat( |
| "Vector destination '", dest_op->AsString(), "' has fewer registers (", |
| dest_op->size(), ") than required by the operation (", max_regs, ")"); |
| return; |
| } |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2)); |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| int vector_index = rv_vector->vstart(); |
| int start_reg = vector_index / elements_per_vector; |
| int item_index = vector_index % elements_per_vector; |
| // Determine if it's vector-vector or vector-scalar. |
| bool vector_scalar = inst->Source(1)->shape()[0] == 1; |
| // Iterate over the number of registers to write. |
| bool exception = false; |
| uint32_t fflags = 0; |
| for (int reg = start_reg; |
| !exception && (reg < max_regs) && (vector_index < num_elements); reg++) { |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(reg); |
| auto dest_span = dest_db->Get<Vd>(); |
| // Write data into register subject to masking. |
| int element_count = std::min(elements_per_vector, num_elements); |
| for (int i = item_index; |
| (i < element_count) && (vector_index < num_elements); i++) { |
| // Get the mask value. |
| int mask_index = vector_index >> 3; |
| int mask_offset = vector_index & 0b111; |
| bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| // Compute result. |
| Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, vector_index); |
| Vs1 vs1 = GetInstructionSource<Vs1>(inst, 1, |
| (vector_scalar ? 0 : vector_index)); |
| if (mask_value) { |
| auto [value, flag] = op(vs2, vs1); |
| dest_span[i] = value; |
| fflags |= flag; |
| if (rv_vector->vector_exception()) { |
| rv_vector->set_vstart(vector_index); |
| exception = true; |
| break; |
| } |
| } |
| vector_index++; |
| } |
| // Submit the destination dbs. |
| dest_db->Submit(); |
| item_index = 0; |
| } |
| auto *flag_db = inst->Destination(1)->AllocateDataBuffer(); |
| flag_db->Set<uint32_t>(0, fflags); |
| flag_db->Submit(); |
| rv_vector->clear_vstart(); |
| } |
| |
| // This helper function handles three source operand vector operations. It |
| // implements all the checking necessary for both widening and narrowing |
| // operations. |
| template <typename Vd, typename Vs2, typename Vs1> |
| void RiscVTernaryVectorOp(CheriotVectorState *rv_vector, |
| const Instruction *inst, |
| std::function<Vd(Vs2, Vs1, Vd)> op) { |
| if (rv_vector->vector_exception()) return; |
| int num_elements = rv_vector->vector_length(); |
| int lmul = rv_vector->vector_length_multiplier(); |
| int sew = rv_vector->selected_element_width(); |
| int lmul_vd = lmul * sizeof(Vd) / sew; |
| int lmul_vs2 = lmul * sizeof(Vs2) / sew; |
| int lmul_vs1 = lmul * sizeof(Vs1) / sew; |
| if (lmul_vd > 64 || lmul_vs2 > 64 || lmul_vs1 > 64) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul value"; |
| return; |
| } |
| if (lmul_vd == 0 || lmul_vs2 == 0 || lmul_vs1 == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul_value"; |
| return; |
| } |
| int elements_per_vector = |
| rv_vector->vector_register_byte_length() / sizeof(Vd); |
| int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector; |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| // Verify that there are enough registers in the destination operand. |
| if (dest_op->size() < max_regs) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << absl::StrCat( |
| "Vector destination '", dest_op->AsString(), "' has fewer registers (", |
| dest_op->size(), ") than required by the operation (", max_regs, ")"); |
| return; |
| } |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(3)); |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| // Get the vector start element index and compute where to start |
| // the operation. |
| int vector_index = rv_vector->vstart(); |
| int start_reg = vector_index / elements_per_vector; |
| int item_index = vector_index % elements_per_vector; |
| // Determine if it's vector-vector or vector-scalar. |
| bool vector_scalar = inst->Source(1)->shape()[0] == 1; |
| // Iterate over the number of registers to write. |
| for (int reg = start_reg; (reg < max_regs) && (vector_index < num_elements); |
| reg++) { |
| // Allocate data buffer for the new register data. |
| auto *dest_db = dest_op->CopyDataBuffer(reg); |
| auto dest_span = dest_db->Get<Vd>(); |
| // Write data into register subject to masking. |
| int element_count = std::min(elements_per_vector, num_elements); |
| for (int i = item_index; |
| (i < element_count) && (vector_index < num_elements); i++) { |
| // Get the mask value. |
| int mask_index = vector_index >> 3; |
| int mask_offset = vector_index & 0b111; |
| bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0; |
| // Compute result. |
| Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, vector_index); |
| Vs1 vs1 = GetInstructionSource<Vs1>(inst, 1, |
| (vector_scalar ? 0 : vector_index)); |
| Vd vd = GetInstructionSource<Vd>(inst, 2, vector_index); |
| if (mask_value) { |
| dest_span[i] = op(vs2, vs1, vd); |
| } |
| vector_index++; |
| } |
| // Submit the destination db . |
| dest_db->Submit(); |
| item_index = 0; |
| } |
| rv_vector->clear_vstart(); |
| } |
| |
| // The reduction instructions take Vs1[0], and all the elements (subject to |
| // masking) from Vs2 and apply the reduction operation to produce a single |
| // element that is written to Vd[0]. |
| template <typename Vd, typename Vs2, typename Vs1> |
| void RiscVBinaryReductionVectorOp(CheriotVectorState *rv_vector, |
| const Instruction *inst, |
| std::function<Vd(Vd, Vs2)> op) { |
| if (rv_vector->vector_exception()) return; |
| if (rv_vector->vstart()) { |
| rv_vector->vector_exception(); |
| return; |
| } |
| int sew = rv_vector->selected_element_width(); |
| int lmul = rv_vector->vector_length_multiplier(); |
| int lmul_vd = lmul * sizeof(Vd) / sew; |
| int lmul_vs2 = lmul * sizeof(Vs2) / sew; |
| int lmul_vs1 = lmul * sizeof(Vs1) / sew; |
| if (lmul_vd > 64 || lmul_vs2 > 64 || lmul_vs1 > 64) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul value"; |
| return; |
| } |
| if (lmul_vd == 0 || lmul_vs2 == 0 || lmul_vs1 == 0) { |
| rv_vector->set_vector_exception(); |
| LOG(ERROR) << "Illegal lmul_value"; |
| return; |
| } |
| int num_elements = rv_vector->vector_length(); |
| // Get the vector mask. |
| auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2)); |
| auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>(); |
| Vd accumulator = |
| static_cast<Vd>(generic::GetInstructionSource<Vs1>(inst, 1, 0)); |
| for (int i = 0; i < num_elements; i++) { |
| int mask_index = i >> 3; |
| int mask_offset = i & 0b111; |
| bool mask_value = (mask_span[mask_index] >> mask_offset) & 0b1; |
| if (mask_value) { |
| accumulator = |
| op(accumulator, generic::GetInstructionSource<Vs2>(inst, 0, i)); |
| } |
| } |
| auto *dest_op = |
| static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); |
| auto dest_db = dest_op->CopyDataBuffer(); |
| dest_db->Set<Vd>(0, accumulator); |
| dest_db->Submit(); |
| rv_vector->clear_vstart(); |
| } |
| |
| template <typename T> |
| T GetRoundingBit(int rounding_mode, T rounding_bits, int size) { |
| switch (rounding_mode) { |
| case 0: // Round-to-nearest-up (add +0.5 lsb) |
| if (size < 2) return 0; |
| return (rounding_bits >> (size - 2)) & 0b1; |
| case 1: { // Round-to-nearest-event |
| T v_d_minus_1 = (size < 2) ? 0 : (rounding_bits >> (size - 2)) & 0b1; |
| T v_d = (size == 0) ? 0 : (rounding_bits >> (size - 1)) & 0b1; |
| T v_d_minus_2_0 = (size < 3) |
| ? 0 |
| : (rounding_bits & ~(std::numeric_limits<T>::max() |
| << (size - 2))) != 0; |
| return v_d_minus_1 & (v_d_minus_2_0 | v_d); |
| } |
| case 2: // Round-down (truncate). |
| return 0; |
| case 3: { // Round-to-odd. |
| T v_d_minus_1_0 = (size < 2) |
| ? 0 |
| : (rounding_bits & ~(std::numeric_limits<T>::max() |
| << (size - 1))) != 0; |
| T v_d = (rounding_bits >> (size - 1)) & 0b1; |
| return (!v_d) & v_d_minus_1_0; |
| } |
| default: |
| LOG(ERROR) << "GetRoundingBit: Invalid value for rounding mode"; |
| break; |
| } |
| return 0; |
| } |
| |
| template <typename T> |
| T RoundOff(CheriotVectorState *rv_vector, T value, int size) { |
| auto rm = rv_vector->vxrm(); |
| auto ret = (value >> size) + GetRoundingBit<T>(rm, value, size + 1); |
| return ret; |
| } |
| |
| } // namespace cheriot |
| } // namespace sim |
| } // namespace mpact |
| |
| #endif // THIRD_PARTY_MPACT_RISCV_RISCV_RISCV_VECTOR_INSTRUCTION_HELPERS_H_ |