blob: b7640e01724782e1e01782db0bc82f05b6176e37 [file] [log] [blame]
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_MPACT_RISCV_RISCV_RISCV_VECTOR_INSTRUCTION_HELPERS_H_
#define THIRD_PARTY_MPACT_RISCV_RISCV_RISCV_VECTOR_INSTRUCTION_HELPERS_H_
#include <algorithm>
#include <cstdint>
#include <functional>
#include <limits>
#include <optional>
#include <tuple>
#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "cheriot/cheriot_vector_state.h"
#include "mpact/sim/generic/instruction.h"
#include "mpact/sim/generic/type_helpers.h"
#include "riscv//riscv_fp_host.h"
#include "riscv//riscv_fp_info.h"
#include "riscv//riscv_register.h"
#include "riscv//riscv_state.h"
namespace mpact {
namespace sim {
namespace cheriot {
using ::mpact::sim::cheriot::CheriotVectorState;
using ::mpact::sim::generic::FPTypeInfo;
using ::mpact::sim::generic::GetInstructionSource;
using ::mpact::sim::generic::Instruction;
using ::mpact::sim::riscv::FPExceptions;
using ::mpact::sim::riscv::RV32VectorDestinationOperand;
using ::mpact::sim::riscv::RV32VectorSourceOperand;
using ::mpact::sim::riscv::ScopedFPStatus;
using ::mpact::sim::riscv::VectorLoadContext;
// Helper for instructions whose destination is a vector mask register and
// whose operation consumes the mask bit as an input, such as carry generation
// from add with carry.
//
// For every element index in [vstart, vector_length) the destination bit is
// cleared and then set to the value returned by `op(vs2, vs1, m)`. `m` is the
// element's bit from the mask operand (source 2), forced to false when the
// instruction has a fourth source operand that reads as true. Destination bits
// are rewritten no matter what the mask value is.
template <typename Vs2, typename Vs1>
void RiscVSetMaskBinaryVectorMaskOp(CheriotVectorState *rv_vector,
                                    const Instruction *inst,
                                    std::function<bool(Vs2, Vs1, bool)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;

  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // Element range to process: [vstart, vector_length).
  const int element_limit = rv_vector->vector_length();
  const int first_element = rv_vector->vstart();

  // Data buffer that receives the new destination mask bits.
  auto *result_db = dest_op->CopyDataBuffer();
  auto result_bytes = result_db->Get<uint8_t>();

  // A leading dimension of 1 on source 1 selects the vector-scalar form.
  const bool scalar_vs1 = (inst->Source(1)->shape()[0] == 1);

  // Mask operand, plus the optional fourth source that disables mask use.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
  const bool vm_unmasked_bit =
      (inst->SourcesSize() > 3) ? GetInstructionSource<bool>(inst, 3) : false;
  const bool use_mask = !vm_unmasked_bit;
  auto mask_bytes = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();

  for (int element = first_element; element < element_limit; ++element) {
    // Mask registers hold one bit per element, eight elements per byte.
    const int byte_pos = element >> 3;
    const int bit_pos = element & 0x7;
    const bool mask_bit = ((mask_bytes[byte_pos] >> bit_pos) & 0x1) != 0;
    const Vs2 lhs = GetInstructionSource<Vs2>(inst, 0, element);
    const Vs1 rhs =
        GetInstructionSource<Vs1>(inst, 1, scalar_vs1 ? 0 : element);
    // Reset the destination bit, then merge in the freshly computed one.
    result_bytes[byte_pos] &= ~(1 << bit_pos);
    // The mask bit is only forwarded to `op` when the mask is in use.
    const bool mask_input = use_mask && mask_bit;
    result_bytes[byte_pos] |= (op(lhs, rhs, mask_input) << bit_pos);
  }

  // Commit the destination data buffer and reset vstart.
  result_db->Submit();
  rv_vector->clear_vstart();
}
// Helper for instructions that write a vector mask register and also feed the
// mask value into the operation, such as carry generation from add with carry.
//
// Elements in [vstart, vector_length) are visited in order. When the mask is
// in use (no fourth source operand, or it reads as false) and the element's
// mask bit is clear, the element is skipped and its destination bit is left
// untouched. Otherwise the destination bit is replaced by the result of
// `op(vs2, vs1, m)`, where `m` is true only if the mask is in use.
template <typename Vs2, typename Vs1>
void RiscVMaskBinaryVectorMaskOp(CheriotVectorState *rv_vector,
                                 const Instruction *inst,
                                 std::function<bool(Vs2, Vs1, bool)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;

  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // Element range to process: [vstart, vector_length).
  const int element_limit = rv_vector->vector_length();
  const int first_element = rv_vector->vstart();

  // Data buffer that receives the new destination mask bits.
  auto *result_db = dest_op->CopyDataBuffer();
  auto result_bytes = result_db->Get<uint8_t>();

  // A leading dimension of 1 on source 1 selects the vector-scalar form.
  const bool scalar_vs1 = (inst->Source(1)->shape()[0] == 1);

  // Mask operand, plus the optional fourth source that disables mask use.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
  const bool vm_unmasked_bit =
      (inst->SourcesSize() > 3) ? GetInstructionSource<bool>(inst, 3) : false;
  const bool use_mask = !vm_unmasked_bit;
  auto mask_bytes = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();

  for (int element = first_element; element < element_limit; ++element) {
    const int byte_pos = element >> 3;
    const int bit_pos = element & 0x7;
    const bool mask_bit = ((mask_bytes[byte_pos] >> bit_pos) & 0x1) != 0;
    // Only elements that are not masked off are computed and written.
    if (!use_mask || mask_bit) {
      const Vs2 lhs = GetInstructionSource<Vs2>(inst, 0, element);
      const Vs1 rhs =
          GetInstructionSource<Vs1>(inst, 1, scalar_vs1 ? 0 : element);
      // Reset the destination bit, then merge in the freshly computed one.
      result_bytes[byte_pos] &= ~(1 << bit_pos);
      const bool mask_input = use_mask && mask_bit;
      result_bytes[byte_pos] |= (op(lhs, rhs, mask_input) << bit_pos);
    }
  }

  // Commit the destination data buffer and reset vstart.
  result_db->Submit();
  rv_vector->clear_vstart();
}
// Helper for two-operand operations that produce a vector mask. It adapts the
// two-argument `op` to the three-argument form expected by
// RiscVMaskBinaryVectorMaskOp: `op` is evaluated only when the forwarded mask
// value is true, otherwise the produced bit is false.
template <typename Vs2, typename Vs1>
void RiscVBinaryVectorMaskOp(CheriotVectorState *rv_vector,
                             const Instruction *inst,
                             std::function<bool(Vs2, Vs1)> op) {
  auto masked_op = [op](Vs2 lhs, Vs1 rhs, bool enabled) -> bool {
    return enabled ? op(lhs, rhs) : false;
  };
  RiscVMaskBinaryVectorMaskOp<Vs2, Vs1>(rv_vector, inst, masked_op);
}
// Helper for vector operations that take no element source operands. `op` is
// called once per element in [vstart, vector_length) with a boolean input and
// its result is stored to the destination only when the element's mask bit is
// set.
//
// Two operand layouts are supported:
//   1. inst vd, vs2, vmask (viota instruction): the boolean passed to `op` is
//      the mask bit AND-ed with the element's bit in vs2.
//   2. inst vd, vmask (vid instruction): the boolean passed to `op` is the
//      mask bit.
// A vector exception is raised if the destination operand has fewer registers
// than the operation needs.
template <typename Vd>
void RiscVMaskNullaryVectorOp(CheriotVectorState *rv_vector,
                              const Instruction *inst,
                              std::function<Vd(bool)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;

  const int total_elements = rv_vector->vector_length();
  const int lanes_per_reg =
      rv_vector->vector_register_byte_length() / sizeof(Vd);
  // Number of destination registers touched, rounded up.
  const int regs_needed = (total_elements + lanes_per_reg - 1) / lanes_per_reg;
  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // The destination operand must provide enough registers.
  if (dest_op->size() < regs_needed) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << absl::StrCat(
        "Vector destination '", dest_op->AsString(), "' has fewer registers (",
        dest_op->size(), ") than required by the operation (", regs_needed,
        ")");
    return;
  }

  // With more than one source, source 0 is vs2 and source 1 is the mask;
  // otherwise source 0 is the mask.
  const bool has_vs2 = inst->SourcesSize() > 1;
  RV32VectorSourceOperand *vs2_op =
      has_vs2 ? static_cast<RV32VectorSourceOperand *>(inst->Source(0))
              : nullptr;
  RV32VectorSourceOperand *mask_op =
      static_cast<RV32VectorSourceOperand *>(inst->Source(has_vs2 ? 1 : 0));
  auto mask_bytes = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();

  // Translate vstart into a starting register and a lane within it.
  int element = rv_vector->vstart();
  int lane = element % lanes_per_reg;
  const int lane_limit = std::min(lanes_per_reg, total_elements);

  for (int reg = element / lanes_per_reg;
       (reg < regs_needed) && (element < total_elements); ++reg) {
    // Data buffer that receives the new contents of this register.
    auto *reg_db = dest_op->CopyDataBuffer(reg);
    auto reg_values = reg_db->Get<Vd>();
    for (; (lane < lane_limit) && (element < total_elements);
         ++lane, ++element) {
      const int byte_pos = element >> 3;
      const int bit_pos = element & 0x7;
      const bool mask_bit = ((mask_bytes[byte_pos] >> bit_pos) & 0x1) != 0;
      bool op_input = mask_bit;
      // When vs2 is present its bit for this element gates the input too.
      if (vs2_op != nullptr) {
        const auto vs2_bytes =
            vs2_op->GetRegister(0)->data_buffer()->Get<uint8_t>();
        const bool vs2_bit = ((vs2_bytes[byte_pos] >> bit_pos) & 0x1);
        op_input = op_input && vs2_bit;
      }
      // `op` is invoked for every element, but only unmasked results are kept.
      const auto produced = op(op_input);
      if (mask_bit) {
        reg_values[lane] = produced;
      }
    }
    // Commit this register; subsequent registers start at lane 0.
    reg_db->Submit();
    lane = 0;
  }
  rv_vector->clear_vstart();
}
// Helper for single-source vector operations. For each element in
// [vstart, vector_length) whose mask bit (source 1) is set, the destination
// element is replaced by `op(vs2[element])`; other elements keep their values.
//
// Vd and Vs2 may differ in size (widening or narrowing). The length multiplier
// scaled by each type's size relative to the selected element width must be
// in [1, 64], and the destination operand must supply enough registers;
// otherwise a vector exception is raised and nothing is written.
template <typename Vd, typename Vs2>
void RiscVUnaryVectorOp(CheriotVectorState *rv_vector, const Instruction *inst,
                        std::function<Vd(Vs2)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;

  const int total_elements = rv_vector->vector_length();
  const int lmul = rv_vector->vector_length_multiplier();
  const int sew = rv_vector->selected_element_width();
  const int dest_lmul = lmul * sizeof(Vd) / sew;
  const int src_lmul = lmul * sizeof(Vs2) / sew;
  if (dest_lmul == 0 || dest_lmul > 64) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul value vd (" << dest_lmul << ")";
    return;
  }
  if (src_lmul == 0 || src_lmul > 64) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul_value vs2 (" << src_lmul << ")";
    return;
  }

  const int lanes_per_reg =
      rv_vector->vector_register_byte_length() / sizeof(Vd);
  // Number of destination registers touched, rounded up.
  const int regs_needed = (total_elements + lanes_per_reg - 1) / lanes_per_reg;
  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // The destination operand must provide enough registers.
  if (dest_op->size() < regs_needed) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << absl::StrCat(
        "Vector destination '", dest_op->AsString(), "' has fewer registers (",
        dest_op->size(), ") than required by the operation (", regs_needed,
        ")");
    return;
  }

  // Vector mask: one bit per element.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(1));
  auto mask_bytes = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();

  // Translate vstart into a starting register and a lane within it.
  int element = rv_vector->vstart();
  int lane = element % lanes_per_reg;
  const int lane_limit = std::min(lanes_per_reg, total_elements);

  for (int reg = element / lanes_per_reg;
       (reg < regs_needed) && (element < total_elements); ++reg) {
    // Data buffer that receives the new contents of this register.
    auto *reg_db = dest_op->CopyDataBuffer(reg);
    auto reg_values = reg_db->Get<Vd>();
    for (; (lane < lane_limit) && (element < total_elements);
         ++lane, ++element) {
      const int byte_pos = element >> 3;
      const int bit_pos = element & 0x7;
      const bool mask_bit = ((mask_bytes[byte_pos] >> bit_pos) & 0x1) != 0;
      // Masked-off elements are neither read nor written.
      if (!mask_bit) continue;
      const Vs2 src = GetInstructionSource<Vs2>(inst, 0, element);
      reg_values[lane] = op(src);
    }
    // Commit this register; subsequent registers start at lane 0.
    reg_db->Submit();
    lane = 0;
  }
  rv_vector->clear_vstart();
}
// Helper for single-source vector operations that also produce status flags.
// For each element in [vstart, vector_length) whose mask bit (source 1) is
// set, `op(vs2[element])` yields a (value, flags) pair: the value is stored to
// the destination element and the flags are OR-ed into an accumulator. The
// accumulated flags are written to destination operand 1 at the end.
//
// Vd and Vs2 may differ in size (widening or narrowing). The length multiplier
// scaled by each type's size relative to the selected element width must be
// in [1, 64], and the destination operand must supply enough registers;
// otherwise a vector exception is raised and nothing is written.
template <typename Vd, typename Vs2>
void RiscVUnaryVectorOpWithFflags(
    CheriotVectorState *rv_vector, const Instruction *inst,
    std::function<std::tuple<Vd, uint32_t>(Vs2)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;

  const int total_elements = rv_vector->vector_length();
  const int lmul = rv_vector->vector_length_multiplier();
  const int sew = rv_vector->selected_element_width();
  const int dest_lmul = lmul * sizeof(Vd) / sew;
  const int src_lmul = lmul * sizeof(Vs2) / sew;
  if (dest_lmul == 0 || dest_lmul > 64) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul value vd (" << dest_lmul << ")";
    return;
  }
  if (src_lmul == 0 || src_lmul > 64) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul_value vs2 (" << src_lmul << ")";
    return;
  }

  const int lanes_per_reg =
      rv_vector->vector_register_byte_length() / sizeof(Vd);
  // Number of destination registers touched, rounded up.
  const int regs_needed = (total_elements + lanes_per_reg - 1) / lanes_per_reg;
  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // The destination operand must provide enough registers.
  if (dest_op->size() < regs_needed) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << absl::StrCat(
        "Vector destination '", dest_op->AsString(), "' has fewer registers (",
        dest_op->size(), ") than required by the operation (", regs_needed,
        ")");
    return;
  }

  // Vector mask: one bit per element.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(1));
  auto mask_bytes = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();

  // Translate vstart into a starting register and a lane within it.
  int element = rv_vector->vstart();
  int lane = element % lanes_per_reg;
  const int lane_limit = std::min(lanes_per_reg, total_elements);

  // Union of the flags reported by every executed element.
  uint32_t fflags = 0;
  for (int reg = element / lanes_per_reg;
       (reg < regs_needed) && (element < total_elements); ++reg) {
    // Data buffer that receives the new contents of this register.
    auto *reg_db = dest_op->CopyDataBuffer(reg);
    auto reg_values = reg_db->Get<Vd>();
    for (; (lane < lane_limit) && (element < total_elements);
         ++lane, ++element) {
      const int byte_pos = element >> 3;
      const int bit_pos = element & 0x7;
      const bool mask_bit = ((mask_bytes[byte_pos] >> bit_pos) & 0x1) != 0;
      // Masked-off elements are neither read nor written.
      if (!mask_bit) continue;
      const Vs2 src = GetInstructionSource<Vs2>(inst, 0, element);
      const std::tuple<Vd, uint32_t> outcome = op(src);
      reg_values[lane] = std::get<0>(outcome);
      fflags |= std::get<1>(outcome);
    }
    // Commit this register; subsequent registers start at lane 0.
    reg_db->Submit();
    lane = 0;
  }

  // Publish the accumulated flags through destination operand 1.
  auto *flag_db = inst->Destination(1)->AllocateDataBuffer();
  flag_db->Set<uint32_t>(0, fflags);
  flag_db->Submit();
  rv_vector->clear_vstart();
}
// Helper for two-source vector operations where the mask bit is passed to the
// operation itself. For every element in [vstart, vector_length),
// `op(vs2, vs1, mask_bit)` is called; if it returns a value, that value is
// written to the destination element, otherwise the element is left unchanged.
//
// Vd, Vs2 and Vs1 may differ in size (widening or narrowing). The length
// multiplier scaled by each type's size relative to the selected element width
// must be in [1, 64], and the destination operand must supply enough
// registers; otherwise a vector exception is raised and nothing is written.
//
// If `op` returns no value for an unmasked element and a vector exception is
// pending afterwards, processing stops and vstart is set to the index of that
// element. vstart is reset only when the operation runs to completion, so the
// recorded index is not overwritten.
template <typename Vd, typename Vs2, typename Vs1>
void RiscVMaskBinaryVectorOp(
    CheriotVectorState *rv_vector, const Instruction *inst,
    std::function<std::optional<Vd>(Vs2, Vs1, bool)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;
  int num_elements = rv_vector->vector_length();
  int lmul = rv_vector->vector_length_multiplier();
  int sew = rv_vector->selected_element_width();
  int lmul_vd = lmul * sizeof(Vd) / sew;
  int lmul_vs2 = lmul * sizeof(Vs2) / sew;
  int lmul_vs1 = lmul * sizeof(Vs1) / sew;
  if (lmul_vd > 64 || lmul_vs2 > 64 || lmul_vs1 > 64) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul value";
    return;
  }
  if (lmul_vd == 0 || lmul_vs2 == 0 || lmul_vs1 == 0) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul_value";
    return;
  }
  int elements_per_vector =
      rv_vector->vector_register_byte_length() / sizeof(Vd);
  // Number of destination registers touched, rounded up.
  int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector;
  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // Verify that there are enough registers in the destination operand.
  if (dest_op->size() < max_regs) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << absl::StrCat(
        "Vector destination '", dest_op->AsString(), "' has fewer registers (",
        dest_op->size(), ") than required by the operation (", max_regs, ")");
    return;
  }
  // Get the vector mask.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
  auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();
  // Get the vector start element index and compute where to start
  // the operation.
  int vector_index = rv_vector->vstart();
  int start_reg = vector_index / elements_per_vector;
  int item_index = vector_index % elements_per_vector;
  // Determine if it's vector-vector or vector-scalar.
  bool vector_scalar = inst->Source(1)->shape()[0] == 1;
  // Iterate over the number of registers to write.
  bool exception = false;
  for (int reg = start_reg;
       !exception && (reg < max_regs) && (vector_index < num_elements); reg++) {
    // Allocate data buffer for the new register data.
    auto *dest_db = dest_op->CopyDataBuffer(reg);
    auto dest_span = dest_db->Get<Vd>();
    // Write data into register subject to masking.
    int element_count = std::min(elements_per_vector, num_elements);
    for (int i = item_index;
         (i < element_count) && (vector_index < num_elements); i++) {
      // Get the mask value.
      int mask_index = vector_index >> 3;
      int mask_offset = vector_index & 0b111;
      bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0;
      // Compute result.
      Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, vector_index);
      Vs1 vs1 = GetInstructionSource<Vs1>(inst, 1,
                                          (vector_scalar ? 0 : vector_index));
      auto value = op(vs2, vs1, mask_value);
      if (value.has_value()) {
        dest_span[i] = value.value();
      } else if (mask_value) {
        // If there is no value returned, but the mask_value is true, check
        // to see if there was an exception.
        if (rv_vector->vector_exception()) {
          rv_vector->set_vstart(vector_index);
          exception = true;
          break;
        }
      }
      vector_index++;
    }
    // Submit the destination db (including any elements written before an
    // exception was detected).
    dest_db->Submit();
    item_index = 0;
  }
  // Only reset vstart when the operation completed; after an exception it
  // must keep the index of the element that raised it.
  if (!exception) {
    rv_vector->clear_vstart();
  }
}
// Helper for two-source vector operations. It wraps the two-argument `op` so
// it can be driven by RiscVMaskBinaryVectorOp: `op` is evaluated only for
// elements whose mask value is true; for the others no value is produced and
// the destination element is left unchanged.
template <typename Vd, typename Vs2, typename Vs1>
void RiscVBinaryVectorOp(CheriotVectorState *rv_vector, const Instruction *inst,
                         std::function<Vd(Vs2, Vs1)> op) {
  auto masked_op = [op](Vs2 lhs, Vs1 rhs,
                        bool enabled) -> std::optional<Vd> {
    if (!enabled) return std::nullopt;
    return op(lhs, rhs);
  };
  RiscVMaskBinaryVectorOp<Vd, Vs2, Vs1>(rv_vector, inst, masked_op);
}
// Helper for two-source vector operations that also produce status flags. For
// each element in [vstart, vector_length) whose mask bit (source 2) is set,
// `op(vs2, vs1)` yields a (value, flags) pair: the value is stored to the
// destination element and the flags are OR-ed into an accumulator that is
// written to destination operand 1 at the end.
//
// Vd, Vs2 and Vs1 may differ in size (widening or narrowing). The length
// multiplier scaled by each type's size relative to the selected element width
// must be in [1, 64], and the destination operand must supply enough
// registers; otherwise a vector exception is raised and nothing is written.
//
// If a vector exception is pending after `op` runs for an element, processing
// stops and vstart is set to the index of that element. vstart is reset only
// when the operation runs to completion, so the recorded index is not
// overwritten.
template <typename Vd, typename Vs2, typename Vs1>
void RiscVBinaryVectorOpWithFflags(
    CheriotVectorState *rv_vector, const Instruction *inst,
    std::function<std::tuple<Vd, uint32_t>(Vs2, Vs1)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;
  int num_elements = rv_vector->vector_length();
  int lmul = rv_vector->vector_length_multiplier();
  int sew = rv_vector->selected_element_width();
  int lmul_vd = lmul * sizeof(Vd) / sew;
  int lmul_vs2 = lmul * sizeof(Vs2) / sew;
  int lmul_vs1 = lmul * sizeof(Vs1) / sew;
  if (lmul_vd > 64 || lmul_vs2 > 64 || lmul_vs1 > 64) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul value";
    return;
  }
  if (lmul_vd == 0 || lmul_vs2 == 0 || lmul_vs1 == 0) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul_value";
    return;
  }
  int elements_per_vector =
      rv_vector->vector_register_byte_length() / sizeof(Vd);
  // Number of destination registers touched, rounded up.
  int max_regs = (num_elements + elements_per_vector - 1) / elements_per_vector;
  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // Verify that there are enough registers in the destination operand.
  if (dest_op->size() < max_regs) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << absl::StrCat(
        "Vector destination '", dest_op->AsString(), "' has fewer registers (",
        dest_op->size(), ") than required by the operation (", max_regs, ")");
    return;
  }
  // Get the vector mask.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
  auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();
  // Get the vector start element index and compute where to start
  // the operation.
  int vector_index = rv_vector->vstart();
  int start_reg = vector_index / elements_per_vector;
  int item_index = vector_index % elements_per_vector;
  // Determine if it's vector-vector or vector-scalar.
  bool vector_scalar = inst->Source(1)->shape()[0] == 1;
  // Iterate over the number of registers to write.
  bool exception = false;
  // Union of the flags reported by every executed element.
  uint32_t fflags = 0;
  for (int reg = start_reg;
       !exception && (reg < max_regs) && (vector_index < num_elements); reg++) {
    // Allocate data buffer for the new register data.
    auto *dest_db = dest_op->CopyDataBuffer(reg);
    auto dest_span = dest_db->Get<Vd>();
    // Write data into register subject to masking.
    int element_count = std::min(elements_per_vector, num_elements);
    for (int i = item_index;
         (i < element_count) && (vector_index < num_elements); i++) {
      // Get the mask value.
      int mask_index = vector_index >> 3;
      int mask_offset = vector_index & 0b111;
      bool mask_value = ((mask_span[mask_index] >> mask_offset) & 0b1) != 0;
      // Compute result.
      Vs2 vs2 = GetInstructionSource<Vs2>(inst, 0, vector_index);
      Vs1 vs1 = GetInstructionSource<Vs1>(inst, 1,
                                          (vector_scalar ? 0 : vector_index));
      if (mask_value) {
        auto [value, flag] = op(vs2, vs1);
        dest_span[i] = value;
        fflags |= flag;
        // Stop at the first element for which an exception is pending and
        // remember its index in vstart.
        if (rv_vector->vector_exception()) {
          rv_vector->set_vstart(vector_index);
          exception = true;
          break;
        }
      }
      vector_index++;
    }
    // Submit the destination db (including any elements written before an
    // exception was detected).
    dest_db->Submit();
    item_index = 0;
  }
  // Publish the accumulated flags through destination operand 1.
  auto *flag_db = inst->Destination(1)->AllocateDataBuffer();
  flag_db->Set<uint32_t>(0, fflags);
  flag_db->Submit();
  // Only reset vstart when the operation completed; after an exception it
  // must keep the index of the element that raised it.
  if (!exception) {
    rv_vector->clear_vstart();
  }
}
// Helper for three-source vector operations. For every element in
// [vstart, vector_length) the operands vs2 (source 0), vs1 (source 1, vector
// or scalar) and vd (source 2) are read; when the element's mask bit
// (source 3) is set, the destination element is replaced by
// `op(vs2, vs1, vd)`.
//
// Vd, Vs2 and Vs1 may differ in size (widening or narrowing). The length
// multiplier scaled by each type's size relative to the selected element width
// must be in [1, 64], and the destination operand must supply enough
// registers; otherwise a vector exception is raised and nothing is written.
template <typename Vd, typename Vs2, typename Vs1>
void RiscVTernaryVectorOp(CheriotVectorState *rv_vector,
                          const Instruction *inst,
                          std::function<Vd(Vs2, Vs1, Vd)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;

  const int total_elements = rv_vector->vector_length();
  const int lmul = rv_vector->vector_length_multiplier();
  const int sew = rv_vector->selected_element_width();
  const int dest_lmul = lmul * sizeof(Vd) / sew;
  const int vs2_lmul = lmul * sizeof(Vs2) / sew;
  const int vs1_lmul = lmul * sizeof(Vs1) / sew;
  const bool lmul_too_large = dest_lmul > 64 || vs2_lmul > 64 || vs1_lmul > 64;
  if (lmul_too_large) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul value";
    return;
  }
  const bool lmul_is_zero = dest_lmul == 0 || vs2_lmul == 0 || vs1_lmul == 0;
  if (lmul_is_zero) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul_value";
    return;
  }

  const int lanes_per_reg =
      rv_vector->vector_register_byte_length() / sizeof(Vd);
  // Number of destination registers touched, rounded up.
  const int regs_needed = (total_elements + lanes_per_reg - 1) / lanes_per_reg;
  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  // The destination operand must provide enough registers.
  if (dest_op->size() < regs_needed) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << absl::StrCat(
        "Vector destination '", dest_op->AsString(), "' has fewer registers (",
        dest_op->size(), ") than required by the operation (", regs_needed,
        ")");
    return;
  }

  // Vector mask: one bit per element.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(3));
  auto mask_bytes = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();

  // Translate vstart into a starting register and a lane within it.
  int element = rv_vector->vstart();
  int lane = element % lanes_per_reg;
  const int lane_limit = std::min(lanes_per_reg, total_elements);

  // A leading dimension of 1 on source 1 selects the vector-scalar form.
  const bool scalar_vs1 = (inst->Source(1)->shape()[0] == 1);

  for (int reg = element / lanes_per_reg;
       (reg < regs_needed) && (element < total_elements); ++reg) {
    // Data buffer that receives the new contents of this register.
    auto *reg_db = dest_op->CopyDataBuffer(reg);
    auto reg_values = reg_db->Get<Vd>();
    for (; (lane < lane_limit) && (element < total_elements);
         ++lane, ++element) {
      const int byte_pos = element >> 3;
      const int bit_pos = element & 0x7;
      const bool mask_bit = ((mask_bytes[byte_pos] >> bit_pos) & 0x1) != 0;
      // Operands are fetched for every element, masked or not.
      const Vs2 src2 = GetInstructionSource<Vs2>(inst, 0, element);
      const Vs1 src1 =
          GetInstructionSource<Vs1>(inst, 1, (scalar_vs1 ? 0 : element));
      const Vd accum = GetInstructionSource<Vd>(inst, 2, element);
      if (mask_bit) {
        reg_values[lane] = op(src2, src1, accum);
      }
    }
    // Commit this register; subsequent registers start at lane 0.
    reg_db->Submit();
    lane = 0;
  }
  rv_vector->clear_vstart();
}
// The reduction instructions take Vs1[0], and all the elements (subject to
// masking) from Vs2 and apply the reduction operation to produce a single
// element that is written to Vd[0].
//
// The accumulator starts as Vs1[0] converted to Vd; `op(accumulator, vs2[i])`
// is then applied for every i in [0, vector_length) whose mask bit (source 2)
// is set.
//
// A vector exception is raised, and nothing is written, when vstart is
// non-zero or when the length multiplier scaled by any operand type's size
// relative to the selected element width falls outside [1, 64].
template <typename Vd, typename Vs2, typename Vs1>
void RiscVBinaryReductionVectorOp(CheriotVectorState *rv_vector,
                                  const Instruction *inst,
                                  std::function<Vd(Vd, Vs2)> op) {
  // Nothing to do if a vector exception is already pending.
  if (rv_vector->vector_exception()) return;
  // Reductions cannot be resumed part-way: a non-zero vstart is an error.
  // This must raise the exception, not merely query it.
  if (rv_vector->vstart()) {
    rv_vector->set_vector_exception();
    return;
  }
  int sew = rv_vector->selected_element_width();
  int lmul = rv_vector->vector_length_multiplier();
  int lmul_vd = lmul * sizeof(Vd) / sew;
  int lmul_vs2 = lmul * sizeof(Vs2) / sew;
  int lmul_vs1 = lmul * sizeof(Vs1) / sew;
  if (lmul_vd > 64 || lmul_vs2 > 64 || lmul_vs1 > 64) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul value";
    return;
  }
  if (lmul_vd == 0 || lmul_vs2 == 0 || lmul_vs1 == 0) {
    rv_vector->set_vector_exception();
    LOG(ERROR) << "Illegal lmul_value";
    return;
  }
  int num_elements = rv_vector->vector_length();
  // Get the vector mask.
  auto *mask_op = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
  auto mask_span = mask_op->GetRegister(0)->data_buffer()->Get<uint8_t>();
  // Seed the accumulator with element 0 of vs1.
  Vd accumulator =
      static_cast<Vd>(generic::GetInstructionSource<Vs1>(inst, 1, 0));
  // Fold in every unmasked element of vs2.
  for (int i = 0; i < num_elements; i++) {
    int mask_index = i >> 3;
    int mask_offset = i & 0b111;
    bool mask_value = (mask_span[mask_index] >> mask_offset) & 0b1;
    if (mask_value) {
      accumulator =
          op(accumulator, generic::GetInstructionSource<Vs2>(inst, 0, i));
    }
  }
  // Write the result to element 0 of the destination register.
  auto *dest_op =
      static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
  auto dest_db = dest_op->CopyDataBuffer();
  dest_db->Set<Vd>(0, accumulator);
  dest_db->Submit();
  rv_vector->clear_vstart();
}
// Computes the rounding increment (0 or 1) to add after shifting
// `rounding_bits` right, according to `rounding_mode`:
//   0: round-to-nearest-up, 1: round-to-nearest-even, 2: round-down
//   (truncate), 3: round-to-odd.
//
// `size` is one more than the shift amount d (RoundOff passes its shift amount
// plus one), so bit (size - 1) is the least significant bit that is kept and
// bits [size - 2 : 0] are the ones shifted out.
//
// All bit tests are done on an unsigned 64-bit copy of `rounding_bits`, so
// signed and narrow element types are handled without relying on integer
// promotion or signed-shift behavior. Bit positions below zero read as 0.
// T is assumed to be an integer type of at most 64 bits.
//
// An unknown rounding mode is logged and yields 0.
template <typename T>
T GetRoundingBit(int rounding_mode, T rounding_bits, int size) {
  const uint64_t bits = static_cast<uint64_t>(rounding_bits);
  // Returns bit `pos` of the value; out-of-range positions read as 0.
  const auto bit_at = [bits](int pos) -> bool {
    if (pos < 0 || pos > 63) return false;
    return ((bits >> pos) & uint64_t{1}) != 0;
  };
  // Returns true if any of the `count` least significant bits is set.
  const auto any_low_bits = [bits](int count) -> bool {
    if (count <= 0) return false;
    if (count >= 64) return bits != 0;
    return (bits & ((uint64_t{1} << count) - 1)) != 0;
  };
  switch (rounding_mode) {
    case 0:  // Round-to-nearest-up (add +0.5 lsb): r = v[d-1].
      return static_cast<T>(bit_at(size - 2) ? 1 : 0);
    case 1: {  // Round-to-nearest-even: r = v[d-1] & (v[d-2:0] != 0 | v[d]).
      const bool v_d_minus_1 = bit_at(size - 2);
      const bool v_d = bit_at(size - 1);
      const bool v_d_minus_2_0 = any_low_bits(size - 2);
      return static_cast<T>((v_d_minus_1 && (v_d_minus_2_0 || v_d)) ? 1 : 0);
    }
    case 2:  // Round-down (truncate).
      return 0;
    case 3: {  // Round-to-odd: r = !v[d] & (v[d-1:0] != 0).
      const bool v_d = bit_at(size - 1);
      const bool v_d_minus_1_0 = any_low_bits(size - 1);
      return static_cast<T>((!v_d && v_d_minus_1_0) ? 1 : 0);
    }
    default:
      LOG(ERROR) << "GetRoundingBit: Invalid value for rounding mode";
      break;
  }
  return 0;
}
// Shifts `value` right by `size` bits and adds the rounding increment that
// GetRoundingBit computes for the rounding mode read from `rv_vector->vxrm()`.
template <typename T>
T RoundOff(CheriotVectorState *rv_vector, T value, int size) {
  const auto rounding_mode = rv_vector->vxrm();
  const auto shifted = value >> size;
  return shifted + GetRoundingBit<T>(rounding_mode, value, size + 1);
}
} // namespace cheriot
} // namespace sim
} // namespace mpact
#endif // THIRD_PARTY_MPACT_RISCV_RISCV_RISCV_VECTOR_INSTRUCTION_HELPERS_H_