From d960af8788235e9d8c35515bc70a661d38b7fbcc Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Fri, 9 Dec 2022 14:43:17 +0500 Subject: [PATCH 01/16] [hardware] Changes for single lane configuration support --- hardware/src/ara_sequencer.sv | 10 ++++-- hardware/src/lane/lane_sequencer.sv | 54 ++++++++++++++++++++--------- hardware/src/masku/masku.sv | 4 +-- hardware/src/sldu/sldu.sv | 36 ++++++++++++++----- hardware/src/vlsu/addrgen.sv | 48 ++++++++++++++++++------- hardware/src/vlsu/vldu.sv | 1 - 6 files changed, 110 insertions(+), 43 deletions(-) diff --git a/hardware/src/ara_sequencer.sv b/hardware/src/ara_sequencer.sv index 348c01107..b6997c57e 100644 --- a/hardware/src/ara_sequencer.sv +++ b/hardware/src/ara_sequencer.sv @@ -100,9 +100,13 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i // This is because the instruction counters for ALU and MFPU refers // to lane 0. If lane 0 finishes before the other lanes, the counter // is not reflecting the real lane situations anymore. - for (genvar i = 0; i < NrVInsn; i++) begin : gen_stall_lane_desynch - assign stall_lanes_desynch_vec[i] = ~pe_vinsn_running_q[0][i] & |pe_vinsn_running_q_trns[i][NrLanes-1:1]; - end + if (NrLanes != 1) + for (genvar i = 0; i < NrVInsn; i++) begin : gen_stall_lane_desynch + assign stall_lanes_desynch_vec[i] = ~pe_vinsn_running_q[0][i] & |pe_vinsn_running_q_trns[i][NrLanes-1:1]; + end + else + assign stall_lanes_desynch_vec = '0; + assign stall_lanes_desynch = |stall_lanes_desynch_vec; ///////////////////////// diff --git a/hardware/src/lane/lane_sequencer.sv b/hardware/src/lane/lane_sequencer.sv index 35bb6b8ef..a10bc2e57 100644 --- a/hardware/src/lane/lane_sequencer.sv +++ b/hardware/src/lane/lane_sequencer.sv @@ -253,16 +253,33 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: }; vfu_operation_valid_d = (vfu_operation_d.vfu != VFU_None) ? 
1'b1 : 1'b0; - // Vector length calculation - vfu_operation_d.vl = pe_req.vl / NrLanes; - // If lane_id_i < vl % NrLanes, this lane has to execute one extra micro-operation. - if (lane_id_i < pe_req.vl[idx_width(NrLanes)-1:0]) vfu_operation_d.vl += 1; - - // Vector start calculation - vfu_operation_d.vstart = pe_req.vstart / NrLanes; - // If lane_id_i < vstart % NrLanes, this lane needs to execute one micro-operation less. - if (lane_id_i < pe_req.vstart[idx_width(NrLanes)-1:0]) vfu_operation_d.vstart -= 1; + if (NrLanes != 1) begin + // Vector length calculation + vfu_operation_d.vl = pe_req.vl / NrLanes; + // If lane_id_i < vl % NrLanes, this lane has to execute one extra micro-operation. + if (lane_id_i < pe_req.vl[idx_width(NrLanes)-1:0]) vfu_operation_d.vl += 1; + end + else begin + vfu_operation_d.vl = pe_req.vl; + end + // Mute request if the instruction runs in the lane and the vl is zero. + // During a reduction, all the lanes must cooperate anyway. + if (vfu_operation_d.vl == '0 && (vfu_operation_d.vfu inside {VFU_Alu, VFU_MFpu, VFU_MaskUnit}) && !(vfu_operation_d.op inside {[VREDSUM:VWREDSUM], [VFREDUSUM:VFWREDOSUM]})) begin + vfu_operation_valid_d = 1'b0; + // We are already done with this instruction + vinsn_done_d[pe_req.id] |= 1'b1; + vinsn_running_d[pe_req.id] = 1'b0; + end + if (NrLanes != 1) begin + // Vector start calculation + vfu_operation_d.vstart = pe_req.vstart / NrLanes; + // If lane_id_i < vstart % NrLanes, this lane needs to execute one micro-operation less. + if (lane_id_i < pe_req.vstart[idx_width(NrLanes)-1:0]) vfu_operation_d.vstart -= 1; + end + else begin + vfu_operation_d.vstart = pe_req.vstart; + end // Mark the vector instruction as running vinsn_running_d[pe_req.id] = (vfu_operation_d.vfu != VFU_None) ? 
1'b1 : 1'b0; @@ -557,14 +574,17 @@ module lane_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg:: // we must request it as well from the VRF // Find the number of extra elements to ask, related to the stride - unique case (pe_req.eew_vs2) - EW8 : extra_stride = pe_req.stride[$clog2(8*NrLanes)-1:0]; - EW16: extra_stride = {1'b0, pe_req.stride[$clog2(4*NrLanes)-1:0]}; - EW32: extra_stride = {2'b0, pe_req.stride[$clog2(2*NrLanes)-1:0]}; - EW64: extra_stride = {3'b0, pe_req.stride[$clog2(1*NrLanes)-1:0]}; - default: - extra_stride = {3'b0, pe_req.stride[$clog2(1*NrLanes)-1:0]}; - endcase + if (NrLanes != 1) + unique case (pe_req.eew_vs2) + EW8 : extra_stride = pe_req.stride[idx_width(8*NrLanes)-1:0]; + EW16: extra_stride = {1'b0, pe_req.stride[idx_width(4*NrLanes)-1:0]}; + EW32: extra_stride = {2'b0, pe_req.stride[idx_width(2*NrLanes)-1:0]}; + EW64: extra_stride = {3'b0, pe_req.stride[idx_width(1*NrLanes)-1:0]}; + default: + extra_stride = {3'b0, pe_req.stride[idx_width(1*NrLanes)-1:0]}; + endcase + else + extra_stride = '0; // Find the total number of elements to be asked vl_tot = pe_req.vl; diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv index afea302f6..30c564ecd 100644 --- a/hardware/src/masku/masku.sv +++ b/hardware/src/masku/masku.sv @@ -1094,8 +1094,8 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( mask_pnt_d = (vlen_t'(trimmed_stride) >> $clog2(NrLanes << 1)) << $clog2(NrLanes << 1); end EW64: begin - read_cnt_d -= (vlen_t'(trimmed_stride) >> $clog2(NrLanes)) << $clog2(NrLanes); - mask_pnt_d = (vlen_t'(trimmed_stride) >> $clog2(NrLanes)) << $clog2(NrLanes); + read_cnt_d -= (vlen_t'(trimmed_stride) >> idx_width(NrLanes)) << idx_width(NrLanes); + mask_pnt_d = (vlen_t'(trimmed_stride) >> idx_width(NrLanes)) << idx_width(NrLanes); end default:; endcase diff --git a/hardware/src/sldu/sldu.sv b/hardware/src/sldu/sldu.sv index 9c06c3ac5..c9667b016 100644 --- a/hardware/src/sldu/sldu.sv +++ b/hardware/src/sldu/sldu.sv 
@@ -269,8 +269,11 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #( vrf_pnt_d = vinsn_issue_q.stride >> $clog2(8*NrLanes); // Go to SLIDE_RUN_VSLIDE1UP_FIRST_WORD if this is a vslide1up instruction - if (vinsn_issue_q.use_scalar_op) + if (vinsn_issue_q.use_scalar_op) begin + // vslide1up always write scalar operand at the start of destination. + vrf_pnt_d = '0; state_d = SLIDE_RUN_VSLIDE1UP_FIRST_WORD; + end end VSLIDEDOWN: begin // vslidedown starts reading the source operand from the slide offset @@ -282,8 +285,15 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #( issue_cnt_d = vinsn_issue_q.vl << int'(vinsn_issue_q.vtype.vsew); // Trim the last element of vslide1down, which does not come from the VRF - if (vinsn_issue_q.use_scalar_op) - issue_cnt_d -= 1 << int'(vinsn_issue_q.vtype.vsew); + if (NrLanes == 1) begin + if (vinsn_issue_q.vtype.vsew != EW64) begin + if (vinsn_issue_q.use_scalar_op ) + issue_cnt_d -= 1 << int'(vinsn_issue_q.vtype.vsew); + end + end else begin + if (vinsn_issue_q.use_scalar_op ) + issue_cnt_d -= 1 << int'(vinsn_issue_q.vtype.vsew); + end end // Ordered sum reductions VFREDOSUM, VFWREDOSUM: begin @@ -334,9 +344,9 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #( // Is this a valid byte? (Allow wrap-up only with reductions!) if (b < issue_cnt_q && in_seq_byte < NrLanes * 8 && (vinsn_issue_q.vfu inside {VFU_Alu, VFU_MFpu} || out_seq_byte < NrLanes * 8)) begin // At which lane, and what is the offset in that lane, are the input and output bytes? - automatic int src_lane = in_byte[3 +: $clog2(NrLanes)]; + automatic int src_lane = NrLanes == 1 ? 0 : in_byte[3 +: idx_width(NrLanes)]; automatic int src_lane_offset = in_byte[2:0]; - automatic int tgt_lane = out_byte[3 +: $clog2(NrLanes)]; + automatic int tgt_lane = NrLanes == 1 ? 
0 : out_byte[3 +: idx_width(NrLanes)]; automatic int tgt_lane_offset = out_byte[2:0]; result_queue_d[result_queue_write_pnt_q][tgt_lane].wdata[8*tgt_lane_offset +: 8] = @@ -356,7 +366,12 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #( // Bump pointers (reductions always finish in one shot) in_pnt_d = vinsn_issue_q.vfu inside {VFU_Alu, VFU_MFpu} ? NrLanes * 8 : in_pnt_q + byte_count; out_pnt_d = vinsn_issue_q.vfu inside {VFU_Alu, VFU_MFpu} ? NrLanes * 8 : out_pnt_q + byte_count; - issue_cnt_d = vinsn_issue_q.vfu inside {VFU_Alu, VFU_MFpu} ? issue_cnt_q - (NrLanes * 8) : issue_cnt_q - byte_count; + if (NrLanes == 1) begin + if (!(state_q == SLIDE_RUN_VSLIDE1UP_FIRST_WORD && (vinsn_issue_q.vtype.vsew == EW64))) + issue_cnt_d = vinsn_issue_q.vfu inside {VFU_Alu, VFU_MFpu} ? issue_cnt_q - (NrLanes * 8) : issue_cnt_q - byte_count; + end + else + issue_cnt_d = vinsn_issue_q.vfu inside {VFU_Alu, VFU_MFpu} ? issue_cnt_q - (NrLanes * 8) : issue_cnt_q - byte_count; // Jump to SLIDE_RUN state_d = SLIDE_RUN; @@ -393,7 +408,12 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #( if (in_pnt_d == NrLanes * 8 || issue_cnt_q <= byte_count) begin // Reset the pointer and ask for a new operand in_pnt_d = '0; - sldu_operand_ready_o = '1; + if (NrLanes == 1) begin + if (state_q != SLIDE_RUN_VSLIDE1UP_FIRST_WORD) + sldu_operand_ready_o = '1; + end + else + sldu_operand_ready_o = '1; // Left-rotate the logarithmic counter. Hacky way to write it, but it's to // deal with the 2-lanes design without complaints from Verilator... // wide signal to please the tool @@ -432,7 +452,7 @@ module sldu import ara_pkg::*; import rvv_pkg::*; #( // Copy the scalar operand to the last word automatic int out_seq_byte = issue_cnt_q; automatic int out_byte = shuffle_index(out_seq_byte, NrLanes, vinsn_issue_q.vtype.vsew); - automatic int tgt_lane = out_byte[3 +: $clog2(NrLanes)]; + automatic int tgt_lane = NrLanes == 1 ? 
0 : out_byte[3 +: idx_width(NrLanes)]; automatic int tgt_lane_offset = out_byte[2:0]; unique case (vinsn_issue_q.vtype.vsew) diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index def21df8e..c25228626 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -655,12 +655,24 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Strided access // ///////////////////// + automatic int unsigned strided_length; + automatic int unsigned eff_stride_size; + + // default length is 0, when AXIDataWidth >= (8 << sew) + strided_length = 0; + eff_stride_size = axi_addrgen_q.vew; + + if ( AxiDataWidth < (8 << axi_addrgen_q.vew)) begin + strided_length = (8 << axi_addrgen_q.vew) / AxiDataWidth - 1; + eff_stride_size = $clog2(AxiDataWidth/8); + end + // AR Channel if (axi_addrgen_q.is_load) begin axi_ar_o = '{ addr : axi_addrgen_q.addr, - len : 0, - size : axi_addrgen_q.vew, + len : strided_length, + size : eff_stride_size, cache : CACHE_MODIFIABLE, burst : BURST_INCR, default: '0 @@ -671,8 +683,8 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( else begin axi_aw_o = '{ addr : axi_addrgen_q.addr, - len : 0, - size : axi_addrgen_q.vew, + len : strided_length, + size : eff_stride_size, cache : CACHE_MODIFIABLE, burst : BURST_INCR, default: '0 @@ -683,8 +695,8 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Send this request to the load/store units axi_addrgen_queue = '{ addr : axi_addrgen_q.addr, - size : axi_addrgen_q.vew, - len : 0, + size : eff_stride_size, + len : strided_length, is_load: axi_addrgen_q.is_load }; axi_addrgen_queue_push = 1'b1; @@ -705,6 +717,18 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Indexed access // ////////////////////// + automatic int unsigned indexed_length; + automatic int unsigned eff_index_size; + + // default length is 0, when AXIDataWidth >= (8 << sew) + indexed_length = 0; + eff_index_size = axi_addrgen_q.vew; + + if ( AxiDataWidth < (8 << 
axi_addrgen_q.vew)) begin + indexed_length = (8 << axi_addrgen_q.vew) / AxiDataWidth - 1; + eff_index_size = $clog2(AxiDataWidth/8); + end + if (idx_addr_valid_q) begin // We consumed a word idx_addr_ready_d = 1'b1; @@ -713,8 +737,8 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( if (axi_addrgen_q.is_load) begin axi_ar_o = '{ addr : idx_final_addr_q, - len : 0, - size : axi_addrgen_q.vew, + len : indexed_length, + size : eff_index_size, cache : CACHE_MODIFIABLE, burst : BURST_INCR, default: '0 @@ -725,8 +749,8 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( else begin axi_aw_o = '{ addr : idx_final_addr_q, - len : 0, - size : axi_addrgen_q.vew, + len : indexed_length, + size : eff_index_size, cache : CACHE_MODIFIABLE, burst : BURST_INCR, default: '0 @@ -737,8 +761,8 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Send this request to the load/store units axi_addrgen_queue = '{ addr : idx_final_addr_q, - size : axi_addrgen_q.vew, - len : 0, + size : eff_index_size, + len : indexed_length, is_load: axi_addrgen_q.is_load }; axi_addrgen_queue_push = 1'b1; diff --git a/hardware/src/vlsu/vldu.sv b/hardware/src/vlsu/vldu.sv index 51042ed8e..f029aa9ad 100644 --- a/hardware/src/vlsu/vldu.sv +++ b/hardware/src/vlsu/vldu.sv @@ -255,7 +255,6 @@ module vldu import ara_pkg::*; import rvv_pkg::*; #( automatic logic [idx_width(DataWidth*NrLanes/8):0] valid_bytes; valid_bytes = issue_cnt_q < NrLanes * 8 ? vinsn_valid_bytes : vrf_valid_bytes; valid_bytes = valid_bytes < axi_valid_bytes ? valid_bytes : axi_valid_bytes; - r_pnt_d = r_pnt_q + valid_bytes; vrf_pnt_d = vrf_pnt_q + valid_bytes; From cc09801aa0166c59df2074e8a4263923652c52f9 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Tue, 13 Dec 2022 01:25:12 +0500 Subject: [PATCH 02/16] [hardware] hardware change for single lane. 
--- hardware/src/masku/masku.sv | 18 +++++++++--------- hardware/src/vlsu/addrgen.sv | 14 ++++++++++---- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv index 30c564ecd..4859edebd 100644 --- a/hardware/src/masku/masku.sv +++ b/hardware/src/masku/masku.sv @@ -80,7 +80,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( end: gen_masku_operand_ready assign masku_operand_b_i[lane] = masku_operand_i[lane][1]; - assign masku_operand_b_valid_i[lane] = (vinsn_issue.op inside {[VMSBF:VID]}) ? '1 : masku_operand_valid_i[lane][1]; + assign masku_operand_b_valid_i[lane] = /*(vinsn_issue.op inside {[VMSBF:VID]}) ? '1 : */masku_operand_valid_i[lane][1]; assign masku_operand_ready_o[lane][1] = masku_operand_b_ready_o[lane]; assign masku_operand_m_i[lane] = masku_operand_i[lane][0]; @@ -297,7 +297,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( logic [$clog2(DataWidth*NrLanes):0] popcount; logic [$clog2(VLEN):0] popcount_d, popcount_q; logic [$clog2(DataWidth*NrLanes)-1:0] vfirst_count; - logic [$clog2(VLEN)-1:0] vfirst_count_d, vfirst_count_q; + logic [$clog2(VLEN):0] vfirst_count_d, vfirst_count_q; logic vfirst_empty; // Pointers @@ -562,32 +562,32 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( unique case (vinsn_issue.vtype.vsew) EW8 : begin for (int index = 1; index < (NrLanes*DataWidth)/8; index++) begin - alu_result_vm [(index*8) +: 7] = (((NrLanes * DataWidth)/8) >= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*32); + alu_result_vm [(index*8) +: 7] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*32); alu_result_vm_m = alu_result_vm & mask; end end EW16: begin for (int index = 1; index < (NrLanes*DataWidth)/16; index++) begin - alu_result_vm [(index*16) +: 15] = (((NrLanes * DataWidth)/8) >= vinsn_issue.vl) ? 
index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*16); + alu_result_vm [(index*16) +: 15] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*16); alu_result_vm_m = alu_result_vm & mask; end end EW32: begin for (int index = 1; index < (NrLanes*DataWidth)/32; index++) begin - alu_result_vm [(index*32) +: 31] = (((NrLanes * DataWidth)/8) >= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*8); + alu_result_vm [(index*32) +: 31] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*8); alu_result_vm_m = alu_result_vm & mask; end end EW64: begin for (int index = 1; index < (NrLanes*DataWidth)/64; index++) begin - alu_result_vm [(index*64) +: 63] = (((NrLanes * DataWidth)/8) >= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*4); + alu_result_vm [(index*64) +: 63] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*4); alu_result_vm_m = alu_result_vm & mask; end end endcase end end - [VCPOP:VFIRST] : begin + VCPOP, VFIRST : begin vcpop_operand = (!vinsn_issue.vm) ? masku_operand_a_i & bit_enable_mask : masku_operand_a_i; end default: begin @@ -776,12 +776,12 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( // Adding the popcount and vfirst_count from all streams of operands if (|masku_operand_a_valid_i) begin popcount_d = popcount_q + popcount; - vfirst_count_d = vfirst_count_q + vfirst_count; + vfirst_count_d = (|vfirst_count_d) ? vfirst_count_q + 0 : (vfirst_empty) ? 
-1 : vfirst_count_q + vfirst_count; end // if this is the last beat, commit the result to the scalar_result queue if (iteration_count_d >= (((8 << vinsn_issue.vtype.vsew)*vinsn_issue.vl)/(DataWidth*NrLanes))) begin - result_scalar_d = (vinsn_issue.op == VCPOP) ? popcount_d : (vfirst_empty) ? -1 : vfirst_count_d; + result_scalar_d = (vinsn_issue.op == VCPOP) ? popcount_d : vfirst_count_d; result_scalar_valid_d = '1; // Decrement the commit counter by the entire number of elements, diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index c25228626..94e76553a 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -198,9 +198,13 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( end // Extract only 1/NrLanes of the word + if (NrLanes != 1) begin for (int unsigned lane = 0; lane < NrLanes; lane++) if (lane == word_lane_ptr_q) reduced_word = deshuffled_word[word_lane_ptr_q*$bits(elen_t) +: $bits(elen_t)]; + end else begin + reduced_word = deshuffled_word[word_lane_ptr_q*$bits(elen_t) +: $bits(elen_t)]; + end idx_addr = reduced_word; case (state_q) @@ -314,7 +318,6 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Compose the address idx_final_addr_d = pe_req_q.scalar_op + idx_addr; - // When the data is accepted if (idx_addr_ready_q) begin // Consumed one element @@ -324,9 +327,12 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Bump lane pointer elm_ptr_d = '0; word_lane_ptr_d += 1; - if (word_lane_ptr_q == NrLanes - 1) - // Ready for the next full word - addrgen_operand_ready_o = 1'b1; + if (NrLanes == 1) + if (word_lane_ptr_q == NrLanes - 1) + // Ready for the next full word + addrgen_operand_ready_o = 1'b1; + else + addrgen_operand_ready_o = 1'b1; end else begin // Bump element pointer elm_ptr_d += 1; From ef1a1e497fc38f4ccc4762dc1e7781a14d4eb257 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Tue, 13 Dec 2022 18:08:06 +0500 Subject: [PATCH 03/16] [hardware] Update hardware for 
single lane. --- hardware/src/lane/valu.sv | 12 ++++++++---- hardware/src/masku/masku.sv | 6 +++--- hardware/src/vlsu/addrgen.sv | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/hardware/src/lane/valu.sv b/hardware/src/lane/valu.sv index 0222f72e2..8b39f3bb9 100644 --- a/hardware/src/lane/valu.sv +++ b/hardware/src/lane/valu.sv @@ -376,6 +376,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; elen_t valu_result; logic valu_valid; vxsat_t alu_vxsat, alu_vxsat_q, alu_vxsat_d; + vxsat_t alu_op_vxsat_q, alu_op_vxsat_d; assign alu_vxsat_d = alu_vxsat; @@ -407,9 +408,10 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; always_comb begin: p_valu // Maintain state - vinsn_queue_d = vinsn_queue_q; - issue_cnt_d = issue_cnt_q; - commit_cnt_d = commit_cnt_q; + vinsn_queue_d = vinsn_queue_q; + issue_cnt_d = issue_cnt_q; + commit_cnt_d = commit_cnt_q; + alu_op_vxsat_d = (vinsn_commit_valid) ? (|alu_vxsat_q) ? alu_vxsat_q : alu_op_vxsat_q : '0; result_queue_d = result_queue_q; result_queue_valid_d = result_queue_valid_q; @@ -747,7 +749,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; // alu saturation calculation if (|result_queue_valid_q) - vxsat_flag_o = |(alu_vxsat_q & result_queue_q[result_queue_read_pnt_q].be); + vxsat_flag_o = |(alu_op_vxsat_d & result_queue_q[result_queue_read_pnt_q].be); // Received a grant from the VRF. // Deactivate the request. 
@@ -877,6 +879,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; simd_red_cnt_max_q <= '0; alu_red_ready_q <= 1'b0; alu_vxsat_q <= '0; + alu_op_vxsat_q <= '0; end else begin issue_cnt_q <= issue_cnt_d; commit_cnt_q <= commit_cnt_d; @@ -890,6 +893,7 @@ module valu import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::idx_width; simd_red_cnt_max_q <= simd_red_cnt_max_d; alu_red_ready_q <= alu_red_ready_i; alu_vxsat_q <= alu_vxsat_d; + alu_op_vxsat_q <= alu_op_vxsat_d; end end diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv index 4859edebd..c5477de93 100644 --- a/hardware/src/masku/masku.sv +++ b/hardware/src/masku/masku.sv @@ -80,7 +80,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( end: gen_masku_operand_ready assign masku_operand_b_i[lane] = masku_operand_i[lane][1]; - assign masku_operand_b_valid_i[lane] = /*(vinsn_issue.op inside {[VMSBF:VID]}) ? '1 : */masku_operand_valid_i[lane][1]; + assign masku_operand_b_valid_i[lane] = (vinsn_issue.op inside {[VMSBF:VID]}) ? '1 : masku_operand_valid_i[lane][1]; assign masku_operand_ready_o[lane][1] = masku_operand_b_ready_o[lane]; assign masku_operand_m_i[lane] = masku_operand_i[lane][0]; @@ -297,7 +297,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( logic [$clog2(DataWidth*NrLanes):0] popcount; logic [$clog2(VLEN):0] popcount_d, popcount_q; logic [$clog2(DataWidth*NrLanes)-1:0] vfirst_count; - logic [$clog2(VLEN):0] vfirst_count_d, vfirst_count_q; + logic [$clog2(VLEN)-1:0] vfirst_count_d, vfirst_count_q; logic vfirst_empty; // Pointers @@ -587,7 +587,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( endcase end end - VCPOP, VFIRST : begin + [VCPOP:VFIRST] : begin vcpop_operand = (!vinsn_issue.vm) ? 
masku_operand_a_i & bit_enable_mask : masku_operand_a_i; end default: begin diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index 94e76553a..de7129d05 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -821,4 +821,4 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( end end -endmodule : addrgen +endmodule : addrgen \ No newline at end of file From d43e6174fc01cc8e45ab3eb503801c7eb2ca73df Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Tue, 13 Dec 2022 19:26:46 +0500 Subject: [PATCH 04/16] [hardware] updated hardware for vfirst single lane --- hardware/src/masku/masku.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv index c5477de93..d70210222 100644 --- a/hardware/src/masku/masku.sv +++ b/hardware/src/masku/masku.sv @@ -781,7 +781,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( // if this is the last beat, commit the result to the scalar_result queue if (iteration_count_d >= (((8 << vinsn_issue.vtype.vsew)*vinsn_issue.vl)/(DataWidth*NrLanes))) begin - result_scalar_d = (vinsn_issue.op == VCPOP) ? popcount_d : vfirst_count_d; + result_scalar_d = (vinsn_issue.op == VCPOP) ? popcount_d : (vfirst_empty) ? 
-1 : vfirst_count_d; result_scalar_valid_d = '1; // Decrement the commit counter by the entire number of elements, From c042f1b37c0c3ddb86c3be80b55553581c737012 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Tue, 13 Dec 2022 20:48:54 +0500 Subject: [PATCH 05/16] [hardware] Handle single lane case in addrgen module --- hardware/src/vlsu/addrgen.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index de7129d05..6944a808d 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -667,11 +667,11 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // default length is 0, when AXIDataWidth >= (8 << sew) strided_length = 0; eff_stride_size = axi_addrgen_q.vew; - - if ( AxiDataWidth < (8 << axi_addrgen_q.vew)) begin - strided_length = (8 << axi_addrgen_q.vew) / AxiDataWidth - 1; - eff_stride_size = $clog2(AxiDataWidth/8); - end + if (NrLanes == 1) + if ( AxiDataWidth < (8 << axi_addrgen_q.vew)) begin + strided_length = (8 << axi_addrgen_q.vew) / AxiDataWidth - 1; + eff_stride_size = $clog2(AxiDataWidth/8); + end // AR Channel if (axi_addrgen_q.is_load) begin @@ -729,11 +729,11 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // default length is 0, when AXIDataWidth >= (8 << sew) indexed_length = 0; eff_index_size = axi_addrgen_q.vew; - - if ( AxiDataWidth < (8 << axi_addrgen_q.vew)) begin - indexed_length = (8 << axi_addrgen_q.vew) / AxiDataWidth - 1; - eff_index_size = $clog2(AxiDataWidth/8); - end + if (NrLanes == 1) + if ( AxiDataWidth < (8 << axi_addrgen_q.vew)) begin + indexed_length = (8 << axi_addrgen_q.vew) / AxiDataWidth - 1; + eff_index_size = $clog2(AxiDataWidth/8); + end if (idx_addr_valid_q) begin // We consumed a word From 75c1b9791d2cbeaf7c0913123807e568a3a4a10a Mon Sep 17 00:00:00 2001 From: M-Ijaz-10x Date: Tue, 13 Dec 2022 21:48:54 +0500 Subject: [PATCH 06/16] [hardware] :bug: fix issue with vfirst hardware 
--- hardware/src/masku/masku.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv index d70210222..cbbf07ce5 100644 --- a/hardware/src/masku/masku.sv +++ b/hardware/src/masku/masku.sv @@ -781,7 +781,7 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( // if this is the last beat, commit the result to the scalar_result queue if (iteration_count_d >= (((8 << vinsn_issue.vtype.vsew)*vinsn_issue.vl)/(DataWidth*NrLanes))) begin - result_scalar_d = (vinsn_issue.op == VCPOP) ? popcount_d : (vfirst_empty) ? -1 : vfirst_count_d; + result_scalar_d = (vinsn_issue.op == VCPOP) ? popcount_d : (&vfirst_count_d) ? -1 : vfirst_count_d; result_scalar_valid_d = '1; // Decrement the commit counter by the entire number of elements, From 5502484f14595b6f045ef6a016c2d084e1c41d6a Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Thu, 15 Dec 2022 16:44:06 +0500 Subject: [PATCH 07/16] [hardware] Fixed vluxei, vsuxei --- hardware/src/vlsu/addrgen.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index 6944a808d..ebb0d18e5 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -327,11 +327,11 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Bump lane pointer elm_ptr_d = '0; word_lane_ptr_d += 1; - if (NrLanes == 1) + if (NrLanes != 1) begin if (word_lane_ptr_q == NrLanes - 1) // Ready for the next full word addrgen_operand_ready_o = 1'b1; - else + end else addrgen_operand_ready_o = 1'b1; end else begin // Bump element pointer @@ -821,4 +821,4 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( end end -endmodule : addrgen \ No newline at end of file +endmodule : addrgen From 074f214bf0486897e90ac0fd5a2399cb41327711 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Mon, 19 Dec 2022 17:39:17 +0500 Subject: [PATCH 08/16] [hardware] vid fixed --- hardware/src/masku/masku.sv | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hardware/src/masku/masku.sv b/hardware/src/masku/masku.sv index cbbf07ce5..27c574850 100644 --- a/hardware/src/masku/masku.sv +++ b/hardware/src/masku/masku.sv @@ -562,25 +562,25 @@ module masku import ara_pkg::*; import rvv_pkg::*; #( unique case (vinsn_issue.vtype.vsew) EW8 : begin for (int index = 1; index < (NrLanes*DataWidth)/8; index++) begin - alu_result_vm [(index*8) +: 7] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*32); + alu_result_vm [(index*8) +: 7] = (iteration_count_d <= 1)/*(((NrLanes * DataWidth)/8) <= vinsn_issue.vl)*/ ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*32); alu_result_vm_m = alu_result_vm & mask; end end EW16: begin for (int index = 1; index < (NrLanes*DataWidth)/16; index++) begin - alu_result_vm [(index*16) +: 15] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*16); + alu_result_vm [(index*16) +: 15] = (iteration_count_d <= 1)/*(((NrLanes * DataWidth)/8) <= vinsn_issue.vl)*/ ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*16); alu_result_vm_m = alu_result_vm & mask; end end EW32: begin for (int index = 1; index < (NrLanes*DataWidth)/32; index++) begin - alu_result_vm [(index*32) +: 31] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*8); + alu_result_vm [(index*32) +: 31] = (iteration_count_d <= 1)/*(((NrLanes * DataWidth)/8) <= vinsn_issue.vl)*/ ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*8); alu_result_vm_m = alu_result_vm & mask; end end EW64: begin for (int index = 1; index < (NrLanes*DataWidth)/64; index++) begin - alu_result_vm [(index*64) +: 63] = (((NrLanes * DataWidth)/8) <= vinsn_issue.vl) ? 
index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*4); + alu_result_vm [(index*64) +: 63] = (iteration_count_d <= 1)/*(((NrLanes * DataWidth)/8) <= vinsn_issue.vl)*/ ? index : index-(((vinsn_issue.vl/((NrLanes * DataWidth)/8))-iteration_count_d)*4); alu_result_vm_m = alu_result_vm & mask; end end From e0c7f01ced05fbd9f78a64bb900296dcf76e108e Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Wed, 2 Nov 2022 12:24:48 +0500 Subject: [PATCH 09/16] [config] Add the 1_lane configuration --- config/1_lane.mk | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 config/1_lane.mk diff --git a/config/1_lane.mk b/config/1_lane.mk new file mode 100644 index 000000000..1df56e541 --- /dev/null +++ b/config/1_lane.mk @@ -0,0 +1,25 @@ +# Copyright 2020 ETH Zurich and University of Bologna. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Author: Samuel Riedel, ETH Zurich +# Matheus Cavalcante, ETH Zurich + +# Number of vector lanes +nr_lanes ?= 1 + +# Length of each vector register (in bits) +# Constraints: VLEN >= 512 +vlen ?= 4096 \ No newline at end of file From 70dfdd9244f6b966c857be5f7fc706ef99cbc959 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Wed, 2 Nov 2022 12:32:20 +0500 Subject: [PATCH 10/16] [config] Update constraint on VLEN(8*64*NrLanes) for all configuration --- config/16_lanes.mk | 4 ++-- config/2_lanes.mk | 4 ++-- config/4_lanes.mk | 2 +- config/8_lanes.mk | 4 ++-- config/README.md | 1 + 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/config/16_lanes.mk b/config/16_lanes.mk index 5f6acc1f3..b7cc760e0 100644 --- a/config/16_lanes.mk +++ b/config/16_lanes.mk @@ -21,5 +21,5 @@ nr_lanes ?= 16 # Length of each vector register (in bits) -# Constraints: VLEN > 128 -vlen ?= 16384 +# Constraints: VLEN >= 8192 +vlen ?= 8192 diff --git a/config/2_lanes.mk b/config/2_lanes.mk index 0d5ed0de8..4155d0091 100644 --- a/config/2_lanes.mk +++ b/config/2_lanes.mk @@ -21,5 +21,5 @@ nr_lanes ?= 2 # Length of each vector register (in bits) -# Constraints: VLEN > 128 -vlen ?= 2048 +# Constraints: VLEN >= 1024 +vlen ?= 4096 diff --git a/config/4_lanes.mk b/config/4_lanes.mk index 4c638a53b..5b27f82c5 100644 --- a/config/4_lanes.mk +++ b/config/4_lanes.mk @@ -21,5 +21,5 @@ nr_lanes ?= 4 # Length of each vector register (in bits) -# Constraints: VLEN > 128 +# Constraints: VLEN >= 2048 vlen ?= 4096 diff --git a/config/8_lanes.mk b/config/8_lanes.mk index 8a083ca94..a68761776 100644 --- a/config/8_lanes.mk +++ b/config/8_lanes.mk @@ -21,5 +21,5 @@ nr_lanes ?= 8 # Length of each vector register (in bits) -# Constraints: VLEN > 128 -vlen ?= 8192 +# Constraints: VLEN >= 4096 +vlen ?= 4096 diff --git a/config/README.md b/config/README.md index 18e2e9b3b..ef5a281f8 100644 --- a/config/README.md +++ b/config/README.md @@ -6,6 +6,7 @@ parameters such as the number of lanes in the design. 
This will automatically generate the correct software runtime and the correct hardware. -Ara currently has four configurations, which differ on the amount of lanes: +Ara currently has five configurations, which differ in the number of lanes: +- `1_lane.mk` - `2_lanes.mk` - `4_lanes.mk` - `8_lanes.mk` From 0c18343c4699be7d6818564e159b54e0ca392977 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Fri, 23 Dec 2022 13:23:26 +0500 Subject: [PATCH 11/16] [ci] Add the 1_lane configuration to the CI --- .github/workflows/ci.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b2a1c8e11..8dbc70493 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -167,7 +167,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] + ara_config: [1_lane, 2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: tc-llvm steps: - uses: actions/checkout@v3 @@ -198,7 +198,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] + ara_config: [1_lane, 2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["tc-llvm", "tc-gcc", "tc-isa-sim"] steps: - uses: actions/checkout@v3 @@ -236,7 +236,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] + ara_config: [1_lane, 2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["tc-verilator", "tc-isa-sim"] steps: - uses: actions/checkout@v3 @@ -286,7 +286,7 @@ jobs: strategy: matrix: app: [hello_world, imatmul, fmatmul, iconv2d, fconv2d, fconv3d, jacobi2d, dropout, fft, dwt, exp, softmax, dotproduct, fdotproduct, pathfinder, roi_align] - ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] + ara_config: [1_lane, 2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["compile-ara", "compile-apps"] steps: - uses: actions/checkout@v3 @@ -318,7 +318,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] + ara_config: [1_lane, 2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: 
["compile-ara", "compile-riscv-tests"] steps: - uses: actions/checkout@v3 @@ -450,7 +450,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] + ara_config: [1_lane, 2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["compile-ara", "compile-apps"] steps: - uses: actions/checkout@v3 @@ -668,7 +668,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] + ara_config: [1_lane, 2_lanes, 4_lanes, 8_lanes, 16_lanes] if: always() needs: ["simulate", "riscv-tests-spike", "riscv-tests-simv"] steps: From 588578e447085fef89957b33e38c9aa308c50aa4 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Fri, 23 Dec 2022 13:51:21 +0500 Subject: [PATCH 12/16] [riscv-tests] Updated tests for single lane --- .../isa/rv64uv/1_lane_tests/Makefrag | 40 + .../isa/rv64uv/1_lane_tests/vaadd.c | 59 ++ .../isa/rv64uv/1_lane_tests/vaaddu.c | 59 ++ .../isa/rv64uv/1_lane_tests/vadc.c | 103 +++ .../isa/rv64uv/1_lane_tests/vadd.c | 203 +++++ .../isa/rv64uv/1_lane_tests/vand.c | 309 +++++++ .../isa/rv64uv/1_lane_tests/vasub.c | 54 ++ .../isa/rv64uv/1_lane_tests/vasubu.c | 54 ++ .../isa/rv64uv/1_lane_tests/vcompress.c | 26 + .../isa/rv64uv/1_lane_tests/vcpop.c | 47 + .../isa/rv64uv/1_lane_tests/vdiv.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vdivu.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vfadd.c | 449 ++++++++++ .../isa/rv64uv/1_lane_tests/vfclass.c | 90 ++ .../isa/rv64uv/1_lane_tests/vfcvt.c | 834 ++++++++++++++++++ .../isa/rv64uv/1_lane_tests/vfdiv.c | 355 ++++++++ .../isa/rv64uv/1_lane_tests/vfirst.c | 48 + .../isa/rv64uv/1_lane_tests/vfmacc.c | 356 ++++++++ .../isa/rv64uv/1_lane_tests/vfmadd.c | 433 +++++++++ .../isa/rv64uv/1_lane_tests/vfmax.c | 351 ++++++++ .../isa/rv64uv/1_lane_tests/vfmerge.c | 94 ++ .../isa/rv64uv/1_lane_tests/vfmin.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfmsac.c | 454 ++++++++++ .../isa/rv64uv/1_lane_tests/vfmsub.c | 453 ++++++++++ .../isa/rv64uv/1_lane_tests/vfmul.c | 350 
++++++++ .../isa/rv64uv/1_lane_tests/vfmv.c | 68 ++ .../isa/rv64uv/1_lane_tests/vfmvfs.c | 90 ++ .../isa/rv64uv/1_lane_tests/vfmvsf.c | 69 ++ .../isa/rv64uv/1_lane_tests/vfncvt.c | 793 +++++++++++++++++ .../isa/rv64uv/1_lane_tests/vfnmacc.c | 456 ++++++++++ .../isa/rv64uv/1_lane_tests/vfnmadd.c | 458 ++++++++++ .../isa/rv64uv/1_lane_tests/vfnmsac.c | 455 ++++++++++ .../isa/rv64uv/1_lane_tests/vfnmsub.c | 454 ++++++++++ .../isa/rv64uv/1_lane_tests/vfrdiv.c | 179 ++++ .../isa/rv64uv/1_lane_tests/vfredmax.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfredmin.c | 350 ++++++++ .../isa/rv64uv/1_lane_tests/vfredosum.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfredusum.c | 352 ++++++++ .../isa/rv64uv/1_lane_tests/vfrsub.c | 167 ++++ .../isa/rv64uv/1_lane_tests/vfsgnj.c | 408 +++++++++ .../isa/rv64uv/1_lane_tests/vfsgnjn.c | 350 ++++++++ .../isa/rv64uv/1_lane_tests/vfsgnjx.c | 348 ++++++++ .../isa/rv64uv/1_lane_tests/vfslide1down.c | 101 +++ .../isa/rv64uv/1_lane_tests/vfslide1up.c | 90 ++ .../isa/rv64uv/1_lane_tests/vfsqrt.c | 142 +++ .../isa/rv64uv/1_lane_tests/vfsub.c | 349 ++++++++ .../isa/rv64uv/1_lane_tests/vfwadd.c | 531 +++++++++++ .../isa/rv64uv/1_lane_tests/vfwcvt.c | 670 ++++++++++++++ .../isa/rv64uv/1_lane_tests/vfwmacc.c | 351 ++++++++ .../isa/rv64uv/1_lane_tests/vfwmsac.c | 353 ++++++++ .../isa/rv64uv/1_lane_tests/vfwmul.c | 258 ++++++ .../isa/rv64uv/1_lane_tests/vfwnmacc.c | 352 ++++++++ .../isa/rv64uv/1_lane_tests/vfwnmsac.c | 347 ++++++++ .../isa/rv64uv/1_lane_tests/vfwredosum.c | 268 ++++++ .../isa/rv64uv/1_lane_tests/vfwredusum.c | 272 ++++++ .../isa/rv64uv/1_lane_tests/vfwsub.c | 527 +++++++++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vid.c | 31 + .../isa/rv64uv/1_lane_tests/viota.c | 37 + apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c | 79 ++ .../isa/rv64uv/1_lane_tests/vl1r.c | 439 +++++++++ .../isa/rv64uv/1_lane_tests/vl_nocheck.c | 68 ++ .../isa/rv64uv/1_lane_tests/vle1.c | 45 + .../isa/rv64uv/1_lane_tests/vle16.c | 293 ++++++ 
.../isa/rv64uv/1_lane_tests/vle32.c | 307 +++++++ .../isa/rv64uv/1_lane_tests/vle64.c | 315 +++++++ .../isa/rv64uv/1_lane_tests/vle8.c | 273 ++++++ .../isa/rv64uv/1_lane_tests/vlff.c | 91 ++ .../riscv-tests/isa/rv64uv/1_lane_tests/vls.c | 190 ++++ .../isa/rv64uv/1_lane_tests/vluxei.c | 167 ++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vlx.c | 101 +++ .../isa/rv64uv/1_lane_tests/vmacc.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vmadc.c | 224 +++++ .../isa/rv64uv/1_lane_tests/vmadd.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vmand.c | 79 ++ .../isa/rv64uv/1_lane_tests/vmandnot.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmax.c | 181 ++++ .../isa/rv64uv/1_lane_tests/vmaxu.c | 181 ++++ .../isa/rv64uv/1_lane_tests/vmerge.c | 113 +++ .../isa/rv64uv/1_lane_tests/vmfeq.c | 503 +++++++++++ .../isa/rv64uv/1_lane_tests/vmfge.c | 134 +++ .../isa/rv64uv/1_lane_tests/vmfgt.c | 134 +++ .../isa/rv64uv/1_lane_tests/vmfle.c | 273 ++++++ .../isa/rv64uv/1_lane_tests/vmflt.c | 279 ++++++ .../isa/rv64uv/1_lane_tests/vmfne.c | 503 +++++++++++ .../isa/rv64uv/1_lane_tests/vmin.c | 181 ++++ .../isa/rv64uv/1_lane_tests/vminu.c | 176 ++++ .../isa/rv64uv/1_lane_tests/vmnand.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmnor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmornot.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmsbc.c | 160 ++++ .../isa/rv64uv/1_lane_tests/vmsbf.c | 33 + .../isa/rv64uv/1_lane_tests/vmseq.c | 306 +++++++ .../isa/rv64uv/1_lane_tests/vmsgt.c | 168 ++++ .../isa/rv64uv/1_lane_tests/vmsgtu.c | 168 ++++ .../isa/rv64uv/1_lane_tests/vmsif.c | 33 + .../isa/rv64uv/1_lane_tests/vmsle.c | 237 +++++ .../isa/rv64uv/1_lane_tests/vmsleu.c | 237 +++++ .../isa/rv64uv/1_lane_tests/vmslt.c | 163 ++++ .../isa/rv64uv/1_lane_tests/vmsltu.c | 163 ++++ .../isa/rv64uv/1_lane_tests/vmsne.c | 306 +++++++ .../isa/rv64uv/1_lane_tests/vmsof.c | 33 + .../isa/rv64uv/1_lane_tests/vmul.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vmulh.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vmulhsu.c | 232 
+++++ .../isa/rv64uv/1_lane_tests/vmulhu.c | 232 +++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vmv.c | 104 +++ .../isa/rv64uv/1_lane_tests/vmvnrr.c | 146 +++ .../isa/rv64uv/1_lane_tests/vmvsx.c | 75 ++ .../isa/rv64uv/1_lane_tests/vmvxs.c | 72 ++ .../isa/rv64uv/1_lane_tests/vmxnor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vmxor.c | 68 ++ .../isa/rv64uv/1_lane_tests/vnclip.c | 78 ++ .../isa/rv64uv/1_lane_tests/vnclipu.c | 78 ++ .../isa/rv64uv/1_lane_tests/vnmsac.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vnmsub.c | 292 ++++++ .../isa/rv64uv/1_lane_tests/vnsra.c | 242 +++++ .../isa/rv64uv/1_lane_tests/vnsrl.c | 242 +++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vor.c | 309 +++++++ .../isa/rv64uv/1_lane_tests/vpopc_m.c | 30 + .../isa/rv64uv/1_lane_tests/vredand.c | 93 ++ .../isa/rv64uv/1_lane_tests/vredmax.c | 79 ++ .../isa/rv64uv/1_lane_tests/vredmaxu.c | 106 +++ .../isa/rv64uv/1_lane_tests/vredmin.c | 78 ++ .../isa/rv64uv/1_lane_tests/vredminu.c | 78 ++ .../isa/rv64uv/1_lane_tests/vredor.c | 93 ++ .../isa/rv64uv/1_lane_tests/vredsum.c | 178 ++++ .../isa/rv64uv/1_lane_tests/vredxor.c | 44 + .../isa/rv64uv/1_lane_tests/vrem.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vremu.c | 232 +++++ .../isa/rv64uv/1_lane_tests/vrgather.c | 72 ++ .../isa/rv64uv/1_lane_tests/vrsub.c | 136 +++ apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c | 93 ++ .../isa/rv64uv/1_lane_tests/vs1r.c | 162 ++++ .../isa/rv64uv/1_lane_tests/vsadd.c | 99 +++ .../isa/rv64uv/1_lane_tests/vsaddu.c | 113 +++ .../isa/rv64uv/1_lane_tests/vsbc.c | 76 ++ .../isa/rv64uv/1_lane_tests/vse1.c | 57 ++ .../isa/rv64uv/1_lane_tests/vse16.c | 357 ++++++++ .../isa/rv64uv/1_lane_tests/vse32.c | 408 +++++++++ .../isa/rv64uv/1_lane_tests/vse64.c | 435 +++++++++ .../isa/rv64uv/1_lane_tests/vse8.c | 331 +++++++ .../isa/rv64uv/1_lane_tests/vsetivli.c | 466 ++++++++++ .../isa/rv64uv/1_lane_tests/vsetvl.c | 526 +++++++++++ .../isa/rv64uv/1_lane_tests/vsetvli.c | 528 +++++++++++ .../isa/rv64uv/1_lane_tests/vsext.c | 106 +++ 
.../isa/rv64uv/1_lane_tests/vslide1down.c | 101 +++ .../isa/rv64uv/1_lane_tests/vslide1up.c | 78 ++ .../isa/rv64uv/1_lane_tests/vslidedown.c | 164 ++++ .../isa/rv64uv/1_lane_tests/vslideup.c | 166 ++++ .../isa/rv64uv/1_lane_tests/vsll.c | 316 +++++++ .../isa/rv64uv/1_lane_tests/vsmul.c | 59 ++ .../isa/rv64uv/1_lane_tests/vsra.c | 316 +++++++ .../isa/rv64uv/1_lane_tests/vsrl.c | 316 +++++++ .../riscv-tests/isa/rv64uv/1_lane_tests/vss.c | 146 +++ .../isa/rv64uv/1_lane_tests/vssra.c | 79 ++ .../isa/rv64uv/1_lane_tests/vssrl.c | 79 ++ .../isa/rv64uv/1_lane_tests/vssub.c | 55 ++ .../isa/rv64uv/1_lane_tests/vssubu.c | 55 ++ .../isa/rv64uv/1_lane_tests/vsub.c | 136 +++ .../isa/rv64uv/1_lane_tests/vsux.c | 104 +++ .../isa/rv64uv/1_lane_tests/vsuxei.c | 137 +++ .../riscv-tests/isa/rv64uv/1_lane_tests/vsx.c | 102 +++ .../isa/rv64uv/1_lane_tests/vwadd.c | 241 +++++ .../isa/rv64uv/1_lane_tests/vwaddu.c | 244 +++++ .../isa/rv64uv/1_lane_tests/vwmacc.c | 248 ++++++ .../isa/rv64uv/1_lane_tests/vwmaccsu.c | 248 ++++++ .../isa/rv64uv/1_lane_tests/vwmaccu.c | 248 ++++++ .../isa/rv64uv/1_lane_tests/vwmaccus.c | 127 +++ .../isa/rv64uv/1_lane_tests/vwmul.c | 188 ++++ .../isa/rv64uv/1_lane_tests/vwmulsu.c | 188 ++++ .../isa/rv64uv/1_lane_tests/vwmulu.c | 188 ++++ .../isa/rv64uv/1_lane_tests/vwredsum.c | 153 ++++ .../isa/rv64uv/1_lane_tests/vwredsumu.c | 153 ++++ .../isa/rv64uv/1_lane_tests/vwsub.c | 246 ++++++ .../isa/rv64uv/1_lane_tests/vwsubu.c | 246 ++++++ .../isa/rv64uv/1_lane_tests/vxor.c | 309 +++++++ .../isa/rv64uv/1_lane_tests/vzext.c | 106 +++ 178 files changed, 38409 insertions(+) create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c create mode 100644 
apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c create mode 
100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c create mode 100644 apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag new file mode 100644 index 000000000..caba44c02 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag @@ -0,0 +1,37 @@ +# Copyright 2021 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 +# +# Author: Matheus Cavalcante <matheusd@iis.ee.ethz.ch> +#         Basile Bougenot <bbougenot@student.ethz.ch> + +# Names of the rv64uv test programs selected for the 1-lane configuration. +rv64uv_sc_tests = vaadd vaaddu vsadd vsaddu vsmul vssra vssrl vnclip vnclipu vadd \ + vsub vrsub vwaddu vwsubu vwadd vwsub vsext vzext vadc vmadc vsbc vmsbc vand vor \ + vxor vsll vsrl vsra vnsrl vnsra vmseq vmsne vmsltu vmslt vmsleu vmsle vmsgtu \ + vmsgt vminu vmin vmaxu vmax vmul vmulh vmulhu vmulhsu vdivu vdiv vremu \ + vrem vwmul vwmulu vwmulsu vmacc vnmsac vmadd vnmsub vwmaccu vwmacc \ + vwmaccsu vwmaccus vmerge vmv vmvxs vmvsx vfmvfs vfmvsf vmvnrr \ + vredsum vredmaxu vredmax vredminu vredmin vredand vredor \ + vredxor vwredsumu vwredsum vfadd vfsub vfrsub vfwadd \ + vfwsub vfmul vfdiv vfrdiv vfwmul vfmacc vfnmacc vfmsac \ + vfnmsac vfmadd vfnmadd vfmsub vfnmsub vfwmacc \ + vfwnmacc vfwmsac vfwnmsac vfsqrt vfmin vfmax vfredusum vfredosum vfredmin vfredmax \ + vfwredusum vfwredosum vfclass vfsgnj vfsgnjn vfsgnjx vfmerge \ + vfmv vmfeq vmfne vmflt vmfle vmfgt vmfge vfcvt vfwcvt vfncvt \ + vmand vmnand vmandnot vmor vmnor vmornot vmxor vmxnor vslideup vslidedown \ + vslide1up vfslide1up vslide1down vfslide1down vl \ + vl1r vle1 vls vluxei vs \ + vs1r vse1 vss vsuxei vsetivli vsetvli \ + vsetvl vmsbf vmsof vmsif viota vid vcpop vfirst vle8 \ + vse8 vle16 vse16 vle32 vse32 vle64 vse64 + +#rv64uv_sc_tests = vaadd vaaddu vadc vasub vasubu vcompress vfirst vid viota \ + vl vlff vl_nocheck vlx vmsbf vmsif vmsof vpopc_m vrgather vsadd vsaddu \ + vsetvl vsetivli vsetvli vsmul vssra vssrl vssub vssubu vsux vsx + +# Prefix every test name with "rv64uv-p-". +rv64uv_p_tests = $(addprefix rv64uv-p-, $(rv64uv_sc_tests)) + +# Append the prefixed names to spike_ctests. + 
spike_ctests += $(rv64uv_p_tests) diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c new file mode 100644 index 000000000..513b4f4f3 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c @@ -0,0 +1,59 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + set_vxrm(0); // vxrm = rnu (round-to-nearest-up) + VSET(4, e8, m1); + VLOAD_8(v1, 1, -2, -3, 4); + VLOAD_8(v2, 1, 2, -3, 3); + __asm__ volatile("vaadd.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 1, 0, -3, 4); +} + +void TEST_CASE2(void) { + set_vxrm(1); // vxrm = rne (round-to-nearest-even) + VSET(4, e8, m1); + VLOAD_8(v1, 1, -2, -3, 4); + VLOAD_8(v2, 1, 9, -3, 5); + VLOAD_8(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vaadd.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 4, 0, 4); +} + +void TEST_CASE3(void) { + set_vxrm(2); // vxrm = rdn (round-down, truncate) + VSET(4, e32, m1); + VLOAD_32(v1, 1, -2, 3, -4); + const uint32_t scalar = 5; + __asm__ volatile("vaadd.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v3, 3, 1, 4, 0); +} + +// NOTE(review): VCLEAR reportedly made values off by 1 here, yet it is used +void TEST_CASE4(void) { + set_vxrm(3); // vxrm = rod (round-to-odd) + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vaadd.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(4, v3, 0, 3, 0, 5); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c new file mode 100644 index 000000000..ff50b894c 
--- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c @@ -0,0 +1,59 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + set_vxrm(0); // vxrm = rnu (round-to-nearest-up) + VSET(4, e8, m1); + VLOAD_8(v1, 1, 2, 3, 5); + VLOAD_8(v2, 1, 3, 8, 4); + __asm__ volatile("vaaddu.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 1, 3, 6, 5); +} + +void TEST_CASE2(void) { + set_vxrm(1); // vxrm = rne (round-to-nearest-even) + VSET(4, e8, m1); + VLOAD_8(v1, 5, 8, 3, 7); + VLOAD_8(v2, 7, 5, 3, 5); + VLOAD_8(v0, 0x0A, 0x00, 0x00, 0x00); + VCLEAR(v3); + __asm__ volatile("vaaddu.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 6, 0, 6); +} + +void TEST_CASE3(void) { + set_vxrm(2); // vxrm = rdn (round-down, truncate) + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4); + const uint32_t scalar = 5; + __asm__ volatile("vaaddu.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v3, 3, 3, 4, 4); +} + +// NOTE(review): VCLEAR reportedly made values off by 1 here, yet it is used +void TEST_CASE4(void) { + set_vxrm(3); // vxrm = rod (round-to-odd) + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vaaddu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(4, v3, 0, 3, 0, 5); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c new file mode 100644 index 000000000..ed24c2d45 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadc.c @@ -0,0 +1,103 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { // vadc.vvm, carry-in from v0 set on odd elements + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_8(v2, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v3, v1, v2, v0"); + VCMP_U8(1, v3, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v6, v2, v4, v0"); + VCMP_U16(2, v6, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v12, v4, v8, v0"); + VCMP_U32(3, v12, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, -7, 7); + VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vvm v24, v8, v16, v0"); + VCMP_U64(4, v24, 9, 10, 9, 10, 9, 10, 9, 10, 2, 5, 6, 9, 10, 13, 0, 0); +} + +void TEST_CASE2(void) { // vadc.vxm, scalar 5, carry-in on odd elements + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VCMP_U8(5, v3, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vxm v4, v2, %[A], v0" ::[A] "r"(scalar)); + VCMP_U16(6, v4, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e32, m4); + 
VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vxm v8, v4, %[A], v0" ::[A] "r"(scalar)); + VCMP_U32(7, v8, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vxm v16, v8, %[A], v0" ::[A] "r"(scalar)); + VCMP_U64(8, v16, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); +} + +void TEST_CASE3(void) { // vadc.vim, immediate 5, carry-in on odd elements + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v3, v1, 5, v0"); + VCMP_U8(9, v3, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v4, v2, 5, v0"); + VCMP_U16(10, v4, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v8, v4, 5, v0"); + VCMP_U32(11, v8, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vadc.vim v16, v8, 5, v0"); + VCMP_U64(12, v16, 6, 8, 8, 10, 10, 12, 12, 14, 6, 8, 8, 10, 10, 12, 12, 14); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c new file mode 100644 index 000000000..a83b07b3a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vadd.c @@ -0,0 +1,203 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v3, v1, v2"); + VCMP_U8(1, v3, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v6, v2, v4"); + VCMP_U16(2, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v12, v4, v8"); + VCMP_U32(3, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vv v24, v8, v16"); + VCMP_U64(4, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v3); + asm volatile("vadd.vv v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vadd.vv v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + 
VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vadd.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vadd.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v3, v1, 5"); + VCMP_U8(9, v3, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v4, v2, 5"); + VCMP_U16(10, v4, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v8, v4, 5"); + VCMP_U32(11, v8, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vadd.vi v16, v8, 5"); + VCMP_U64(12, v16, 6, 7, 8, 9, 10, 11, 12, 13, 6, 7, 8, 9, 10, 11, 12, 13); +} + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v3); + asm volatile("vadd.vi v3, v1, 5, v0.t"); + VCMP_U8(13, v3, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vadd.vi v4, v2, 5, v0.t"); + VCMP_U16(14, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vadd.vi v8, v4, 5, v0.t"); + VCMP_U32(15, v8, 0, 
7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vadd.vi v16, v8, 5, v0.t"); + VCMP_U64(16, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +void TEST_CASE5(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U8(17, v3, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(18, v4, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(19, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vadd.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(20, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, + -11); +} + +void TEST_CASE6(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v3); + asm volatile("vadd.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(21, v3, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vadd.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(22, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + 
VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vadd.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(23, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vadd.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(24, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +// Check that the addition also works when source register EEWs are changed +void TEST_CASE7(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(8, e16, m1); + asm volatile("vadd.vv v3, v1, v2"); + VSET(16, e8, m1); + VCMP_U8(25, v3, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c new file mode 100644 index 000000000..d2dd25e0e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vand.c @@ -0,0 +1,309 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + asm volatile("vand.vv v1, v2, v3"); + VCMP_U8(1, v1, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0, 0x01, 0xf0, 0xf0, + 0x01, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + asm volatile("vand.vv v2, v4, v6"); + VCMP_U16(2, v2, 0xff00, 0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0, 0xff00, + 0x0001, 0xf0f0, 0xff00, 0x0001, 0xf0f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + asm volatile("vand.vv v4, v8, v12"); + VCMP_U32(3, v4, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000, 0x00000001, + 0xf0f0f0f0, 0xffff0000, 0x00000001, 0xf0f0f0f0, 0xffff0000, + 0x00000001, 0xf0f0f0f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + asm 
volatile("vand.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); +} + +void TEST_CASE2() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vand.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, + 0xef, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vand.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0xff00, 0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0, 0xff00, + 0xbeef, 0xf0f0, 0xff00, 0xbeef, 0xf0f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vand.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000, 0xdeadbeef, + 
0xf0f0f0f0, 0xffff0000, 0xdeadbeef, 0xf0f0f0f0, 0xffff0000, + 0xdeadbeef, 0xf0f0f0f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vand.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0); +} + +void TEST_CASE3() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vand.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0, 0x00, 0xf0, 0xf0, + 0x00, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vand.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0, 0x0ff0, + 0x0000, 0x00f0, 0x0ff0, 0x0000, 0x00f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 
0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vand.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0, 0x00000000, + 0x00f000f0, 0x0ff00ff0, 0x00000000, 0x00f000f0, 0x0ff00ff0, + 0x00000000, 0x00f000f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vand.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0, + 0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0, + 0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0, + 0x0ff00ff00ff00ff0, 0x0000000000000000, 0x00f000f000f000f0); +} + +void TEST_CASE4() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vand.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, 0xef, 0xf0, 0xf0, + 0xef, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vand.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0, 0x0ff0, + 0xbeef, 0x00f0, 0x0ff0, 0xbeef, 0x00f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 
0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vand.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0, 0x0ff00ff0, 0xdeadbeef, + 0x00f000f0, 0x0ff00ff0, 0xdeadbeef, 0x00f000f0, 0x0ff00ff0, + 0xdeadbeef, 0x00f000f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vand.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, + 0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, + 0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, + 0x0ff00ff00ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0); +} + +void TEST_CASE5() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vand.vi v1, v2, 15"); + VCMP_U8(17, v1, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f, 0x01, 0x00, 0x0f, + 0x01, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vand.vi v2, v4, 15"); + VCMP_U16(18, v2, 0x000f, 0x0001, 0x0000, 0x000f, 0x0001, 0x0000, 0x000f, + 0x0001, 0x0000, 0x000f, 0x0001, 0x0000); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 
0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vand.vi v4, v8, 15"); + VCMP_U32(19, v4, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f, 0x00000001, + 0x00000000, 0x0000000f, 0x00000001, 0x00000000, 0x0000000f, + 0x00000001, 0x00000000); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vand.vi v8, v16, 15"); + VCMP_U64(20, v8, 0x000000000000000f, 0x0000000000000001, 0x0000000000000000, + 0x000000000000000f, 0x0000000000000001, 0x0000000000000000, + 0x000000000000000f, 0x0000000000000001, 0x0000000000000000, + 0x000000000000000f, 0x0000000000000001, 0x0000000000000000); +} + +void TEST_CASE6() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vand.vi v1, v2, 15, v0.t"); + VCMP_U8(21, v1, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + 0xef, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vand.vi v2, v4, 15, v0.t"); + VCMP_U16(22, v2, 0x000f, 0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000, 0x000f, + 0xbeef, 0x0000, 0x000f, 0xbeef, 0x0000); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vand.vi v4, v8, 15, v0.t"); + VCMP_U32(23, v4, 0x0000000f, 0xdeadbeef, 0x00000000, 0x0000000f, 0xdeadbeef, + 0x00000000, 0x0000000f, 0xdeadbeef, 0x00000000, 0x0000000f, + 0xdeadbeef, 0x00000000); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vand.vi v8, v16, 15, v0.t"); + VCMP_U64(24, v8, 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x000000000000000f, 0xdeadbeefdeadbeef, 0x0000000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c new file mode 100644 index 000000000..760a33369 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasub.c @@ -0,0 +1,54 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 5, 10, 15, 20); + VLOAD_32(v2, -1, 2, -3, 4); + __asm__ volatile("vasub.vv v3, v1, v2" ::); + VEC_CMP_32(1, v3, 3, 4, 9, 8); +} + +void TEST_CASE2(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 5, 10, 15, 20); + VLOAD_32(v2, 1, 2, 3, -4); + VLOAD_32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vasub.vv v3, v1, v2, v0.t" ::); + VEC_CMP_32(2, v3, 0, 4, 0, 12); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + const uint64_t scalar = -5; + __asm__ volatile("vasub.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VEC_CMP_U32(3, v3, 5, 8, 10, 13); +} + +void TEST_CASE4(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + const uint64_t scalar = -5; + VLOAD_U32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vasub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_U32(4, v3, 0, 8, 0, 13); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c new file mode 100644 index 000000000..68ec9cdbf --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vasubu.c @@ -0,0 +1,54 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + VLOAD_U32(v2, 1, 2, 3, 4); + __asm__ volatile("vasubu.vv v3, v1, v2" ::); + VEC_CMP_U32(1, v3, 2, 4, 6, 8); +} + +void TEST_CASE2(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + VLOAD_U32(v2, 1, 2, 3, 4); + VLOAD_U32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vasubu.vv v3, v1, v2, v0.t" ::); + VEC_CMP_U32(2, v3, 0, 4, 0, 8); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + const uint64_t scalar = 5; + __asm__ volatile("vasubu.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VEC_CMP_U32(3, v3, 0, 3, 5, 8); +} + +void TEST_CASE4(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + const uint64_t scalar = 5; + VLOAD_U32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vasubu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_U32(4, v3, 0, 3, 0, 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c new file mode 100644 index 000000000..58917263e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcompress.c @@ -0,0 +1,26 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(4, e64, m1); + VLOAD_64(v4, 1, 2, 3, 4); + VLOAD_64(v0, 12, 0, 0, 0); + CLEAR(v2); + __asm__ volatile("vcompress.vm v2, v4, v0"); + DEBUG_64(v2); + VEC_CMP_64(1, v2, 3, 4, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c new file mode 100644 index 000000000..a9b828e31 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c @@ -0,0 +1,47 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +// masked +void TEST_CASE1(void) { + VSET(4, e32, m1); + VCLEAR(v2); + VLOAD_32(v2, 7, 0, 0, 0); + VLOAD_32(v0, 5, 0, 0, 0); + volatile uint32_t scalar = 1337; + volatile uint32_t OUP[] = {0, 0, 0, 0}; + __asm__ volatile( + "vpopc.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); + XCMP(1, OUP[0], 2); +} + +// unmasked +void TEST_CASE2(void) { + VSET(4, e32, m1); + VLOAD_32(v2, 0xF, 0, 0, 0); + volatile uint32_t scalar = 1337; + volatile uint32_t OUP[] = {0, 0, 0, 0}; + __asm__ volatile( + "vpopc.m %[A], v2 \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); + XCMP(2, OUP[0], 4); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c new file mode 100644 index 000000000..ca92520a4 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdiv.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e, + 0x4f, 0xe7, 0x3d, 0x63, 0xd8); + VLOAD_8(v3, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16, + 0xa0, 0xca, 0x83, 0x54, 0x90); + asm volatile("vdiv.vv v1, v2, v3"); + VCMP_I8(1, v1, 0xfd, 0x00, 0x00, 0x01, 0x3d, 0x02, 0x00, 0x0b, 0x01, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7, + 0x74e7, 0xa845, 0x2052, 0x6f9a, 0x6d88, 0x2861, 0xdaea, 0x2075); + VLOAD_16(v6, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f, + 0xc4a3, 0x4a05, 0x5501, 0x49bb, 0xe5f8, 0xfa20, 0x4edf, 0xf892); + asm volatile("vdiv.vv v2, v4, v6"); + VCMP_I16(2, v2, 0xffff, 0x0000, 0x0002, 0x0000, 0x0000, 0x0002, 0x0001, + 0x0003, 0xffff, 0xffff, 0x0000, 0x0001, 0xfffc, 0xfffa, 0x0000, + 0xfffc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 0xb01c09f2, + 0xd0cb54fc, 0x9b617331, 0xd32cf029, 0xaea08daa, 0xd76f06e2, + 0x99b8e084, 0x9fdc6bfc, 0x3103b573, 0xaf1e96b4, 0x54fe9ea1, + 0x0ceff9c0); + VLOAD_32(v12, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b, + 0x00000064, 0xffffff88, 0x0000003b, 0x00000011, 0xffffffc3, + 0xffffffa2, 0x0000004b, 0xffffffcc, 0xffffffb1, 0xffffff9d, + 0xffffffba); + asm volatile("vdiv.vv v4, v8, v12"); + VCMP_I32(3, v4, 0xff95db40, 0xfdaec51b, 0xfedd4f4f, 0xfef2bd11, 0xfeef4ea4, + 0xff872740, 0x00d6a792, 0xff3d81e7, 0xfb369eec, 0x00aa3ed6, + 0x01168b3a, 0xfeb7d87b, 0xff0eb2ab, 0x01061804, 0xff24374a, + 0xffd0afa2); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 0x375fcbcf6aae0cac, + 0x6038c1825cfb5a72, 0x59046c6b324e7fb3, 0xbac247d729fb18e6, + 
0x0cecf04984784b3f, 0x2cd8e8ea5be0c201, 0xcab2cf17c48b57cb, + 0xcb53d1b55097656d, 0xbeeb18a6fab9af8d, 0xbf94f3a0fa74670b, + 0x817c1dfb5ab3bfd7, 0x40951ef6459642d1, 0x4b5f994556f6ba42, + 0x63a8eaa417e6d29c); + VLOAD_64(v24, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e, + 0x000000497b7bfcdd, 0x00000001feebe76a, 0xffffffb93a2c242c, + 0xffffff8523c47d2a, 0x00000019f01c4433, 0xffffffd940862ecc, + 0x00000016ac4df9c8, 0x0000001e9f15d00c, 0xffffffd0af22d791, + 0xffffff9feca249bc, 0xffffffac5eae7985, 0x0000003567fe8027, + 0x000000175355cab3); + asm volatile("vdiv.vv v8, v16, v24"); + VCMP_I64(4, v8, 0xfffffffffec9dd87, 0xffffffffffe7dac0, 0x0000000000b9cad9, + 0x00000000014f3850, 0x000000002c9a4382, 0x0000000000fa75a9, + 0xffffffffffe51146, 0x0000000001baa14f, 0x000000000160270e, + 0xfffffffffdad470e, 0xfffffffffddfe832, 0x00000000015c87ee, + 0x0000000001511bae, 0xffffffffff3a4e84, 0x0000000001694c75, + 0x000000000445c6cf); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xcd, 0x42, 0x2c, 0xc4, 0x7a, 0x7b, 0xd1, 0x21, 0x38, 0x1d, 0x2e, + 0x4f, 0xe7, 0x3d, 0x63, 0xd8); + VLOAD_8(v3, 0x11, 0xa1, 0x7c, 0xde, 0x02, 0x38, 0x4e, 0x03, 0x1e, 0xc6, 0x16, + 0xa0, 0xca, 0x83, 0x54, 0x90); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vdiv.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x00, 0, 0x01, 0, 0x02, 0, 0x0b, 0, 0x00, 0, 0x00, 0, 0x00, + 0, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x9e55, 0xf806, 0xa137, 0xa5fc, 0x38ae, 0x58c3, 0x2a66, 0x6bd7, + 0x74e7, 0xa845, 0x2052, 0x6f9a, 0x6d88, 0x2861, 0xdaea, 0x2075); + VLOAD_16(v6, 0x5e64, 0x0a44, 0xdde5, 0x813f, 0x78b9, 0x29be, 0x28b4, 0x1b2f, + 0xc4a3, 0x4a05, 0x5501, 0x49bb, 0xe5f8, 0xfa20, 0x4edf, 0xf892); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vdiv.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x0000, 0, 0x0000, 0, 0x0002, 0, 0x0003, 0, 0xffff, 0, + 0x0001, 0, 0xfffa, 0, 0xfffc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xed578a38, 0xba7b1912, 0xb99934ef, 0x21a85df9, 
0xb01c09f2, + 0xd0cb54fc, 0x9b617331, 0xd32cf029, 0xaea08daa, 0xd76f06e2, + 0x99b8e084, 0x9fdc6bfc, 0x3103b573, 0xaf1e96b4, 0x54fe9ea1, + 0x0ceff9c0); + VLOAD_32(v12, 0x0000002d, 0x0000001e, 0x0000003e, 0xffffffe0, 0x0000004b, + 0x00000064, 0xffffff88, 0x0000003b, 0x00000011, 0xffffffc3, + 0xffffffa2, 0x0000004b, 0xffffffcc, 0xffffffb1, 0xffffff9d, + 0xffffffba); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vdiv.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0xfdaec51b, 0, 0xfef2bd11, 0, 0xff872740, 0, 0xff3d81e7, 0, + 0x00aa3ed6, 0, 0xfeb7d87b, 0, 0x01061804, 0, 0xffd0afa2); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x7954dd7fc5e0687c, 0xfcf310f44f869aa0, 0x375fcbcf6aae0cac, + 0x6038c1825cfb5a72, 0x59046c6b324e7fb3, 0xbac247d729fb18e6, + 0x0cecf04984784b3f, 0x2cd8e8ea5be0c201, 0xcab2cf17c48b57cb, + 0xcb53d1b55097656d, 0xbeeb18a6fab9af8d, 0xbf94f3a0fa74670b, + 0x817c1dfb5ab3bfd7, 0x40951ef6459642d1, 0x4b5f994556f6ba42, + 0x63a8eaa417e6d29c); + VLOAD_64(v24, 0xffffff9bd8e00c88, 0x0000002057c200e4, 0x0000004c4c93640e, + 0x000000497b7bfcdd, 0x00000001feebe76a, 0xffffffb93a2c242c, + 0xffffff8523c47d2a, 0x00000019f01c4433, 0xffffffd940862ecc, + 0x00000016ac4df9c8, 0x0000001e9f15d00c, 0xffffffd0af22d791, + 0xffffff9feca249bc, 0xffffffac5eae7985, 0x0000003567fe8027, + 0x000000175355cab3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vdiv.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xffffffffffe7dac0, 0, 0x00000000014f3850, 0, + 0x0000000000fa75a9, 0, 0x0000000001baa14f, 0, 0xfffffffffdad470e, 0, + 0x00000000015c87ee, 0, 0xffffffffff3a4e84, 0, 0x000000000445c6cf); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14, + 0x59, 0x98, 0xda, 0x76, 0x84); + int64_t scalar = 5; + asm volatile("vdiv.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0x04, 0xef, 0x0f, 0xf6, 0xf4, 0x18, 0xf1, 0xfd, 0xf4, 0xea, + 0x04, 0x11, 0xec, 0xf9, 0x17, 0xe8); + + VSET(16, e16, m2); + 
VLOAD_16(v4, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e, + 0x8fdc, 0x3396, 0x9442, 0xee77, 0x7da8, 0xf200, 0xaba3, 0x4cd6); + scalar = -538; + asm volatile("vdiv.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0xfff4, 0x0039, 0xffe9, 0x0014, 0x001f, 0x0031, 0xfff3, + 0xffd4, 0x0035, 0xffe8, 0x0033, 0x0008, 0xffc5, 0x0006, 0x0028, + 0xffdc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab, + 0x6be56bc4, 0x6f629cde, 0xf1ab595a, 0x3d99363b, 0xb8a7840e, + 0x84071026, 0x6697d435, 0x3768cf44, 0x82f1a5a1, 0xf5d4f40e, + 0xcda97e6d); + scalar = 649; + asm volatile("vdiv.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0x001d22cc, 0xfffe77b2, 0x002eb818, 0x00257573, 0x00263435, + 0x002a8f5a, 0x002befac, 0xfffa58e4, 0x00184c36, 0xffe3db85, + 0xffcf1946, 0x002877d8, 0x0015db3d, 0xffceabd8, 0xfffbfd39, + 0xffec24e2); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9, + 0x843d3c2e39d3221e, 0xda6c4bef77118459, 0x8c7e991a77cc3ddc, + 0x58f56c82eceafc72, 0xb4b1bac0a66d4984, 0x126283c905985ab8, + 0x3a859a64dbdb137e, 0x46674604f440792d, 0x04b1df734a3f312a, + 0xde91f735ce81d174, 0x3d254eb16d0c87f4, 0xc06ebbe7936e6774, + 0xb17ccbc475c8724e); + scalar = -59223; + asm volatile("vdiv.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x000013383ad25844, 0xffff9b84f9ef594c, 0xffffa48eb726f738, + 0x000088f40e45bbd1, 0x0000299522c72a62, 0x00007fd16a16b1db, + 0xffff9d8efec5cf15, 0x000053554738ae55, 0xffffeba7c8cdd664, + 0xffffbf3d66c69bdf, 0xffffb2177f70bf18, 0xffffface02d012e3, + 0x000024fe4bc4a5dc, 0xffffbc56186f1f3d, 0x00004657f2ee1ea3, + 0x000056e1b88b70d9); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x14, 0xab, 0x4d, 0xcd, 0xc3, 0x7c, 0xb5, 0xf0, 0xc1, 0x90, 0x14, + 0x59, 0x98, 0xda, 0x76, 0x84); + int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vdiv.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + 
VCMP_I8(13, v1, 0, 0xef, 0, 0xf6, 0, 0x18, 0, 0xfd, 0, 0xea, 0, 0x11, 0, 0xf9, + 0, 0xe8); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x19c9, 0x865a, 0x3063, 0xd5c2, 0xbe39, 0x98c7, 0x1ca7, 0x5d1e, + 0x8fdc, 0x3396, 0x9442, 0xee77, 0x7da8, 0xf200, 0xaba3, 0x4cd6); + scalar = -538; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vdiv.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0x0039, 0, 0x0014, 0, 0x0031, 0, 0xffd4, 0, 0xffe8, 0, + 0x0008, 0, 0x0006, 0, 0xffdc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x49dd393c, 0xfc1d701e, 0x7670b541, 0x5ef6c28f, 0x60da5cab, + 0x6be56bc4, 0x6f629cde, 0xf1ab595a, 0x3d99363b, 0xb8a7840e, + 0x84071026, 0x6697d435, 0x3768cf44, 0x82f1a5a1, 0xf5d4f40e, + 0xcda97e6d); + scalar = 649; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vdiv.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0xfffe77b2, 0, 0x00257573, 0, 0x002a8f5a, 0, 0xfffa58e4, + 0, 0xffe3db85, 0, 0x002877d8, 0, 0xffceabd8, 0, 0xffec24e2); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xeea1bad034de2c3e, 0x5acd284816152166, 0x52a24c3b98af20f9, + 0x843d3c2e39d3221e, 0xda6c4bef77118459, 0x8c7e991a77cc3ddc, + 0x58f56c82eceafc72, 0xb4b1bac0a66d4984, 0x126283c905985ab8, + 0x3a859a64dbdb137e, 0x46674604f440792d, 0x04b1df734a3f312a, + 0xde91f735ce81d174, 0x3d254eb16d0c87f4, 0xc06ebbe7936e6774, + 0xb17ccbc475c8724e); + scalar = -59223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vdiv.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0xffff9b84f9ef594c, 0, 0x000088f40e45bbd1, 0, + 0x00007fd16a16b1db, 0, 0x000053554738ae55, 0, 0xffffbf3d66c69bdf, 0, + 0xffffface02d012e3, 0, 0xffffbc56186f1f3d, 0, 0x000056e1b88b70d9); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c new file mode 100644 index 000000000..22ae19a22 
--- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vdivu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, + 0xab, 0xc2, 0xff, 0xf5, 0xc2); + VLOAD_8(v3, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, + 0x1c, 0x24, 0x1a, 0x22, 0x01); + asm volatile("vdivu.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x02, 0x04, 0x03, 0x00, 0x46, 0x05, 0x01, 0x04, 0x00, 0x10, + 0x03, 0x06, 0x05, 0x09, 0x07, 0xc2); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, + 0xbc2d, 0x45e7, 0x8407, 0x3845, 0x1495, 0x8ee6, 0x7da4, 0xf34a); + VLOAD_16(v6, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, + 0x00cd, 0x0093, 0x00ba, 0x03d0, 0x0117, 0x009d, 0x007b, 0x02cf); + asm volatile("vdivu.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x0039, 0x00de, 0x048d, 0x0014, 0x00ac, 0x01ca, 0x0046, + 0x007b, 0x00ea, 0x0079, 0x00b5, 0x000e, 0x0012, 0x00e9, 0x0105, + 0x0056); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, + 0xb62269e2, 0xe842ba75, 0x02fecbf9, 0xe536c712, 0xe93e2160, + 0x9ba34297, 0x554d290d, 0x319f668c, 0x0d6c2fbb, 0x6a7eb54a, + 0x3fa1cc84); + VLOAD_32(v12, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, + 0x00000021, 0x0000001d, 0x0000002f, 0x00000029, 0x00000008, + 0x00000015, 0x00000029, 0x00000048, 0x00000051, 0x0000003f, + 0x00000007); + asm volatile("vdivu.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x05c4a4c3, 0x002fb5c5, 0x03b21eb4, 0x0201dd84, 0x00f20682, + 0x0584ebef, 0x08024d0c, 0x00105098, 0x05973090, 0x1d27c42c, + 0x07694c50, 0x02149d19, 0x00b06fa5, 0x002a6c0b, 0x01b0bdcc, + 0x09171d37); + + VSET(16, e64, m8); 
+ VLOAD_64(v16, 0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, + 0x195bedcb9ce5956b, 0x41ce2b35b4280568, 0x32e5b89eed21de3b, + 0x73f17f3d0e1f243f, 0x0d0aa119af3b0e95, 0x024cbba6ba662213, + 0xf7df98f44da5e55f, 0x3cf2951aa7e4c0a9, 0x0ed8987446e84f30, + 0xd983a7a0d4c648b4, 0x60eb8249069801a5, 0x529967e8b06df477, + 0x776410b4b0cc22ad); + VLOAD_64(v24, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, + 0x000000000000b46f, 0x000000000000e92c, 0x0000000000024e72, + 0x0000000000032774, 0x000000000001c36f, 0x00000000000063d2, + 0x0000000000037bb4, 0x000000000003692c, 0x000000000001d60c, + 0x000000000002cf7b, 0x0000000000037899, 0x0000000000038bcf, + 0x000000000003d0e4); + asm volatile("vdivu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x00024ccd25dd5faf, 0x000054ac6a930494, 0x00002ebddee9df57, + 0x000023fac7321f85, 0x0000483f73b2e3e2, 0x000016114f5d8a9e, + 0x000024c26869df0e, 0x00000765470f410f, 0x000005e5de9b769d, + 0x0000472988fa89c3, 0x000011de6d57a394, 0x00000815e7b8df73, + 0x00004d64ede3b4a6, 0x00001bec0e79307a, 0x0000174af5139f58, + 0x00001f497ec0ff30); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x85, 0x1d, 0x9f, 0x31, 0x8c, 0x4c, 0x4c, 0xf2, 0x11, 0xfc, 0xc8, + 0xab, 0xc2, 0xff, 0xf5, 0xc2); + VLOAD_8(v3, 0x3d, 0x06, 0x32, 0x36, 0x02, 0x0f, 0x27, 0x35, 0x1e, 0x0f, 0x36, + 0x1c, 0x24, 0x1a, 0x22, 0x01); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vdivu.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x04, 0, 0x00, 0, 0x05, 0, 0x04, 0, 0x10, 0, 0x06, 0, 0x09, + 0, 0xc2); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xd200, 0xa047, 0x7af8, 0x453d, 0xd6eb, 0xfabb, 0x604a, 0xea35, + 0xbc2d, 0x45e7, 0x8407, 0x3845, 0x1495, 0x8ee6, 0x7da4, 0xf34a); + VLOAD_16(v6, 0x03ad, 0x00b8, 0x001b, 0x0353, 0x013f, 0x008c, 0x015e, 0x01e6, + 0x00cd, 0x0093, 0x00ba, 0x03d0, 0x0117, 0x009d, 0x007b, 0x02cf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vdivu.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x00de, 0, 0x0014, 0, 0x01ca, 0, 
0x007b, 0, 0x0079, 0, + 0x000e, 0, 0x00e9, 0, 0x0056); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd56bd03a, 0x1036c5ff, 0xaa01847f, 0x988dc35d, 0x4d8615dc, + 0xb62269e2, 0xe842ba75, 0x02fecbf9, 0xe536c712, 0xe93e2160, + 0x9ba34297, 0x554d290d, 0x319f668c, 0x0d6c2fbb, 0x6a7eb54a, + 0x3fa1cc84); + VLOAD_32(v12, 0x00000025, 0x00000057, 0x0000002e, 0x0000004c, 0x00000052, + 0x00000021, 0x0000001d, 0x0000002f, 0x00000029, 0x00000008, + 0x00000015, 0x00000029, 0x00000048, 0x00000051, 0x0000003f, + 0x00000007); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vdivu.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x002fb5c5, 0, 0x0201dd84, 0, 0x0584ebef, 0, 0x00105098, 0, + 0x1d27c42c, 0, 0x02149d19, 0, 0x002a6c0b, 0, 0x09171d37); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xf251717441f02108, 0x2668b5d7f073b9d1, 0x4708b49fd356a60a, + 0x195bedcb9ce5956b, 0x41ce2b35b4280568, 0x32e5b89eed21de3b, + 0x73f17f3d0e1f243f, 0x0d0aa119af3b0e95, 0x024cbba6ba662213, + 0xf7df98f44da5e55f, 0x3cf2951aa7e4c0a9, 0x0ed8987446e84f30, + 0xd983a7a0d4c648b4, 0x60eb8249069801a5, 0x529967e8b06df477, + 0x776410b4b0cc22ad); + VLOAD_64(v24, 0x000000000000695b, 0x0000000000007420, 0x000000000001850c, + 0x000000000000b46f, 0x000000000000e92c, 0x0000000000024e72, + 0x0000000000032774, 0x000000000001c36f, 0x00000000000063d2, + 0x0000000000037bb4, 0x000000000003692c, 0x000000000001d60c, + 0x000000000002cf7b, 0x0000000000037899, 0x0000000000038bcf, + 0x000000000003d0e4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vdivu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x000054ac6a930494, 0, 0x000023fac7321f85, 0, + 0x000016114f5d8a9e, 0, 0x00000765470f410f, 0, 0x0000472988fa89c3, 0, + 0x00000815e7b8df73, 0, 0x00001bec0e79307a, 0, 0x00001f497ec0ff30); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, + 0xfc, 0xa9, 0x8e, 0x67, 0x49); + uint64_t scalar = 5; + asm volatile("vdivu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + 
VCMP_I8(9, v1, 0x03, 0x25, 0x02, 0x11, 0x1c, 0x01, 0x21, 0x00, 0x0f, 0x2e, + 0x2a, 0x32, 0x21, 0x1c, 0x14, 0x0e); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, + 0xb2f9, 0x09d8, 0x3b19, 0x8ed0, 0x4740, 0xa628, 0x5560, 0x713b); + scalar = 538; + asm volatile("vdivu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x0062, 0x0005, 0x0009, 0x000b, 0x002c, 0x0047, 0x006a, + 0x0027, 0x0055, 0x0004, 0x001c, 0x0043, 0x0021, 0x004f, 0x0028, + 0x0035); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, + 0x68af922e, 0x09a3beb2, 0x117ff561, 0x86a1a3f7, 0x03566f4f, + 0xc3c0c7de, 0x8cb524f8, 0x532e1652, 0xb0c26bf2, 0x886d0b1c, + 0xf94d6b63); + scalar = 649; + asm volatile("vdivu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0x002786be, 0x00591abc, 0x0012e87f, 0x005af1c8, 0x003c7480, + 0x00294b2b, 0x0003cd68, 0x0006e722, 0x00351b13, 0x00015108, + 0x004d3723, 0x003780a6, 0x0020cf84, 0x0045b92a, 0x0035d049, + 0x0062568c); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, + 0x4fcb920a2a4ebc8d, 0x33a71e7364643a7c, 0x76f96f8403af4ad7, + 0xcdbbb2002ea6ac93, 0xc380d0b6a5182bcc, 0x93b79fcc64af88cf, + 0x85d32b075e613f6c, 0x4f1f75bfa6d8f319, 0xd2a34a8d9a02d7f1, + 0x8679a27b237a032e, 0x7e0881a487bbb235, 0x17d97d9849271cec, + 0x1c85ac87ba3c7d1e); + scalar = 9223; + asm volatile("vdivu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x000340383152452b, 0x000439f263aaf34a, 0x000569521e089c7c, + 0x0002370079144c76, 0x00016f07b37c5546, 0x00034d65d36c535c, + 0x0005b5e194247d88, 0x00056d3090f69ef0, 0x000419a3026cfde7, + 0x0003b6ebd974c870, 0x000232398140d5dd, 0x0005d8bb7bec2e99, + 0x0003bb8ab6abb03a, 0x00037f8e5aab0783, 0x0000a977deb32c78, + 0x0000caab9b4a8885); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0xbd, 0x0a, 0x58, 0x8e, 0x09, 0xa7, 0x02, 0x4b, 0xe8, 0xd2, + 0xfc, 0xa9, 0x8e, 0x67, 
0x49); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vdivu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0x25, 0, 0x11, 0, 0x01, 0, 0x00, 0, 0x2e, 0, 0x32, 0, 0x1c, + 0, 0x0e); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf2f, 0x0c02, 0x1417, 0x1747, 0x5e43, 0x9552, 0xe03e, 0x5367, + 0xb2f9, 0x09d8, 0x3b19, 0x8ed0, 0x4740, 0xa628, 0x5560, 0x713b); + scalar = 538; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vdivu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0x0005, 0, 0x000b, 0, 0x0047, 0, 0x0027, 0, 0x0004, 0, + 0x0043, 0, 0x004f, 0, 0x0035); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x643498d4, 0xe1e4c6d4, 0x2fef6b6f, 0xe68ef651, 0x9943599a, + 0x68af922e, 0x09a3beb2, 0x117ff561, 0x86a1a3f7, 0x03566f4f, + 0xc3c0c7de, 0x8cb524f8, 0x532e1652, 0xb0c26bf2, 0x886d0b1c, + 0xf94d6b63); + scalar = 649; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vdivu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x00591abc, 0, 0x005af1c8, 0, 0x00294b2b, 0, 0x0006e722, + 0, 0x00015108, 0, 0x003780a6, 0, 0x0045b92a, 0, 0x0062568c); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x751ea878eaf9f85f, 0x9843aba4c3e313af, 0xc2f56d78083dc0f2, + 0x4fcb920a2a4ebc8d, 0x33a71e7364643a7c, 0x76f96f8403af4ad7, + 0xcdbbb2002ea6ac93, 0xc380d0b6a5182bcc, 0x93b79fcc64af88cf, + 0x85d32b075e613f6c, 0x4f1f75bfa6d8f319, 0xd2a34a8d9a02d7f1, + 0x8679a27b237a032e, 0x7e0881a487bbb235, 0x17d97d9849271cec, + 0x1c85ac87ba3c7d1e); + scalar = 9223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vdivu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x000439f263aaf34a, 0, 0x0002370079144c76, 0, + 0x00034d65d36c535c, 0, 0x00056d3090f69ef0, 0, 0x0003b6ebd974c870, 0, + 0x0005d8bb7bec2e99, 0, 0x00037f8e5aab0783, 0, 0x0000caab9b4a8885); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c new file mode 100644 index 000000000..d2fb46a39 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfadd.c @@ -0,0 +1,449 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Notes: hard to check if FS is Dirtied by the first vector FP instruction +// since it is not accessible in U mode and it is dirtied before the first vfp +// operation + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, + // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, + // 0.1356, 0.5337 + VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, + // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, + // -0.3064, 0.2073 + VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + asm volatile("vfadd.vv v2, v4, v6"); + // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, + // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, + // -0.1708, 0.7412 + VCMP_U16(1, v2, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, + 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a4, 0x303e, 0xb177, + 0x800f); + + VSET(16, e32, m4); + // -0.28968573, 0.40292332, 0.33936000, 0.53889370, 0.39942014, + // -0.27004066, 0.78120714, -0.15632398, -0.49984047, + // -0.69259918, -0.03384063, -0.62385744, 0.00338853, 0.33711585, + // -0.34673852, 0.11450682 + VLOAD_32(v8, 0xbe9451b0, 0x3ece4bf7, 
0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, + 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, + 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, + 0x3dea828d); + // -0.62142891, 0.63306540, 0.26511025, 0.85738784, + // -0.78492641, -0.44331804, -0.84668529, 0.13981950, 0.84909225, + // 0.23569171, 0.34283128, 0.56619811, 0.22596644, 0.55843508, + // 0.53194439, 0.02510819 + VLOAD_32(v12, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, + 0xbee2fa95, 0xbf58c05e, 0x3e0f2cd8, 0x3f595e1c, 0x3e71592b, + 0x3eaf8795, 0x3f10f25c, 0x3e6763bf, 0x3f0ef59a, 0x3f082d82, + 0x3ccdafb0); + asm volatile("vfadd.vv v4, v8, v12"); + // -0.91111463, 1.03598869, 0.60447025, 1.39628148, + // -0.38550627, -0.71335870, -0.06547815, -0.01650448, 0.34925178, + // -0.45690745, 0.30899066, -0.05765933, 0.22935496, 0.89555097, + // 0.18520588, 0.13961500 + VCMP_U32(2, v4, 0xbf693ecf, 0x3f849b47, 0x3f1abe90, 0x3fb2b95a, 0xbec56114, + 0xbf369ead, 0xbd861968, 0xbc873468, 0x3eb2d121, 0xbee9efc6, + 0x3e9e3406, 0xbd6c2c30, 0x3e6adc07, 0x3f6542d4, 0x3e3da69c, + 0x3e0ef73c); + + VSET(16, e64, m8); + // -0.1192486190170796, 0.7099687505713703, -0.6001652243371716, + // -0.9559723926483070, 0.7987976623002717, -0.3314459653039117, + // 0.7678805321182058, -0.3118871679402779, -0.7580588930783800, + // 0.5940681950113129, 0.6471754222100761, 0.4175915562917139, + // -0.3690504607938143, 0.0740574148132984, -0.1493616685664843, + // 0.3560295367616439 + VLOAD_64(v16, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, + 0xbfee97536a49b50a, 0x3fe98fc01d766dee, 0xbfd536692357c5dc, + 0x3fe8927a3195d944, 0xbfd3f5f598961d8c, 0xbfe84204b946d5d6, + 0x3fe3029b4da55ad8, 0x3fe4b5a93b255a44, 0x3fdab9d1ef56f430, + 0xbfd79e85d2ebb8f0, 0x3fb2f56d3ea64090, 0xbfc31e487ce26ff0, + 0x3fd6c9301c334858); + // -0.7765903295164327, 0.4195489676706889, -0.3911414124398265, + // 0.6922029856623244, 0.5664741772288600, -0.1412820433489181, + // -0.1847941224896075, -0.4907136082532593, 
-0.9146160877742129, + // -0.7130864084314152, -0.5516927493459973, -0.4203081001100177, + // 0.6487326796833275, -0.5631384800254344, -0.0996872955425372, + // -0.4382844162164241 + VLOAD_64(v24, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + asm volatile("vfadd.vv v8, v16, v24"); + // -0.8958389485335123, 1.1295177182420593, -0.9913066367769980, + // -0.2637694069859826, 1.3652718395291317, -0.4727280086528298, + // 0.5830864096285984, -0.8026007761935372, -1.6726749808525929, + // -0.1190182134201023, 0.0954826728640787, -0.0027165438183039, + // 0.2796822188895132, -0.4890810652121360, -0.2490489641090214, + // -0.0822548794547802 + VCMP_U64(3, v8, 0xbfecaab6714de71e, 0x3ff212812bc1a28f, 0xbfefb8c8b2287a88, + 0xbfd0e199142c2ac0, 0x3ff5d82748ced68d, 0xbfde412cfa444b84, + 0x3fe2a8a4d48319b8, 0xbfe9aee7d2afdaca, 0xbffac346d73996c9, + 0xbfbe77fa46448730, 0x3fb8718d6d492fc0, 0xbf6641015b72d200, + 0x3fd1e6503fd9cd94, 0xbfdf4d1aab0b7434, 0xbfcfe0d621f9b7e8, + 0xbfb50ea7e131d810); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfadd.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0, 0x3501, 0, 0xab30, 0, 0x3170, 0, 0x34c8, 0, 0xba74, 0, + 0x3974, 0, 0x303e, 0, 0x800f); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 
0x3f09f4f0, 0x3ecc80cc, + 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, + 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, + 0x3dea828d); + VLOAD_32(v12, 0xbf1f15f7, 0x3f221093, 0x3e87bc88, 0x3f5b7dc5, 0xbf48f0f0, + 0xbee2fa95, 0xbf58c05e, 0x3e0f2cd8, 0x3f595e1c, 0x3e71592b, + 0x3eaf8795, 0x3f10f25c, 0x3e6763bf, 0x3f0ef59a, 0x3f082d82, + 0x3ccdafb0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfadd.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0, 0x3f849b47, 0, 0x3fb2b95a, 0, 0xbf369ead, 0, 0xbc873468, 0, + 0xbee9efc6, 0, 0xbd6c2c30, 0, 0x3f6542d4, 0, 0x3e0ef73c); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xbfbe8713d6c58260, 0x3fe6b810629c5a40, 0xbfe3348db3573060, + 0xbfee97536a49b50a, 0x3fe98fc01d766dee, 0xbfd536692357c5dc, + 0x3fe8927a3195d944, 0xbfd3f5f598961d8c, 0xbfe84204b946d5d6, + 0x3fe3029b4da55ad8, 0x3fe4b5a93b255a44, 0x3fdab9d1ef56f430, + 0xbfd79e85d2ebb8f0, 0x3fb2f56d3ea64090, 0xbfc31e487ce26ff0, + 0x3fd6c9301c334858); + VLOAD_64(v24, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0, 0x3ff212812bc1a28f, 0, 0xbfd0e199142c2ac0, 0, + 0xbfde412cfa444b84, 0, 0xbfe9aee7d2afdaca, 0, 0xbfbe77fa46448730, 0, + 0xbf6641015b72d200, 0, 0xbfdf4d1aab0b7434, 0, 0xbfb50ea7e131d810); +}; + +// Edge-case tests +void TEST_CASE3(void) { + VSET(16, e16, m2); + VLOAD_16(v4, pInfh, pInfh, mInfh, qNaNh, pMaxh, pMaxh, pZero, mZeroh, pZero, + pMaxh, pZero, qNaNh, mInfh, pInfh, qNaNh, qNaNh); + VLOAD_16(v6, mInfh, pInfh, mInfh, pZero, pMaxh, mMaxh, pZero, mZeroh, mZeroh, + mZeroh, mMaxh, 0x1, 0xba88, pZero, qNaNh, 0xba88); + asm volatile("vfadd.vv v2, v4, v6"); + 
VCMP_U16(7, v2, qNaNh, pInfh, mInfh, qNaNh, pInfh, pZero, pZero, mZeroh, + pZero, pMaxh, mMaxh, qNaNh, mInfh, pInfh, qNaNh, qNaNh); + + VSET(16, e32, m4); + VLOAD_32(v8, pInff, pInff, mInff, qNaNf, pMaxf, pMaxf, pZero, mZerof, pZero, + pMaxf, pZero, qNaNf, mInff, pInff, qNaNf, qNaNf); + VLOAD_32(v12, mInff, pInff, mInff, pZero, pMaxf, mMaxf, pZero, mZerof, mZerof, + mZerof, mMaxf, 0x1, 0xbf48f0f0, pZero, qNaNf, 0xbf48f0f0); + asm volatile("vfadd.vv v4, v8, v12"); + VCMP_U32(8, v4, qNaNf, pInff, mInff, qNaNf, pInff, pZero, pZero, mZerof, + pZero, pMaxf, mMaxf, qNaNf, mInff, pInff, qNaNf, qNaNf); + + VSET(16, e64, m8); + VLOAD_64(v16, pInfd, pInfd, mInfd, qNaNd, pMaxd, pMaxd, pZero, mZerod, pZero, + pMaxd, pZero, qNaNd, mInfd, pInfd, qNaNd, qNaNd); + VLOAD_64(v24, mInfd, pInfd, mInfd, pZero, pMaxd, mMaxd, pZero, mZerod, mZerod, + mZerod, mMaxd, 0x1, 0xbfd90875fda29450, pZero, qNaNd, + 0xbfd90875fda29450); + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U64(9, v8, qNaNd, pInfd, mInfd, qNaNd, pInfd, pZero, pZero, mZerod, + pZero, pMaxd, mMaxd, qNaNd, mInfd, pInfd, qNaNd, qNaNd); +}; + +// Imprecise exceptions +// If the check is done immediately after the vector instruction, it fails as it +// is completed before the "faulty" operations are executed by Ara's FPU +void TEST_CASE4(void) { + // Overflow + Inexact + CLEAR_FFLAGS; + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, + pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh); + VLOAD_16(v6, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, + pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh, pMaxh); + asm volatile("vfadd.vv v2, v4, v6"); + VCMP_U16(10, v2, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, + pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh, pInfh); + CHECK_FFLAGS(OF | NX); + + // Invalid operation only: +Inf + -Inf (no overflow, no inexact) + CLEAR_FFLAGS; + VSET(16, e32, m4); + CHECK_FFLAGS(0); + VLOAD_32(v8, pInff, pInff, pInff, pInff, pInff, pInff, pInff, 
pInff, pInff, + pInff, pInff, pInff, pInff, pInff, pInff, pInff); + VLOAD_32(v12, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, mInff, + mInff, mInff, mInff, mInff, mInff, mInff, mInff); + asm volatile("vfadd.vv v4, v8, v12"); + VCMP_U32(11, v4, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, + qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf, qNaNf); + CHECK_FFLAGS(NV); + + // Invalid operation, overflow, inexact + CLEAR_FFLAGS; + VSET(16, e64, m8); + CHECK_FFLAGS(0); + VLOAD_64(v16, pMaxd, pInfd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, + pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd); + VLOAD_64(v24, pMaxd, mInfd, 0x8000000000000001, pMaxd, pMaxd, pMaxd, pMaxd, + pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd, pMaxd); + asm volatile("vfadd.vv v8, v16, v24"); + VCMP_U64(12, v8, pInfd, qNaNd, pMaxd, pInfd, pInfd, pInfd, pInfd, pInfd, + pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd, pInfd); + CHECK_FFLAGS(NV | OF | NX); +}; + +// Different rounding-mode + Back-to-back rm change and vfp operation +// Index 12 (starting from 0) rounds differently for RNE and RTZ +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, + // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, + // 0.1356, 0.5337 + VLOAD_16(v4, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, + // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, + // -0.3064, 0.2073 + VLOAD_16(v6, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + CHANGE_RM(RM_RTZ); + asm volatile("vfadd.vv v2, v4, v6"); + // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, + // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, + // -0.1708, 0.7412 + VCMP_U16(13, v2, 0xbed3, 0x3501, 0x3439, 
0xab30, 0xb02b, 0x3170, 0x38f2, + 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a3, 0x303e, 0xb177, + 0x800f); + + VSET(16, e16, m4); + // -0.8896, -0.3406, 0.7324, -0.6846, -0.2969, -0.7739, 0.5737, + // 0.4331, 0.8940, -0.4900, 0.4219, 0.4639, 0.6694, 0.4382, + // 0.1356, 0.5337 + VLOAD_16(v8, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + // -0.8164, 0.6533, -0.4685, 0.6284, 0.1666, 0.9438, 0.0445, + // -0.1342, -0.8071, -0.3167, -0.8350, 0.2178, -0.0896, -0.3057, + // -0.3064, 0.2073 + VLOAD_16(v12, 0xba88, 0x393a, 0xb77f, 0x3907, 0x3155, 0x3b8d, 0x29b3, 0xb04b, + 0xba75, 0xb511, 0xbaae, 0x32f8, 0xadbc, 0xb4e4, 0xb4e7, 0x8010); + CHANGE_RM(RM_RNE); + asm volatile("vfadd.vv v4, v8, v12"); + // -1.7061, 0.3127, 0.2639, -0.0562, -0.1302, 0.1699, 0.6182, + // 0.2988, 0.0869, -0.8066, -0.4131, 0.6816, 0.5801, 0.1326, + // -0.1708, 0.7412 + VCMP_U16(14, v4, 0xbed3, 0x3501, 0x3439, 0xab30, 0xb02b, 0x3170, 0x38f2, + 0x34c8, 0x2d90, 0xba74, 0xb69c, 0x3974, 0x38a4, 0x303e, 0xb177, + 0x800f); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -0.1481, -0.1797, -0.5454, 0.3228, 0.3237, -0.7212, -0.5195, + // -0.4500, 0.2681, 0.7300, 0.5059, 0.5830, 0.3198, -0.1713, + // -0.6431, 0.4841 + VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + asm volatile("vfadd.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -1.0859, -1.1172, -1.4834, -0.6152, -0.6143, -1.6592, + // -1.4570, -1.3877, -0.6699, -0.2080, -0.4321, -0.3550, + // -0.6182, -1.1094, -1.5811, -0.4539 + VCMP_U16(15, v2, 0xbc58, 0xbc78, 0xbdef, 0xb8ec, 0xb8ea, 0xbea3, 0xbdd4, + 0xbd8d, 0xb95c, 0xb2a8, 0xb6ea, 0xb5ae, 0xb8f2, 0xbc70, 0xbe53, + 0xb743); + + VSET(16, e32, m4); + // 0.86539453, -0.53925377, 
-0.47128764, 0.99265540, + // 0.32128176, -0.47335613, -0.30028856, 0.44394016, + // -0.72540921, -0.26464799, 0.77351445, -0.21725702, + // -0.25191557, -0.53123665, 0.80404943, 0.81841671 + VLOAD_32(v8, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, + 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, + 0x3f5183c2); + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + asm volatile("vfadd.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.09516734, -1.49981570, -1.43184948, 0.03209352, + // -0.63928008, -1.43391800, -1.26085043, -0.51662171, + // -1.68597102, -1.22520983, -0.18704742, -1.17781889, + // -1.21247745, -1.49179852, -0.15651244, -0.14214516 + VCMP_U32(16, v4, 0xbdc2e718, 0xbfbff9f6, 0xbfb746d8, 0x3d037480, 0xbf23a7dc, + 0xbfb78aa0, 0xbfa1638c, 0xbf044152, 0xbfd7cde6, 0xbf9cd3ad, + 0xbe3f895c, 0xbf96c2c5, 0xbf9b3276, 0xbfbef341, 0xbe2044cc, + 0xbe118e80); + + VSET(16, e64, m8); + // -0.3488917150781869, -0.4501495513738740, 0.8731197104152684, + // 0.3256432550932964, 0.6502591178769535, -0.3169358689246526, + // -0.5396694979141685, -0.5417807430937591, + // -0.7971574213160249, -0.1764794100111047, 0.3564275916066595, + // -0.3754449946313438, 0.6580947137446858, + // -0.3328857144699515, 0.1761214464164236, 0.1429774118511240 + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, + 0x3fe50f1ca5268668, 0xbfd54dffe23d0eec, 0x3fc68b25c63dcaf0, + 0x3fc24d1575fbd080); + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + asm volatile("vfadd.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.5619790110445508, 0.4607211747488638, 1.7839904365380062, + // 1.2365139812160342, 1.5611298439996912, 
0.5939348571980851, + // 0.3712012282085693, 0.3690899830289787, 0.1137133048067129, + // 0.7343913161116331, 1.2672983177293973, 0.5354257314913939, + // 1.5689654398674235, 0.5779850116527863, 1.0869921725391614, + // 1.0538481379738618 + VCMP_U64(17, v8, 0x3fe1fbbb682f314e, 0x3fdd7c74aa87bfa0, 0x3ffc8b398e54eb85, + 0x3ff3c8c2e265e9fc, 0x3ff8fa63498c9cb2, 0x3fe30183ac73d8ba, + 0x3fd7c1c2cbd9037c, 0x3fd79f2b9799008c, 0x3fbd1c50ad43d140, + 0x3fe7802237a54ebe, 0x3ff446da99cec6fa, 0x3fe1223524c62842, + 0x3ff91a7b814c8eb3, 0x3fe27eda6c540f88, 0x3ff16451e78104dd, + 0x3ff0dc8fdd78c58f); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE7(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); + double dscalar_16; + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfadd.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(18, v2, 0, 0xbc78, 0, 0xb8ec, 0, 0xbea3, 0, 0xbd8d, 0, 0xb2a8, 0, + 0xb5ae, 0, 0xbc70, 0, 0xb743); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, + 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, + 0x3f5183c2); + double dscalar_32; + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfadd.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(19, v4, 0, 0xbfbff9f6, 0, 0x3d037480, 0, 0xbfb78aa0, 0, 0xbf044152, + 0, 0xbf9cd3ad, 0, 0xbf96c2c5, 0, 0xbfbef341, 0, 0xbe118e80); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, + 
0x3fe50f1ca5268668, 0xbfd54dffe23d0eec, 0x3fc68b25c63dcaf0, + 0x3fc24d1575fbd080); + double dscalar_64; + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfadd.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(20, v8, 0, 0x3fdd7c74aa87bfa0, 0, 0x3ff3c8c2e265e9fc, 0, + 0x3fe30183ac73d8ba, 0, 0x3fd79f2b9799008c, 0, 0x3fe7802237a54ebe, 0, + 0x3fe1223524c62842, 0, 0x3fe27eda6c540f88, 0, 0x3ff0dc8fdd78c58f); +}; + +// Raise exceptions only on active elements! +void TEST_CASE8(void) { + // Overflow and Inexact. Invalid operation should not be raised. + CLEAR_FFLAGS; + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, + pMaxh, pInfh, pMaxh, pInfh, pMaxh, pInfh, pMaxh); + VLOAD_16(v6, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, + pMaxh, mInfh, pMaxh, mInfh, pMaxh, mInfh, pMaxh); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfadd.vv v2, v4, v6, v0.t"); + VCMP_U16(21, v2, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, 0, pInfh, + 0, pInfh, 0, pInfh); + CHECK_FFLAGS(OF | NX); + + // Invalid operation. Overflow and Inexact should not be raised. 
+ CLEAR_FFLAGS; + VSET(16, e32, m4); + CHECK_FFLAGS(0); + VLOAD_32(v8, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, + pInff, pMaxf, pInff, pMaxf, pInff, pMaxf, pInff); + VLOAD_32(v12, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, + mInff, pMaxf, mInff, pMaxf, mInff, pMaxf, mInff); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfadd.vv v4, v8, v12, v0.t"); + VCMP_U32(22, v4, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, 0, qNaNf, + 0, qNaNf, 0, qNaNf); + CHECK_FFLAGS(NV); + + // No exception should be raised + CLEAR_FFLAGS; + VSET(16, e64, m8); + CHECK_FFLAGS(0); + VLOAD_64(v16, pMaxd, 0, pInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, + pMaxd, 0, pMaxd, 0); + VLOAD_64(v24, pMaxd, 0, mInfd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, pMaxd, 0, + pMaxd, 0, pMaxd, 0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfadd.vv v8, v16, v24, v0.t"); + VCMP_U64(23, v8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + CHECK_FFLAGS(0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c new file mode 100644 index 000000000..63f9b973a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfclass.c @@ -0,0 +1,90 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Test all the different output possibilities +void TEST_CASE1(void) { + CLEAR_FFLAGS; + CHECK_FFLAGS(0); + + VSET(16, e16, m2); + VLOAD_16(v4, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xc767, pZero, mZeroh, + 0x8075, 0x00c5, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xb767); + asm volatile("vfclass.v v2, v4"); + VCMP_U16(1, v2, CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm, CLASS_pZero, CLASS_mZero, CLASS_mSub, CLASS_pSub, + CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm); + + VSET(16, e32, m4); + VLOAD_32(v8, mInff, pInff, qNaNf, sNaNf, 0x3f738772, 0xbdef32e4, pZero, + mZerof, 0x80000075, 0x000000c5, mInff, pInff, qNaNf, sNaNf, + 0x3f738772, 0xbdef32e4); + asm volatile("vfclass.v v4, v8"); + VCMP_U32(2, v4, CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm, CLASS_pZero, CLASS_mZero, CLASS_mSub, CLASS_pSub, + CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm); + + VSET(16, e64, m8); + VLOAD_64(v16, mInfd, pInfd, qNaNd, sNaNd, 0x3def3136584672de, + 0xbdef3136584672de, pZero, mZerod, 0x8000000000000075, + 0x0000000000000c5, mInfd, pInfd, qNaNd, sNaNd, 0x3def313584672de4, + 0xbdef313654672de4); + asm volatile("vfclass.v v8, v16"); + VCMP_U64(3, v8, CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm, CLASS_pZero, CLASS_mZero, CLASS_mSub, CLASS_pSub, + CLASS_mInf, CLASS_pInf, CLASS_qNAN, CLASS_sNAN, CLASS_pNorm, + CLASS_mNorm); +}; + +// Test all the different output possibilities (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xc767, pZero, mZeroh, + 0x8075, 0x00c5, mInfh, pInfh, qNaNh, sNaNh, 0x3b27, 0xb767); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfclass.v v2, v4, v0.t"); + VCMP_U16(4, v2, 0, CLASS_pInf, 0, CLASS_sNAN, 0,
CLASS_mNorm, 0, CLASS_mZero, + 0, CLASS_pSub, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm); + + VSET(16, e32, m4); + VLOAD_32(v8, mInff, pInff, qNaNf, sNaNf, 0x3f738772, 0xbdef32e4, pZero, + mZerof, 0x80000075, 0x000000c5, mInff, pInff, qNaNf, sNaNf, + 0x3f738772, 0xbdef32e4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfclass.v v4, v8, v0.t"); + VCMP_U32(5, v4, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm, 0, CLASS_mZero, + 0, CLASS_pSub, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm); + + VSET(16, e64, m8); + VLOAD_64(v16, mInfd, pInfd, qNaNd, sNaNd, 0x3def313584672de4, + 0xbdef313658467de4, pZero, mZerod, 0x8000000000000075, + 0x0000000000000c5, mInfd, pInfd, qNaNd, sNaNd, 0x3def313658672de4, + 0xbdef313654672de4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfclass.v v8, v16, v0.t"); + VCMP_U64(6, v8, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm, 0, CLASS_mZero, + 0, CLASS_pSub, 0, CLASS_pInf, 0, CLASS_sNAN, 0, CLASS_mNorm); + CHECK_FFLAGS(0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + // No exception should be raised by vfclass.v + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c new file mode 100644 index 000000000..64aea6e58 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfcvt.c @@ -0,0 +1,834 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// We assume RNE rounding when not specified by the encoding + +//////////////// +// vfcvt.xu.f // +//////////////// + +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -2372.000, 2978.000, -5132.000, -3426.000, -3878.000, + // 9680.000, 76.000, -8128.000, -2314.000, -4660.000, 8672.000, + // 8824.000, -5732.000, -1557.000, -2302.000, -407.250 + VLOAD_16(v4, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, + 0xe885, 0xec8d, 0x703c, 0x704f, 0xed99, 0xe615, 0xe87f, 0xde5d); + asm volatile("vfcvt.xu.f.v v6, v4"); + // 0, 2978, 0, 0, 0, + // 9680, 76, 0, 0, 0, + // 8672, 8824, 0, 0, 0, + // 0 + VCMP_U16(1, v6, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x004c, + 0x0000, 0x0000, 0x0000, 0x21e0, 0x2278, 0x0000, 0x0000, 0x0000, + 0x0000); + + VSET(16, e32, m4); + // -82436.352, -5427.481, -30119.082, 53784.066, 76500.719, + // 65152.020, -94151.375, 71894.320, -20547.545, 95485.906, + // 92834.711, -28081.711, -9716.506, 62508.508, 90410.883, + // 42708.285 + VLOAD_32(v8, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, + 0x477e8005, 0xc7b7e3b0, 0x478c6b29, 0xc6a08717, 0x47ba7ef4, + 0x47b5515b, 0xc6db636c, 0xc617d206, 0x47742c82, 0x47b09571, + 0x4726d449); + asm volatile("vfcvt.xu.f.v v12, v8"); + // 0, 0, 0, 53784, 76501, + // 65152, 0, 71894, 0, 95486, + // 92835, 0, 0, 62509, + // 90411, 42708 + VCMP_U32(2, v12, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00012ad5, + 0x0000fe80, 0x00000000, 0x000118d6, 0x00000000, 0x000174fe, + 0x00016aa3, 0x00000000, 0x00000000, 0x0000f42d, 0x0001612b, + 0x0000a6d4); + + VSET(16, e64, m8); + // 3554390.405, 3670449.443, 3880983.535, 3452087.537, + // -5447847.496, 498812.179, 9535291.051, 113884.868, + // 2124622.198, -2164534.614, 1377445.305, -2114478.485, + // -4704971.356, -7866057.432, 7002504.380, -2981734.692 + VLOAD_64(v16, 0x414b1e2b33d13be4, 
0x414c00d8b8b34d48, 0x414d9c0bc4751d78, + 0x414a565bc4adf2d0, 0xc154c829dfc2d9f6, 0x411e71f0b7161c00, + 0x41622fe7619e55e2, 0x40fbcdcde34f1a00, 0x414035a7194d9794, + 0xc140839b4e886550, 0x413504a54df56888, 0xc14021d73e1606dc, + 0xc151f2b2d6cc57c8, 0xc15e01b25baceaba, 0x415ab6621850fa94, + 0xc146bfb358869da2); + asm volatile("vfcvt.xu.f.v v24, v16"); + // 3554390, 3670449, 3880984, + // 3452088, 0, 498812, + // 9535291, 113885, 2124622, 0, + // 1377445, 0, 0, 0, 7002504, + // 0 + VCMP_U64(3, v24, 0x0000000000363c56, 0x00000000003801b1, 0x00000000003b3818, + 0x000000000034acb8, 0x0000000000000000, 0x0000000000079c7c, + 0x0000000000917f3b, 0x000000000001bcdd, 0x0000000000206b4e, + 0x0000000000000000, 0x00000000001504a5, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x00000000006ad988, + 0x0000000000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -2372.000, 2978.000, -5132.000, -3426.000, -3878.000, + // 9680.000, 76.000, -8128.000, -2314.000, -4660.000, 8672.000, + // 8824.000, -5732.000, -1557.000, -2302.000, -407.250 + VLOAD_16(v4, 0xe8a2, 0x69d1, 0xed03, 0xeab1, 0xeb93, 0x70ba, 0x54c0, 0xeff0, + 0xe885, 0xec8d, 0x703c, 0x704f, 0xed99, 0xe615, 0xe87f, 0xde5d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.xu.f.v v6, v4, v0.t"); + // 0, 2978, 0, 0, 0, + // 9680, 0, 0, 0, 0, + // 0, 8824, 0, 0, 0, 0 + VCMP_U16(4, v6, 0x0000, 0x0ba2, 0x0000, 0x0000, 0x0000, 0x25d0, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x2278, 0x0000, 0x0000, 0x0000, + 0x0000); + + VSET(16, e32, m4); + // -82436.352, -5427.481, -30119.082, 53784.066, 76500.719, + // 65152.020, -94151.375, 71894.320, -20547.545, 95485.906, + // 92834.711, -28081.711, -9716.506, 62508.508, 90410.883, + // 42708.285 + VLOAD_32(v8, 0xc7a1022d, 0xc5a99bd9, 0xc6eb4e2a, 0x47521811, 0x47956a5c, + 0x477e8005, 0xc7b7e3b0, 0x478c6b29, 0xc6a08717, 0x47ba7ef4, + 0x47b5515b, 
0xc6db636c, 0xc617d206, 0x47742c82, 0x47b09571, + 0x4726d449); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.xu.f.v v12, v8, v0.t"); + // 0, 0, 0, 53784, 0, + // 65152, 0, 71894, 0, 95486, + // 0, 0, 0, 62509, 0, + // 42708 + VCMP_U32(5, v12, 0x00000000, 0x00000000, 0x00000000, 0x0000d218, 0x00000000, + 0x0000fe80, 0x00000000, 0x000118d6, 0x00000000, 0x000174fe, + 0x00000000, 0x00000000, 0x00000000, 0x0000f42d, 0x00000000, + 0x0000a6d4); + + VSET(16, e64, m8); + // 3554390.405, 3670449.443, 3880983.535, 3452087.537, + // -5447847.496, 498812.179, 9535291.051, 113884.868, + // 2124622.198, -2164534.614, 1377445.305, -2114478.485, + // -4704971.356, -7866057.432, 7002504.380, -2981734.692 + VLOAD_64(v16, 0x414b1e2b33d13be4, 0x414c00d8b8b34d48, 0x414d9c0bc4751d78, + 0x414a565bc4adf2d0, 0xc154c829dfc2d9f6, 0x411e71f0b7161c00, + 0x41622fe7619e55e2, 0x40fbcdcde34f1a00, 0x414035a7194d9794, + 0xc140839b4e886550, 0x413504a54df56888, 0xc14021d73e1606dc, + 0xc151f2b2d6cc57c8, 0xc15e01b25baceaba, 0x415ab6621850fa94, + 0xc146bfb358869da2); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.xu.f.v v24, v16, v0.t"); + // 0, 3670449, 0, 3452088, 0, + // 498812, 0, 113885, + // 0, 0, 0, 0, 0, 0, 0, + // 0 + VCMP_U64(6, v24, 0x0000000000000000, 0x00000000003801b1, 0x0000000000000000, + 0x000000000034acb8, 0x0000000000000000, 0x0000000000079c7c, + 0x0000000000000000, 0x000000000001bcdd, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000); +}; + +/////////////// +// vfcvt.x.f // +/////////////// + +// Unmasked vfcvt.x.f.v +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 4144.000, -861.500, -8680.000, 3792.000, 8800.000, + // -2330.000, -3066.000, -6148.000, 4776.000, 7360.000, + // -7880.000, -1843.000, -7896.000, -6520.000, -7092.000, + // -8344.000 + VLOAD_16(v4, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, + 0x6caa,
0x6f30, 0xefb2, 0xe733, 0xefb6, 0xee5e, 0xeeed, 0xf013); + asm volatile("vfcvt.x.f.v v6, v4"); + // 4144, -862, -8680, 3792, 8800, + // -2330, -3066, -6148, 4776, 7360, + // -7880, -1843, -7896, -6520, -7092, + // -8344 + VCMP_U16(7, v6, 0x1030, 0xfca2, 0xde18, 0x0ed0, 0x2260, 0xf6e6, 0xf406, + 0xe7fc, 0x12a8, 0x1cc0, 0xe138, 0xf8cd, 0xe128, 0xe688, 0xe44c, + 0xdf68); + + VSET(16, e32, m4); + // -28075.818, -5455.616, 6106.086, -11952.592, -50887.914, + // -23028.832, -9221.246, -71657.047, -6655.005, -21208.561, + // -30018.096, -19766.838, 48541.953, -62313.625, 13515.192, + // -83224.820 + VLOAD_32(v8, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, + 0xc6b3e9aa, 0xc61014fc, 0xc78bf486, 0xc5cff80a, 0xc6a5b11f, + 0xc6ea8431, 0xc69a6dad, 0x473d9df4, 0xc77369a0, 0x46532cc5, + 0xc7a28c69); + asm volatile("vfcvt.x.f.v v12, v8"); + // -28076, -5456, 6106, -11953, + // -50888, -23029, -9221, -71657, -6655, + // -21209, -30018, -19767, 48542, -62314, + // 13515, -83225 + VCMP_U32(8, v12, 0xffff9254, 0xffffeab0, 0x000017da, 0xffffd14f, 0xffff3938, + 0xffffa60b, 0xffffdbfb, 0xfffee817, 0xffffe601, 0xffffad27, + 0xffff8abe, 0xffffb2c9, 0x0000bd9e, 0xffff0c96, 0x000034cb, + 0xfffebae7); + + VSET(16, e64, m8); + // 3087905.033, -2534011.630, 7824302.813, + // -9294206.521, 6436555.847, 6645117.193, + // 1358075.867, 5694551.012, -9840938.636, + // 4621816.383, 2584370.751, 5569558.860, + // 495487.041, 4759865.418, -6831172.669, + // 8371055.296 + VLOAD_64(v16, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, + 0xc161ba2fd0a8a593, 0x41588db2f632700c, 0x4159595f4c588b60, + 0x4134b8fbde131210, 0x4155b915c0cb4294, 0xc162c52554566300, + 0x4151a17e187d1aa8, 0x4143b7996029dc68, 0x41553f05b70b6824, + 0x411e3dfc2a598ba0, 0x4152284e5ac4da5a, 0xc15a0f112acbf258, + 0x415feedbd2ed6038); + asm volatile("vfcvt.x.f.v v24, v16"); + // 3087905, -2534012, 7824303, + // -9294207, 6436556, 6645117, + // 1358076, 5694551, -9840939, + // 4621816, 2584371, 5569559, + // 
495487, 4759865, -6831173, + // 8371055 + VCMP_U64(9, v24, 0x00000000002f1e21, 0xffffffffffd95584, 0x00000000007763af, + 0xffffffffff722e81, 0x00000000006236cc, 0x000000000065657d, + 0x000000000014b8fc, 0x000000000056e457, 0xffffffffff69d6d5, + 0x00000000004685f8, 0x0000000000276f33, 0x000000000054fc17, + 0x0000000000078f7f, 0x000000000048a139, 0xffffffffff97c3bb, + 0x00000000007fbb6f); +}; + +// Masked vfcvt.x.f.v +void TEST_CASE4(void) { + VSET(16, e16, m2); + // 4144.000, -861.500, -8680.000, 3792.000, 8800.000, + // -2330.000, -3066.000, -6148.000, 4776.000, 7360.000, + // -7880.000, -1843.000, -7896.000, -6520.000, -7092.000, + // -8344.000 + VLOAD_16(v4, 0x6c0c, 0xe2bb, 0xf03d, 0x6b68, 0x704c, 0xe88d, 0xe9fd, 0xee01, + 0x6caa, 0x6f30, 0xefb2, 0xe733, 0xefb6, 0xee5e, 0xeeed, 0xf013); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.x.f.v v6, v4, v0.t"); + // 0, -862, 0, 3792, 0, + // -2330, 0, -6148, 0, 7360, 0, + // -1843, 0, -6520, 0, -8344 + VCMP_U16(10, v6, 0x0000, 0xfca2, 0x0000, 0x0ed0, 0x0000, 0xf6e6, 0x0000, + 0xe7fc, 0x0000, 0x1cc0, 0x0000, 0xf8cd, 0x0000, 0xe688, 0x0000, + 0xdf68); + + VSET(16, e32, m4); + // -28075.818, -5455.616, 6106.086, -11952.592, + // -50887.914, -23028.832, -9221.246, -71657.047, + // -6655.005, -21208.561, -30018.096, -19766.838, 48541.953, + // -62313.625, 13515.192, -83224.820 + VLOAD_32(v8, 0xc6db57a3, 0xc5aa7ced, 0x45bed0b1, 0xc63ac25e, 0xc746c7ea, + 0xc6b3e9aa, 0xc61014fc, 0xc78bf486, 0xc5cff80a, 0xc6a5b11f, + 0xc6ea8431, 0xc69a6dad, 0x473d9df4, 0xc77369a0, 0x46532cc5, + 0xc7a28c69); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.x.f.v v12, v8, v0.t"); + // 0, -5456, 0, -11953, 0, + // -23029, 0, -71657, 0, + // -21209, 0, -19767, 0, + // -62314, 0, -83225 + VCMP_U32(11, v12, 0x00000000, 0xffffeab0, 0x00000000, 0xffffd14f, 0x00000000, + 0xffffa60b, 0x00000000, 0xfffee817, 0x00000000, 0xffffad27, + 0x00000000, 0xffffb2c9, 0x00000000, 0xffff0c96, 0x00000000, + 0xfffebae7); + + VSET(16, e64,
m8); + // 3087905.033, -2534011.630, 7824302.813, -9294206.521, + // 6436555.847, 6645117.193, 1358075.867, 5694551.012, + // -9840938.636, 4621816.383, 2584370.751, 5569558.860, + // 495487.041, 4759865.418, -6831172.669, 8371055.296 + VLOAD_64(v16, 0x41478f10842c8b9c, 0xc143553dd0971c82, 0x415dd8ebb40e1fe0, + 0xc161ba2fd0a8a593, 0x41588db2f632700c, 0x4159595f4c588b60, + 0x4134b8fbde131210, 0x4155b915c0cb4294, 0xc162c52554566300, + 0x4151a17e187d1aa8, 0x4143b7996029dc68, 0x41553f05b70b6824, + 0x411e3dfc2a598ba0, 0x4152284e5ac4da5a, 0xc15a0f112acbf258, + 0x415feedbd2ed6038); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.x.f.v v24, v16, v0.t"); + // 0, -2534012, 0, -9294207, + // 0, 6645117, 0, 5694551, 0, + // 4621816, 0, 5569559, + // 0, 4759865, 0, 8371055 + VCMP_U64(12, v24, 0x0000000000000000, 0xffffffffffd95584, 0x0000000000000000, + 0xffffffffff722e81, 0x0000000000000000, 0x000000000065657d, + 0x0000000000000000, 0x000000000056e457, 0x0000000000000000, + 0x00000000004685f8, 0x0000000000000000, 0x000000000054fc17, + 0x0000000000000000, 0x000000000048a139, 0x0000000000000000, + 0x00000000007fbb6f); +}; + +//////////////////// +// vfcvt.rtz.xu.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // 6996.000, -7512.000, -4792.000, 7240.000, 8336.000, + // 6332.000, -277.750, -4074.000, 9352.000, 8832.000, + // -65.000, 5860.000, 6892.000, 2944.000, 9608.000, + // 4608.000 + VLOAD_16(v4, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, + 0x7091, 0x7050, 0xd410, 0x6db9, 0x6ebb, 0x69c0, 0x70b1, 0x6c80); + asm volatile("vfcvt.rtz.xu.f.v v6, v4"); + // 6996, 0, 0, 7240, 8336, + // 6332, 0, 0, 9352, 8832, 0, + // 5860, 6892, 2944, 9608, 4608 + VCMP_U16(13, v6, 0x1b54, 0x0000, 0x0000, 0x1c48, 0x2090, 0x18bc, 0x0000, + 0x0000, 0x2488, 0x2280, 0x0000, 0x16e4, 0x1aec, 0x0b80, 0x2588, + 0x1200); + + VSET(16, e32, m4); + // 85074.883, -2035.769, 67397.633, -57745.480, 
82113.172, + // 18415.770, 57859.465, 83291.773, -83693.375, 43321.199, + // 94626.156, -53520.090, 9604.658, -5764.834, 94299.633, + // 57572.980 + VLOAD_32(v8, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, + 0x468fdf8a, 0x47620377, 0x47a2ade3, 0xc7a376b0, 0x47293933, + 0x47b8d114, 0xc7511017, 0x461612a2, 0xc5b426ac, 0x47b82dd1, + 0x4760e4fb); + asm volatile("vfcvt.rtz.xu.f.v v12, v8"); + // 85074, 0, 67397, 0, 82113, + // 18415, 57859, 83291, 0, 43321, + // 94626, 0, 9604, 0, 94299, + // 57572 + VCMP_U32(14, v12, 0x00014c52, 0x00000000, 0x00010745, 0x00000000, 0x000140c1, + 0x000047ef, 0x0000e203, 0x0001455b, 0x00000000, 0x0000a939, + 0x000171a2, 0x00000000, 0x00002584, 0x00000000, 0x0001705b, + 0x0000e0e4); + + VSET(16, e64, m8); + // -5386285.220, -9081004.335, -9603879.062, -4621060.923, + // 2017661.058, 1106405.978, -2095853.299, 1911589.313, + // 4833261.528, 1291127.404, -9941577.120, 9259799.184, + // -8569693.727, 4926687.920, -7537625.130, -6328586.289 + VLOAD_64(v16, 0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, + 0xc151a0c13b0c041c, 0x413ec97d0edd7a68, 0x4130e1e5fa5c8120, + 0xc13ffaed4c78fc7c, 0x413d2b2550357b50, 0x41526ffb61d23f42, + 0x4133b377675b6328, 0xc162f64923d5cce3, 0x4161a962e5e3a1e8, + 0xc160586bb74734b0, 0x4152cb37fae70f80, 0xc15cc0f6484f174c, + 0xc1582442928257b8); + asm volatile("vfcvt.rtz.xu.f.v v24, v16"); + // 0, 0, 0, 0, 2017661, + // 1106405, 0, 1911589, + // 4833261, 1291127, 0, + // 9259799, 0, 4926687, + // 0, 0 + VCMP_U64(15, v24, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000000001ec97d, 0x000000000010e1e5, + 0x0000000000000000, 0x00000000001d2b25, 0x000000000049bfed, + 0x000000000013b377, 0x0000000000000000, 0x00000000008d4b17, + 0x0000000000000000, 0x00000000004b2cdf, 0x0000000000000000, + 0x0000000000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); +
// 6996.000, -7512.000, -4792.000, 7240.000, 8336.000, + // 6332.000, -277.750, -4074.000, 9352.000, 8832.000, + // -65.000, 5860.000, 6892.000, 2944.000, 9608.000, + // 4608.000 + VLOAD_16(v4, 0x6ed5, 0xef56, 0xecae, 0x6f12, 0x7012, 0x6e2f, 0xdc57, 0xebf5, + 0x7091, 0x7050, 0xd410, 0x6db9, 0x6ebb, 0x69c0, 0x70b1, 0x6c80); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.rtz.xu.f.v v6, v4, v0.t"); + // 0, 0, 0, 7240, 0, + // 6332, 0, 0, 0, 8832, + // 0, 5860, 0, 2944, 0, + // 4608 + VCMP_U16(16, v6, 0x0000, 0x0000, 0x0000, 0x1c48, 0x0000, 0x18bc, 0x0000, + 0x0000, 0x0000, 0x2280, 0x0000, 0x16e4, 0x0000, 0x0b80, 0x0000, + 0x1200); + + VSET(16, e32, m4); + // 85074.883, -2035.769, 67397.633, -57745.480, 82113.172, + // 18415.770, 57859.465, 83291.773, -83693.375, 43321.199, + // 94626.156, -53520.090, 9604.658, -5764.834, 94299.633, + // 57572.980 + VLOAD_32(v8, 0x47a62971, 0xc4fe789f, 0x4783a2d1, 0xc761917b, 0x47a06096, + 0x468fdf8a, 0x47620377, 0x47a2ade3, 0xc7a376b0, 0x47293933, + 0x47b8d114, 0xc7511017, 0x461612a2, 0xc5b426ac, 0x47b82dd1, + 0x4760e4fb); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.rtz.xu.f.v v12, v8, v0.t"); + // 0, 0, 0, 0, 0, 18415, + // 0, 83291, 0, 43321, 0, 0, + // 0, 0, 0, 57572 + VCMP_U32(17, v12, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x000047ef, 0x00000000, 0x0001455b, 0x00000000, 0x0000a939, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000e0e4); + + VSET(16, e64, m8); + // -5386285.220, -9081004.335, -9603879.062, -4621060.923, + // 2017661.058, 1106405.978, -2095853.299, 1911589.313, + // 4833261.528, 1291127.404, -9941577.120, 9259799.184, + // -8569693.727, 4926687.920, -7537625.130, -6328586.289 + VLOAD_64(v16, 0xc1548c0b4e12be63, 0xc16152158ab92a41, 0xc1625164e1fd6af4, + 0xc151a0c13b0c041c, 0x413ec97d0edd7a68, 0x4130e1e5fa5c8120, + 0xc13ffaed4c78fc7c, 0x413d2b2550357b50, 0x41526ffb61d23f42, + 0x4133b377675b6328, 0xc162f64923d5cce3, 0x4161a962e5e3a1e8, + 
0xc160586bb74734b0, 0x4152cb37fae70f80, 0xc15cc0f6484f174c, + 0xc1582442928257b8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.rtz.xu.f.v v24, v16, v0.t"); + // 0, 0, 0, 0, 0, + // 1106405, 0, 1911589, + // 0, 1291127, 0, 9259799, 0, + // 4926687, 0, 0 + VCMP_U64(18, v24, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x000000000010e1e5, + 0x0000000000000000, 0x00000000001d2b25, 0x0000000000000000, + 0x000000000013b377, 0x0000000000000000, 0x00000000008d4b17, + 0x0000000000000000, 0x00000000004b2cdf, 0x0000000000000000, + 0x0000000000000000); +}; + +/////////////////// +// vfcvt.rtz.x.f // +/////////////////// + +// Simple random test with similar values +void TEST_CASE7(void) { + VSET(16, e16, m2); + // 5116.000, 4640.000, 5720.000, 1316.000, 8104.000, + // 9952.000, 9400.000, -4120.000, -9368.000, 6076.000, + // 1782.000, -5332.000, -4284.000, -2878.000, -2752.000, + // 3714.000 + VLOAD_16(v4, 0x6cff, 0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, + 0xf093, 0x6def, 0x66f6, 0xed35, 0xec2f, 0xe99f, 0xe960, 0x6b41); + asm volatile("vfcvt.rtz.x.f.v v6, v4"); + // 5116, 4640, 5720, 1316, 8104, + // 9952, 9400, -4120, -9368, 6076, + // 1782, -5332, -4284, -2878, -2752, + // 3714 + VCMP_U16(19, v6, 0x13fc, 0x1220, 0x1658, 0x0524, 0x1fa8, 0x26e0, 0x24b8, + 0xefe8, 0xdb68, 0x17bc, 0x06f6, 0xeb2c, 0xef44, 0xf4c2, 0xf540, + 0x0e82); + + VSET(16, e32, m4); + // -31395.312, 38407.539, 39625.664, -19419.770, -77414.898, + // -96104.727, -8227.330, -45789.250, -74805.781, 78266.945, + // 1635.832, -33150.762, 17428.920, -93694.898, 93592.562, + // -83328.680 + VLOAD_32(v8, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, + 0xc7bbb45d, 0xc6008d52, 0xc732dd40, 0xc7921ae4, 0x4798dd79, + 0x44cc7aa0, 0xc7017ec3, 0x468829d7, 0xc7b6ff73, 0x47b6cc48, + 0xc7a2c057); + asm volatile("vfcvt.rtz.x.f.v v12, v8"); + // -31395, 38407, 39625, -19419, + // -77414, -96104, -8227, -45789, + // -74805, 
78266, 1635, -33150, 17428, + // -93694, 93592, -83328 + VCMP_U32(20, v12, 0xffff855d, 0x00009607, 0x00009ac9, 0xffffb425, 0xfffed19a, + 0xfffe8898, 0xffffdfdd, 0xffff4d23, 0xfffedbcb, 0x000131ba, + 0x00000663, 0xffff7e82, 0x00004414, 0xfffe9202, 0x00016d98, + 0xfffeba80); + + VSET(16, e64, m8); + // 1347922.217, 7326256.926, 2532328.150, -4365139.352, + // -3892733.643, -3401324.772, -2109243.969, 61221.157, + // -307581.498, -6001564.901, -1299579.664, -2048360.900, + // 3486773.936, -5491246.977, -2222467.648, 1432204.815 + VLOAD_64(v16, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, + 0xc150a6d4d6864763, 0xc14db2fed245a01c, 0xc149f33662d1f60e, + 0xc140179dfc15a4ac, 0x40ede4a503831a00, 0xc112c5f5fdac3c80, + 0xc156e4e739a40168, 0xc133d47ba9e7da00, 0xc13f4168e650cc0c, + 0x414a9a1af7c5dda0, 0xc154f28bbe844db6, 0xc140f4c1d2e7a21a, + 0x4135da8cd09570f8); + asm volatile("vfcvt.rtz.x.f.v v24, v16"); + // 1347922, 7326256, 2532328, + // -4365139, -3892733, -3401324, + // -2109243, 61221, -307581, + // -6001564, -1299579, -2048360, + // 3486773, -5491246, -2222467, + // 1432204 + VCMP_U64(21, v24, 0x0000000000149152, 0x00000000006fca30, 0x000000000026a3e8, + 0xffffffffffbd64ad, 0xffffffffffc49a03, 0xffffffffffcc1994, + 0xffffffffffdfd0c5, 0x000000000000ef25, 0xfffffffffffb4e83, + 0xffffffffffa46c64, 0xffffffffffec2b85, 0xffffffffffe0be98, + 0x0000000000353435, 0xffffffffffac35d2, 0xffffffffffde167d, + 0x000000000015da8c); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE7 +void TEST_CASE8(void) { + VSET(16, e16, m2); + // 5116.000, 4640.000, 5720.000, 1316.000, 8104.000, + // 9952.000, 9400.000, -4120.000, -9368.000, 6076.000, + // 1782.000, -5332.000, -4284.000, -2878.000, -2752.000, + // 3714.000 + VLOAD_16(v4, 0x6cff, 0x6c88, 0x6d96, 0x6524, 0x6fea, 0x70dc, 0x7097, 0xec06, + 0xf093, 0x6def, 0x66f6, 0xed35, 0xec2f, 0xe99f, 0xe960, 0x6b41); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm
volatile("vfcvt.rtz.x.f.v v6, v4, v0.t"); + // 0, 4640, 0, 1316, 0, + // 9952, 0, -4120, 0, 6076, + // 0, -5332, 0, -2878, 0, + // 3714 + VCMP_U16(22, v6, 0x0000, 0x1220, 0x0000, 0x0524, 0x0000, 0x26e0, 0x0000, + 0xefe8, 0x0000, 0x17bc, 0x0000, 0xeb2c, 0x0000, 0xf4c2, 0x0000, + 0x0e82); + + VSET(16, e32, m4); + // -31395.312, 38407.539, 39625.664, -19419.770, -77414.898, + // -96104.727, -8227.330, -45789.250, -74805.781, 78266.945, + // 1635.832, -33150.762, 17428.920, -93694.898, 93592.562, + // -83328.680 + VLOAD_32(v8, 0xc6f546a0, 0x4716078a, 0x471ac9aa, 0xc697b78a, 0xc7973373, + 0xc7bbb45d, 0xc6008d52, 0xc732dd40, 0xc7921ae4, 0x4798dd79, + 0x44cc7aa0, 0xc7017ec3, 0x468829d7, 0xc7b6ff73, 0x47b6cc48, + 0xc7a2c057); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.rtz.x.f.v v12, v8, v0.t"); + // 0, 38407, 0, -19419, 0, + // -96104, 0, -45789, 0, 78266, + // 0, -33150, 0, -93694, 0, + // -83328 + VCMP_U32(23, v12, 0x00000000, 0x00009607, 0x00000000, 0xffffb425, 0x00000000, + 0xfffe8898, 0x00000000, 0xffff4d23, 0x00000000, 0x000131ba, + 0x00000000, 0xffff7e82, 0x00000000, 0xfffe9202, 0x00000000, + 0xfffeba80); + + VSET(16, e64, m8); + // 1347922.217, 7326256.926, 2532328.150, -4365139.352, + // -3892733.643, -3401324.772, -2109243.969, 61221.157, + // -307581.498, -6001564.901, -1299579.664, -2048360.900, + // 3486773.936, -5491246.977, -2222467.648, 1432204.815 + VLOAD_64(v16, 0x413491523797bd28, 0x415bf28c3b410560, 0x414351f41339c8f8, + 0xc150a6d4d6864763, 0xc14db2fed245a01c, 0xc149f33662d1f60e, + 0xc140179dfc15a4ac, 0x40ede4a503831a00, 0xc112c5f5fdac3c80, + 0xc156e4e739a40168, 0xc133d47ba9e7da00, 0xc13f4168e650cc0c, + 0x414a9a1af7c5dda0, 0xc154f28bbe844db6, 0xc140f4c1d2e7a21a, + 0x4135da8cd09570f8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.rtz.x.f.v v24, v16, v0.t"); + // 0, 7326256, 0, -4365139, + // 0, -3401324, 0, 61221, 0, + // -6001564, 0, + // -2048360, 0, + // -5491246, 0, 1432204 + VCMP_U64(24, v24, 
0x0000000000000000, 0x00000000006fca30, 0x0000000000000000, + 0xffffffffffbd64ad, 0x0000000000000000, 0xffffffffffcc1994, + 0x0000000000000000, 0x000000000000ef25, 0x0000000000000000, + 0xffffffffffa46c64, 0x0000000000000000, 0xffffffffffe0be98, + 0x0000000000000000, 0xffffffffffac35d2, 0x0000000000000000, + 0x000000000015da8c); +}; + +//////////////// +// vfcvt.f.xu // +//////////////// + +void TEST_CASE9(void) { + VSET(16, e16, m2); + // 55973, 61786, 64322, 55940, 55857, + // 3425, 1068, 4246, 57901, 7342, + // 8693, 60988, 9047, 63358, 58389, + // 8076 + VLOAD_16(v4, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, + 0xe22d, 0x1cae, 0x21f5, 0xee3c, 0x2357, 0xf77e, 0xe415, 0x1f8c); + asm volatile("vfcvt.f.xu.v v6, v4"); + // 55968.000, 61792.000, 64320.000, 55936.000, 55872.000, + // 3424.000, 1068.000, 4248.000, 57888.000, 7344.000, + // 8696.000, 60992.000, 9048.000, 63360.000, 58400.000, + // 8076.000 + VCMP_U16(25, v6, 0x7ad5, 0x7b8b, 0x7bda, 0x7ad4, 0x7ad2, 0x6ab0, 0x642c, + 0x6c26, 0x7b11, 0x6f2c, 0x703f, 0x7b72, 0x706b, 0x7bbc, 0x7b21, + 0x6fe3); + + VSET(16, e32, m4); + // 72473, 4294949057, 50975, 4294915723, + // 4294876584, 4294895088, 24967, 34761, + // 83805, 68361, 49397, 51562, 24877, + // 4294942241, 4294909502, 42562 + VLOAD_32(v8, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, + 0xfffee5f0, 0x00006187, 0x000087c9, 0x0001475d, 0x00010b09, + 0x0000c0f5, 0x0000c96a, 0x0000612d, 0xffff9e21, 0xffff1e3e, + 0x0000a642); + asm volatile("vfcvt.f.xu.v v12, v8"); + // 72473.000, 4294949120.000, 50975.000, 4294915840.000, + // 4294876672.000, 4294895104.000, 24967.000, 34761.000, + // 83805.000, 68361.000, 49397.000, 51562.000, 24877.000, + // 4294942208.000, 4294909440.000, 42562.000 + VCMP_U32(26, v12, 0x478d8c80, 0x4f7fffb9, 0x47471f00, 0x4f7fff37, 0x4f7ffe9e, + 0x4f7ffee6, 0x46c30e00, 0x4707c900, 0x47a3ae80, 0x47858480, + 0x4740f500, 0x47496a00, 0x46c25a00, 0x4f7fff9e, 0x4f7fff1e, + 0x47264200); + + VSET(16, e64, m8); + // 
18446744073707704187, 18446744073702261660, 4325496, + // 3834488, 18446744073707063867, 18446744073706356425, + // 5215660, 18446744073707545423, 69532, + // 18446744073707444829, 4236283, 3402850, + // 18446744073708706866, 275183, 4230347, + // 18446744073704794800 + VLOAD_64(v16, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, + 0x00000000003a8278, 0xffffffffffda0a3b, 0xffffffffffcf3ec9, + 0x00000000004f95ac, 0xffffffffffe1634f, 0x0000000000010f9c, + 0xffffffffffdfda5d, 0x000000000040a3fb, 0x000000000033ec62, + 0xfffffffffff31c32, 0x00000000000432ef, 0x0000000000408ccb, + 0xffffffffffb76ab0); + asm volatile("vfcvt.f.xu.v v24, v16"); + // 18446744073707704320.000, 18446744073702260736.000, + // 4325496.000, 3834488.000, 18446744073707063296.000, + // 18446744073706356736.000, 5215660.000, + // 18446744073707544576.000, 69532.000, + // 18446744073707444224.000, 4236283.000, 3402850.000, + // 18446744073708707840.000, 275183.000, 4230347.000, + // 18446744073704794112.000 + VCMP_U64(27, v24, 0x43effffffffffc7a, 0x43effffffffff218, 0x4150801e00000000, + 0x414d413c00000000, 0x43effffffffffb41, 0x43effffffffff9e8, + 0x4153e56b00000000, 0x43effffffffffc2c, 0x40f0f9c000000000, + 0x43effffffffffbfb, 0x415028fec0000000, 0x4149f63100000000, + 0x43effffffffffe64, 0x4110cbbc00000000, 0x41502332c0000000, + 0x43effffffffff6ed); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE9 +void TEST_CASE10(void) { + VSET(16, e16, m2); + // 55973, 61786, 64322, 55940, 55857, + // 3425, 1068, 4246, 57901, 7342, + // 8693, 60988, 9047, 63358, 58389, + // 8076 + VLOAD_16(v4, 0xdaa5, 0xf15a, 0xfb42, 0xda84, 0xda31, 0x0d61, 0x042c, 0x1096, + 0xe22d, 0x1cae, 0x21f5, 0xee3c, 0x2357, 0xf77e, 0xe415, 0x1f8c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.f.xu.v v6, v4, v0.t"); + // 0.000, 61792.000, 0.000, 55936.000, 0.000, 3424.000, + // 0.000, 4248.000, 0.000, 7344.000, 0.000, 60992.000, + // 0.000, 63360.000, 0.000,
8076.000 + VCMP_U16(28, v6, 0x0, 0x7b8b, 0x0, 0x7ad4, 0x0, 0x6ab0, 0x0, 0x6c26, 0x0, + 0x6f2c, 0x0, 0x7b72, 0x0, 0x7bbc, 0x0, 0x6fe3); + + VSET(16, e32, m4); + // 72473, 4294949057, 50975, 4294915723, + // 4294876584, 4294895088, 24967, 34761, + // 83805, 68361, 49397, 51562, 24877, + // 4294942241, 4294909502, 42562 + VLOAD_32(v8, 0x00011b19, 0xffffb8c1, 0x0000c71f, 0xffff368b, 0xfffe9da8, + 0xfffee5f0, 0x00006187, 0x000087c9, 0x0001475d, 0x00010b09, + 0x0000c0f5, 0x0000c96a, 0x0000612d, 0xffff9e21, 0xffff1e3e, + 0x0000a642); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.f.xu.v v12, v8, v0.t"); + // 0.000, 4294949120.000, 0.000, 4294915840.000, 0.000, + // 4294895104.000, 0.000, 34761.000, 0.000, 68361.000, + // 0.000, 51562.000, 0.000, 4294942208.000, 0.000, + // 42562.000 + VCMP_U32(29, v12, 0x0, 0x4f7fffb9, 0x0, 0x4f7fff37, 0x0, 0x4f7ffee6, 0x0, + 0x4707c900, 0x0, 0x47858480, 0x0, 0x47496a00, 0x0, 0x4f7fff9e, 0x0, + 0x47264200); + + VSET(16, e64, m8); + // 18446744073707704187, 18446744073702261660, 4325496, + // 3834488, 18446744073707063867, 18446744073706356425, + // 5215660, 18446744073707545423, 69532, + // 18446744073707444829, 4236283, 3402850, + // 18446744073708706866, 275183, 4230347, + // 18446744073704794800 + VLOAD_64(v16, 0xffffffffffe3cf7b, 0xffffffffff90c39c, 0x0000000000420078, + 0x00000000003a8278, 0xffffffffffda0a3b, 0xffffffffffcf3ec9, + 0x00000000004f95ac, 0xffffffffffe1634f, 0x0000000000010f9c, + 0xffffffffffdfda5d, 0x000000000040a3fb, 0x000000000033ec62, + 0xfffffffffff31c32, 0x00000000000432ef, 0x0000000000408ccb, + 0xffffffffffb76ab0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.f.xu.v v24, v16, v0.t"); + // 0.000, 18446744073702260736.000, 0.000, 3834488.000, + // 0.000, 18446744073706356736.000, 0.000, + // 18446744073707544576.000, 0.000, 18446744073707444224.000, + // 0.000, 3402850.000, 0.000, 275183.000, 0.000, + // 18446744073704794112.000 + VCMP_U64(30, v24, 0x0, 0x43effffffffff218, 0x0, 
0x414d413c00000000, 0x0, + 0x43effffffffff9e8, 0x0, 0x43effffffffffc2c, 0x0, 0x43effffffffffbfb, + 0x0, 0x4149f63100000000, 0x0, 0x4110cbbc00000000, 0x0, + 0x43effffffffff6ed); +}; + +/////////////// +// vfcvt.f.x // +/////////////// + +void TEST_CASE11(void) { + VSET(16, e16, m2); + // -4779, 465, 9893, -6763, -4072, + // 1612, -9552, 2426, 325, 7561, + // -8581, -1741, -8518, -4699, 3653, + // 9937 + VLOAD_16(v4, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, + 0x0145, 0x1d89, 0xde7b, 0xf933, 0xdeba, 0xeda5, 0x0e45, 0x26d1); + asm volatile("vfcvt.f.x.v v6, v4"); + // -4780.000, 465.000, 9896.000, -6764.000, -4072.000, + // 1612.000, -9552.000, 2426.000, 325.000, 7560.000, + // -8584.000, -1741.000, -8520.000, -4700.000, 3652.000, + // 9936.000 + VCMP_U16(31, v6, 0xecab, 0x5f44, 0x70d5, 0xee9b, 0xebf4, 0x664c, 0xf0aa, + 0x68bd, 0x5d14, 0x6f62, 0xf031, 0xe6cd, 0xf029, 0xec97, 0x6b22, + 0x70da); + + VSET(16, e32, m4); + // -39422, 54262, 12833, -40266, + // -64918, 28317, 89178, 54320, -99922, + // -73005, 95070, -24716, 60663, 59516, + // 14865, 26328 + VLOAD_32(v8, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, + 0x00006e9d, 0x00015c5a, 0x0000d430, 0xfffe79ae, 0xfffee2d3, + 0x0001735e, 0xffff9f74, 0x0000ecf7, 0x0000e87c, 0x00003a11, + 0x000066d8); + asm volatile("vfcvt.f.x.v v12, v8"); + // -39422.000, 54262.000, 12833.000, -40266.000, -64918.000, + // 28317.000, 89178.000, 54320.000, -99922.000, -73005.000, + // 95070.000, -24716.000, 60663.000, 59516.000, 14865.000, + // 26328.000 + VCMP_U32(32, v12, 0xc719fe00, 0x4753f600, 0x46488400, 0xc71d4a00, 0xc77d9600, + 0x46dd3a00, 0x47ae2d00, 0x47543000, 0xc7c32900, 0xc78e9680, + 0x47b9af00, 0xc6c11800, 0x476cf700, 0x47687c00, 0x46684400, + 0x46cdb000); + + VSET(16, e64, m8); + // -8860682, 8064547, -5636078, + // -3712253, 8492493, 9839246, + // -8271278, -6234598, -4538479, + // 8807688, 5640899, 3839761, + // -1394518, -6118355, 1783927, + // 5819812 + VLOAD_64(v16, 0xffffffffff78cbf6, 
0x00000000007b0e23, 0xffffffffffaa0012, + 0xffffffffffc75b03, 0x00000000008195cd, 0x000000000096228e, + 0xffffffffff81ca52, 0xffffffffffa0de1a, 0xffffffffffbabf91, + 0x0000000000866508, 0x00000000005612c3, 0x00000000003a9711, + 0xffffffffffeab8aa, 0xffffffffffa2a42d, 0x00000000001b3877, + 0x000000000058cda4); + asm volatile("vfcvt.f.x.v v24, v16"); + // -8860682.000, 8064547.000, -5636078.000, -3712253.000, + // 8492493.000, 9839246.000, -8271278.000, -6234598.000, + // -4538479.000, 8807688.000, 5640899.000, 3839761.000, + // -1394518.000, -6118355.000, 1783927.000, 5819812.000 + VCMP_U64(33, v24, 0xc160e68140000000, 0x415ec388c0000000, 0xc1557ffb80000000, + 0xc14c527e80000000, 0x416032b9a0000000, 0x4162c451c0000000, + 0xc15f8d6b80000000, 0xc157c87980000000, 0xc151501bc0000000, + 0x4160cca100000000, 0x415584b0c0000000, 0x414d4b8880000000, + 0xc135475600000000, 0xc15756f4c0000000, 0x413b387700000000, + 0x4156336900000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE11 +void TEST_CASE12(void) { + VSET(16, e16, m2); + // -4779, 465, 9893, -6763, -4072, + // 1612, -9552, 2426, 325, 7561, + // -8581, -1741, -8518, -4699, 3653, + // 9937 + VLOAD_16(v4, 0xed55, 0x01d1, 0x26a5, 0xe595, 0xf018, 0x064c, 0xdab0, 0x097a, + 0x0145, 0x1d89, 0xde7b, 0xf933, 0xdeba, 0xeda5, 0x0e45, 0x26d1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vfcvt.f.x.v v6, v4, v0.t"); + // 0.000, 465.000, 0.000, -6764.000, 0.000, 1612.000, + // 0.000, 2426.000, 0.000, 7560.000, 0.000, -1741.000, + // 0.000, -4700.000, 0.000, 9936.000 + VCMP_U16(34, v6, 0x0, 0x5f44, 0x0, 0xee9b, 0x0, 0x664c, 0x0, 0x68bd, 0x0, + 0x6f62, 0x0, 0xe6cd, 0x0, 0xec97, 0x0, 0x70da); + + VSET(16, e32, m4); + // -39422, 54262, 12833, -40266, + // -64918, 28317, 89178, 54320, -99922, + // -73005, 95070, -24716, 60663, 59516, + // 14865, 26328 + VLOAD_32(v8, 0xffff6602, 0x0000d3f6, 0x00003221, 0xffff62b6, 0xffff026a, + 0x00006e9d, 0x00015c5a, 0x0000d430, 0xfffe79ae, 
0xfffee2d3, + 0x0001735e, 0xffff9f74, 0x0000ecf7, 0x0000e87c, 0x00003a11, + 0x000066d8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vfcvt.f.x.v v12, v8, v0.t"); + // 0.000, 54262.000, 0.000, -40266.000, 0.000, 28317.000, + // 0.000, 54320.000, 0.000, -73005.000, 0.000, + // -24716.000, 0.000, 59516.000, 0.000, 26328.000 + VCMP_U32(35, v12, 0x0, 0x4753f600, 0x0, 0xc71d4a00, 0x0, 0x46dd3a00, 0x0, + 0x47543000, 0x0, 0xc78e9680, 0x0, 0xc6c11800, 0x0, 0x47687c00, 0x0, + 0x46cdb000); + + VSET(16, e64, m8); + // -8860682, 8064547, -5636078, + // -3712253, 8492493, 9839246, + // -8271278, -6234598, -4538479, + // 8807688, 5640899, 3839761, + // -1394518, -6118355, 1783927, + // 5819812 + VLOAD_64(v16, 0xffffffffff78cbf6, 0x00000000007b0e23, 0xffffffffffaa0012, + 0xffffffffffc75b03, 0x00000000008195cd, 0x000000000096228e, + 0xffffffffff81ca52, 0xffffffffffa0de1a, 0xffffffffffbabf91, + 0x0000000000866508, 0x00000000005612c3, 0x00000000003a9711, + 0xffffffffffeab8aa, 0xffffffffffa2a42d, 0x00000000001b3877, + 0x000000000058cda4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vfcvt.f.x.v v24, v16, v0.t"); + // 0.000, 8064547.000, 0.000, -3712253.000, 0.000, + // 9839246.000, 0.000, -6234598.000, 0.000, 8807688.000, + // 0.000, 3839761.000, 0.000, -6118355.000, 0.000, + // 5819812.000 + VCMP_U64(36, v24, 0x0, 0x415ec388c0000000, 0x0, 0xc14c527e80000000, 0x0, + 0x4162c451c0000000, 0x0, 0xc157c87980000000, 0x0, 0x4160cca100000000, + 0x0, 0x414d4b8880000000, 0x0, 0xc15756f4c0000000, 0x0, + 0x4156336900000000); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + TEST_CASE7(); + TEST_CASE8(); + + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c new file mode 100644 index 000000000..f7262f26b --- 
/dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfdiv.c @@ -0,0 +1,355 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.8057, -0.8564, 0.3425, -0.3066, -0.7314, -0.6396, 0.7588, + // -0.3743, 0.8706, -0.3064, 0.0390, 0.6123, 0.0237, -0.6201, + // -0.4524, 0.3337 + VLOAD_16(v4, 0xba72, 0xbada, 0x357b, 0xb4e8, 0xb9da, 0xb91e, 0x3a12, 0xb5fd, + 0x3af7, 0xb4e7, 0x28fe, 0x38e6, 0x2612, 0xb8f6, 0xb73d, 0x3557); + // -0.4094, 0.0410, -0.7305, 0.9038, -0.3545, 0.2830, -0.7051, + // -0.7124, -0.6348, 0.1256, 0.5576, 0.1334, 0.8779, -0.4836, + // 0.3215, 0.4167 + VLOAD_16(v6, 0xb68d, 0x293e, 0xb9d8, 0x3b3b, 0xb5ac, 0x3487, 0xb9a4, 0xb9b3, + 0xb914, 0x3005, 0x3876, 0x3045, 0x3b06, 0xb7bd, 0x3525, 0x36ab); + asm volatile("vfdiv.vv v2, v4, v6"); + // 1.9678, -20.9062, -0.4690, -0.3394, 2.0625, -2.2598, + // -1.0762, 0.5254, -1.3711, -2.4395, 0.0699, 4.5898, 0.0270, + // 1.2822, -1.4072, 0.8008 + VCMP_U16(1, v2, 0x3fdf, 0xcd3a, 0xb780, 0xb56d, 0x4020, 0xc085, 0xbc4e, + 0x3833, 0xbd7c, 0xc0e0, 0x2c79, 0x4496, 0x26ea, 0x3d20, 0xbda0, + 0x3a68); + + VSET(16, e32, m4); + // 0.64838839, 0.00666664, -0.13619921, 0.21094505, + // -0.51040554, -0.77216595, 0.42111391, 0.82974166, + // -0.31227046, 0.68854737, -0.72970057, 0.10843290, + // -0.38442346, 0.18102080, 0.57249051, 0.76465768 + VLOAD_32(v8, 0x3f25fcc8, 0x3bda73da, 0xbe0b77ce, 0x3e5801fb, 0xbf02a9f0, + 0xbf45acab, 0x3ed79c3e, 0x3f5469f3, 0xbe9fe1ea, 0x3f3044a4, + 0xbf3acda8, 0x3dde1212, 0xbec4d327, 0x3e395d84, 0x3f128ebd, + 0x3f43c09b); + // -0.59629226, -0.46890569, 0.99662799, -0.49397555, + // 0.80701596, 0.55786854, -0.26524273, -0.04642257, + // -0.67671824, 0.64403933, 
0.06642481, 0.26544699, + // -0.00225505, 0.27478188, 0.76509053, 0.36194146 + VLOAD_32(v12, 0xbf18a69c, 0xbef01468, 0x3f7f2303, 0xbefcea5d, 0x3f4e9899, + 0x3f0ed079, 0xbe87cde5, 0xbd3e2597, 0xbf2d3d68, 0x3f24dfc3, + 0x3d8809bb, 0x3e87e8ab, 0xbb13c97d, 0x3e8cb036, 0x3f43dcf9, + 0x3eb95064); + asm volatile("vfdiv.vv v4, v8, v12"); + // -1.08736682, -0.01421745, -0.13666002, -0.42703542, + // -0.63246030, -1.38413608, -1.58765483, -17.87367058, + // 0.46144828, 1.06910765, -10.98536205, 0.40849173, + // 170.47213745, 0.65877998, 0.74826509, 2.11265564 + VCMP_U32(2, v4, 0xbf8b2ed5, 0xbc68f04d, 0xbe0bf09b, 0xbedaa462, 0xbf21e8ea, + 0xbfb12b5e, 0xbfcb3846, 0xc18efd46, 0x3eec42f2, 0x3f88d884, + 0xc12fc40a, 0x3ed125d4, 0x432a78dd, 0x3f28a5cd, 0x3f3f8e4c, + 0x400735c0); + + VSET(16, e64, m8); + // -0.6201645522687720, 0.7701971477336478, 0.3292637140913006, + // -0.8434179184761514, -0.7347451981263740, 0.6543864439701519, + // 0.1228421097534835, -0.5052233099528094, -0.5128552707464591, + // 0.9434287237802566, -0.5723896115412233, -0.5719579148082712, + // -0.6537028651114556, 0.1091378410914579, -0.7602559429758879, + // 0.2908894437497427 + VLOAD_64(v16, 0xbfe3d86354c44060, 0x3fe8a5747d1fa1c6, 0x3fd512a81cf2063c, + 0xbfeafd479316e516, 0xbfe783085c9b10ee, 0x3fe4f0bbd6f98570, + 0x3fbf72949bf67da0, 0xbfe02aca132d92f2, 0xbfe0694f74edfe18, + 0x3fee30916f57c874, 0xbfe2510404c47868, 0xbfe24d7aaf5946b0, + 0xbfe4eb22455e9102, 0x3fbbf075223e6d60, 0xbfe854044575797c, + 0x3fd29deec1ea08a0); + // 0.6660375425590812, -0.9603615652916235, -0.1168804546788573, + // -0.3258082002843947, 0.0488865860405421, + // -0.1515621417461690, -0.1189568642850463, + // -0.1213016259965920, -0.1369814061459547, 0.5914369694708146, + // 0.7538814889966272, 0.2346701936201294, 0.9227364529293489, + // 0.9447507336323382, -0.4250995717346850, -0.0882167932097473 + VLOAD_64(v24, 0x3fe5502df6e661fe, 0xbfeebb482d68699c, 0xbfbdebe0a2632640, + 0xbfd4da0aa33f5db0, 0x3fa907a9a083b220, 0xbfc36663650e4608, + 
0xbfbe73f501bd2e10, 0xbfbf0d9f949b6370, 0xbfc1889b51c74ac0, + 0x3fe2ed0d3930b850, 0x3fe81fcc12899c0a, 0x3fce09ac4378e388, + 0x3fed870e9905133a, 0x3fee3b65e3fa5532, 0xbfdb34d4d5893894, + 0xbfb6956031cb3a60); + asm volatile("vfdiv.vv v8, v16, v24"); + // -0.9311255186696326, -0.8019866429158581, -2.8170981623573508, + // 2.5886945685834193, -15.0295870019854370, -4.3176114854994303, + // -1.0326609606918302, 4.1650167984310764, 3.7439772679805032, + // 1.5951466892987514, -0.7592567530780479, -2.4372840282141826, + // -0.7084394065458121, 0.1155202501636058, 1.7884185106880846, + // -3.2974384260161655 + VCMP_U64(3, v8, 0xbfedcbc7be65070a, 0xbfe9a9dfe464e0ca, 0xc006896ac2e79279, + 0x4004b5a57f7b305c, 0xc02e0f26070bd40a, 0xc011453bf1fc3753, + 0xbff085c77fe07008, 0x4010a8fa29e23558, 0x400df3aa5a978cad, + 0x3ff985b888edc5e0, 0xbfe84bd4d177987a, 0xc0037f8ec4c1f1c6, + 0xbfe6ab891e49fb2d, 0x3fbd92bc307a7a1b, 0x3ffc9d5cba6f762a, + 0xc00a612765c28153); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.8057, -0.8564, 0.3425, -0.3066, -0.7314, -0.6396, 0.7588, + // -0.3743, 0.8706, -0.3064, 0.0390, 0.6123, 0.0237, -0.6201, + // -0.4524, 0.3337 + VLOAD_16(v4, 0xba72, 0xbada, 0x357b, 0xb4e8, 0xb9da, 0xb91e, 0x3a12, 0xb5fd, + 0x3af7, 0xb4e7, 0x28fe, 0x38e6, 0x2612, 0xb8f6, 0xb73d, 0x3557); + // -0.4094, 0.0410, -0.7305, 0.9038, -0.3545, 0.2830, -0.7051, + // -0.7124, -0.6348, 0.1256, 0.5576, 0.1334, 0.8779, -0.4836, + // 0.3215, 0.4167 + VLOAD_16(v6, 0xb68d, 0x293e, 0xb9d8, 0x3b3b, 0xb5ac, 0x3487, 0xb9a4, 0xb9b3, + 0xb914, 0x3005, 0x3876, 0x3045, 0x3b06, 0xb7bd, 0x3525, 0x36ab); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfdiv.vv v2, v4, v6, v0.t"); + // 0.0000, -20.9062, 0.0000, -0.3394, 0.0000, -2.2598, 0.0000, + // 0.5254, 0.0000, -2.4395, 0.0000, 4.5898, 0.0000, 1.2822, + // 0.0000, 0.8008 + VCMP_U16(4, v2, 0x0, 0xcd3a, 0x0, 0xb56d, 0x0, 0xc085, 
0x0, 0x3833, 0x0, + 0xc0e0, 0x0, 0x4496, 0x0, 0x3d20, 0x0, 0x3a68); + + VSET(16, e32, m4); + // 0.64838839, 0.00666664, -0.13619921, 0.21094505, + // -0.51040554, -0.77216595, 0.42111391, 0.82974166, + // -0.31227046, 0.68854737, -0.72970057, 0.10843290, + // -0.38442346, 0.18102080, 0.57249051, 0.76465768 + VLOAD_32(v8, 0x3f25fcc8, 0x3bda73da, 0xbe0b77ce, 0x3e5801fb, 0xbf02a9f0, + 0xbf45acab, 0x3ed79c3e, 0x3f5469f3, 0xbe9fe1ea, 0x3f3044a4, + 0xbf3acda8, 0x3dde1212, 0xbec4d327, 0x3e395d84, 0x3f128ebd, + 0x3f43c09b); + // -0.59629226, -0.46890569, 0.99662799, -0.49397555, + // 0.80701596, 0.55786854, -0.26524273, -0.04642257, + // -0.67671824, 0.64403933, 0.06642481, 0.26544699, + // -0.00225505, 0.27478188, 0.76509053, 0.36194146 + VLOAD_32(v12, 0xbf18a69c, 0xbef01468, 0x3f7f2303, 0xbefcea5d, 0x3f4e9899, + 0x3f0ed079, 0xbe87cde5, 0xbd3e2597, 0xbf2d3d68, 0x3f24dfc3, + 0x3d8809bb, 0x3e87e8ab, 0xbb13c97d, 0x3e8cb036, 0x3f43dcf9, + 0x3eb95064); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfdiv.vv v4, v8, v12, v0.t"); + // 0.00000000, -0.01421745, 0.00000000, -0.42703542, + // 0.00000000, -1.38413608, 0.00000000, -17.87367058, + // 0.00000000, 1.06910765, 0.00000000, 0.40849173, + // 0.00000000, 0.65877998, 0.00000000, 2.11265564 + VCMP_U32(5, v4, 0x0, 0xbc68f04d, 0x0, 0xbedaa462, 0x0, 0xbfb12b5e, 0x0, + 0xc18efd46, 0x0, 0x3f88d884, 0x0, 0x3ed125d4, 0x0, 0x3f28a5cd, 0x0, + 0x400735c0); + + VSET(16, e64, m8); + // -0.6201645522687720, 0.7701971477336478, 0.3292637140913006, + // -0.8434179184761514, -0.7347451981263740, 0.6543864439701519, + // 0.1228421097534835, -0.5052233099528094, -0.5128552707464591, + // 0.9434287237802566, -0.5723896115412233, -0.5719579148082712, + // -0.6537028651114556, 0.1091378410914579, -0.7602559429758879, + // 0.2908894437497427 + VLOAD_64(v16, 0xbfe3d86354c44060, 0x3fe8a5747d1fa1c6, 0x3fd512a81cf2063c, + 0xbfeafd479316e516, 0xbfe783085c9b10ee, 0x3fe4f0bbd6f98570, + 0x3fbf72949bf67da0, 0xbfe02aca132d92f2, 0xbfe0694f74edfe18, + 
0x3fee30916f57c874, 0xbfe2510404c47868, 0xbfe24d7aaf5946b0, + 0xbfe4eb22455e9102, 0x3fbbf075223e6d60, 0xbfe854044575797c, + 0x3fd29deec1ea08a0); + // 0.6660375425590812, -0.9603615652916235, -0.1168804546788573, + // -0.3258082002843947, 0.0488865860405421, + // -0.1515621417461690, -0.1189568642850463, + // -0.1213016259965920, -0.1369814061459547, 0.5914369694708146, + // 0.7538814889966272, 0.2346701936201294, 0.9227364529293489, + // 0.9447507336323382, -0.4250995717346850, -0.0882167932097473 + VLOAD_64(v24, 0x3fe5502df6e661fe, 0xbfeebb482d68699c, 0xbfbdebe0a2632640, + 0xbfd4da0aa33f5db0, 0x3fa907a9a083b220, 0xbfc36663650e4608, + 0xbfbe73f501bd2e10, 0xbfbf0d9f949b6370, 0xbfc1889b51c74ac0, + 0x3fe2ed0d3930b850, 0x3fe81fcc12899c0a, 0x3fce09ac4378e388, + 0x3fed870e9905133a, 0x3fee3b65e3fa5532, 0xbfdb34d4d5893894, + 0xbfb6956031cb3a60); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfdiv.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.8019866429158581, + // 0.0000000000000000, 2.5886945685834193, 0.0000000000000000, + // -4.3176114854994303, 0.0000000000000000, 4.1650167984310764, + // 0.0000000000000000, 1.5951466892987514, 0.0000000000000000, + // -2.4372840282141826, 0.0000000000000000, 0.1155202501636058, + // 0.0000000000000000, -3.2974384260161655 + VCMP_U64(6, v8, 0x0, 0xbfe9a9dfe464e0ca, 0x0, 0x4004b5a57f7b305c, 0x0, + 0xc011453bf1fc3753, 0x0, 0x4010a8fa29e23558, 0x0, 0x3ff985b888edc5e0, + 0x0, 0xc0037f8ec4c1f1c6, 0x0, 0x3fbd92bc307a7a1b, 0x0, + 0xc00a612765c28153); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -0.0933, 0.4983, 0.5918, -0.0608, 0.0790, -0.2864, -0.7656, + // 0.4878, 0.8862, 0.4255, 0.9561, -0.7158, -0.3247, 0.9961, + // -0.4963, -0.4114 + VLOAD_16(v4, 0xadf9, 0x37f9, 0x38bc, 0xabc7, 0x2d0f, 0xb495, 0xba20, 0x37ce, + 0x3b17, 0x36cf, 0x3ba6, 0xb9ba, 0xb532, 0x3bf8, 0xb7f1, 0xb695); + double dscalar_16; + // -0.3206 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb521); 
+ asm volatile("vfdiv.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.2910, -1.5547, -1.8457, 0.1896, -0.2466, 0.8936, 2.3887, + // -1.5215, -2.7656, -1.3271, -2.9824, 2.2324, 1.0127, + // -3.1074, 1.5488, 1.2832 + VCMP_U16(7, v2, 0x34a8, 0xbe37, 0xbf62, 0x3210, 0xb3e3, 0x3b25, 0x40c6, + 0xbe16, 0xc187, 0xbd4f, 0xc1f7, 0x4077, 0x3c0d, 0xc236, 0x3e31, + 0x3d22); + + VSET(16, e32, m4); + // 0.74354362, 0.49774653, 0.25714639, 0.51635689, + // 0.74569613, 0.41876560, 0.21346331, 0.08743033, + // -0.15111920, -0.93289024, 0.08753468, -0.33427054, + // 0.06167563, -0.54564798, 0.78990245, -0.77273035 + VLOAD_32(v8, 0x3f3e58e0, 0x3efed8a2, 0x3e83a8b1, 0x3f042ff7, 0x3f3ee5f1, + 0x3ed66872, 0x3e5a9620, 0x3db30eac, 0xbe1abefe, 0xbf6ed1e5, + 0x3db34562, 0xbeab2582, 0x3d7c9f95, 0xbf0baf96, 0x3f4a370c, + 0xbf45d1a8); + double dscalar_32; + // -0.45971388 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbeeb5f9e); + asm volatile("vfdiv.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -1.61740518, -1.08273113, -0.55936182, -1.12321365, + // -1.62208748, -0.91092664, -0.46433949, -0.19018422, + // 0.32872447, 2.02928448, -0.19041122, 0.72712737, + // -0.13416091, 1.18692958, -1.71824801, 1.68089414 + VCMP_U32(8, v4, 0xbfcf0722, 0xbf8a96ef, 0xbf0f3255, 0xbf8fc576, 0xbfcfa090, + 0xbf69327c, 0xbeedbde7, 0xbe42bfa7, 0x3ea84e93, 0x4001dfcc, + 0xbe42fb28, 0x3f3a2504, 0xbe096179, 0x3f97ed4e, 0xbfdbef8c, + 0x3fd72789); + + VSET(16, e64, m8); + // -0.8580137874650531, -0.4775160339931992, 0.3831482495481682, + // -0.3582952848420831, 0.0009796501269754, 0.5485795361059773, + // -0.8055070333165963, -0.2632019868496005, + // -0.0782680309690278, -0.7119901734202212, + // -0.5084969452651125, -0.7586325258910223, 0.6253847342253853, + // 0.5751160060426936, 0.0609762717873976, -0.7366654110036495 + VLOAD_64(v16, 0xbfeb74d95495be72, 0xbfde8f9f69544024, 0x3fd885803c550ed0, + 0xbfd6ee4f58ad4bcc, 0x3f500cf35070c000, 0x3fe18df6abda8f8e, + 0xbfe9c6b6af995e52, 0xbfd0d84d2570b86c, 0xbfb4095fa9559400, + 
0xbfe6c89f9dbd3916, 0xbfe0459b62c0f228, 0xbfe846b7b80c4834, + 0x3fe40326d89d4d44, 0x3fe26759aeab8116, 0x3faf38482a5158c0, + 0xbfe792c3570b5cc2); + double dscalar_64; + // 0.0072652319849018 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7dc225dc5e3c00); + asm volatile("vfdiv.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -118.0986084474836275, + // -65.7261922242188206, 52.7372354171766275, + // -49.3164272781202087, + // 0.1348408597290880, 75.5075044053657223, + // -110.8714814599939871, -36.2276094413188474, + // -10.7729568899768022, -97.9996474854262800, + // -69.9904622896889919, + // -104.4195873535172723, 86.0791142698573424, 79.1600333255520070, + // 8.3928870976336363, -101.3959929338175954 + VCMP_U64(9, v8, 0xc05d864f99ce434b, 0xc0506e79eef36846, 0x404a5e5dbaeb1cfe, + 0xc048a880b0658b57, 0x3fc142771d59b8f1, 0x4052e07af3c1e7c9, + 0xc05bb7c65a2c6fa6, 0xc0421d224e615cd6, 0xc0258bc101675622, + 0xc0587ffa39725bce, 0xc0517f63bbf188ac, 0xc05a1ada84ea4b00, + 0x40558510354c6bc9, 0x4053ca3dfc6ae106, 0x4020c9287f66b6b2, + 0xc0595957f2bf0c64); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -0.0933, 0.4983, 0.5918, -0.0608, 0.0790, -0.2864, + // -0.7656, 0.4878, 0.8862, 0.4255, 0.9561, -0.7158, + // -0.3247, 0.9961, -0.4963, -0.4114 + VLOAD_16(v4, 0xadf9, 0x37f9, 0x38bc, 0xabc7, 0x2d0f, 0xb495, 0xba20, 0x37ce, + 0x3b17, 0x36cf, 0x3ba6, 0xb9ba, 0xb532, 0x3bf8, 0xb7f1, 0xb695); + double dscalar_16; + // -0.3206 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb521); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfdiv.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, -1.5547, 0.0000, 0.1896, 0.0000, 0.8936, 0.0000, + // -1.5215, 0.0000, -1.3271, 0.0000, 2.2324, 0.0000, + // -3.1074, 0.0000, 1.2832 + VCMP_U16(10, v2, 0x0, 0xbe37, 0x0, 0x3210, 0x0, 0x3b25, 0x0, 0xbe16, 0x0, + 0xbd4f, 0x0, 0x4077, 0x0, 0xc236, 0x0, 0x3d22); + + VSET(16, e32, m4); + // 0.74354362, 0.49774653, 0.25714639, 
0.51635689, + // 0.74569613, 0.41876560, 0.21346331, 0.08743033, + // -0.15111920, -0.93289024, 0.08753468, -0.33427054, + // 0.06167563, -0.54564798, 0.78990245, -0.77273035 + VLOAD_32(v8, 0x3f3e58e0, 0x3efed8a2, 0x3e83a8b1, 0x3f042ff7, 0x3f3ee5f1, + 0x3ed66872, 0x3e5a9620, 0x3db30eac, 0xbe1abefe, 0xbf6ed1e5, + 0x3db34562, 0xbeab2582, 0x3d7c9f95, 0xbf0baf96, 0x3f4a370c, + 0xbf45d1a8); + double dscalar_32; + // -0.45971388 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbeeb5f9e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfdiv.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -1.08273113, 0.00000000, -1.12321365, + // 0.00000000, -0.91092664, 0.00000000, -0.19018422, + // 0.00000000, 2.02928448, 0.00000000, 0.72712737, + // 0.00000000, 1.18692958, 0.00000000, 1.68089414 + VCMP_U32(11, v4, 0x0, 0xbf8a96ef, 0x0, 0xbf8fc576, 0x0, 0xbf69327c, 0x0, + 0xbe42bfa7, 0x0, 0x4001dfcc, 0x0, 0x3f3a2504, 0x0, 0x3f97ed4e, 0x0, + 0x3fd72789); + + VSET(16, e64, m8); + // -0.8580137874650531, -0.4775160339931992, + // 0.3831482495481682, -0.3582952848420831, 0.0009796501269754, + // 0.5485795361059773, -0.8055070333165963, + // -0.2632019868496005, -0.0782680309690278, + // -0.7119901734202212, -0.5084969452651125, + // -0.7586325258910223, 0.6253847342253853, + // 0.5751160060426936, 0.0609762717873976, -0.7366654110036495 + VLOAD_64(v16, 0xbfeb74d95495be72, 0xbfde8f9f69544024, 0x3fd885803c550ed0, + 0xbfd6ee4f58ad4bcc, 0x3f500cf35070c000, 0x3fe18df6abda8f8e, + 0xbfe9c6b6af995e52, 0xbfd0d84d2570b86c, 0xbfb4095fa9559400, + 0xbfe6c89f9dbd3916, 0xbfe0459b62c0f228, 0xbfe846b7b80c4834, + 0x3fe40326d89d4d44, 0x3fe26759aeab8116, 0x3faf38482a5158c0, + 0xbfe792c3570b5cc2); + double dscalar_64; + // 0.0072652319849018 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7dc225dc5e3c00); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfdiv.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -65.7261922242188206, + // 0.0000000000000000, -49.3164272781202087, 
+ // 0.0000000000000000, 75.5075044053657223, + // 0.0000000000000000, -36.2276094413188474, + // 0.0000000000000000, -97.9996474854262800, + // 0.0000000000000000, -104.4195873535172723, + // 0.0000000000000000, 79.1600333255520070, + // 0.0000000000000000, -101.3959929338175954 + VCMP_U64(12, v8, 0x0, 0xc0506e79eef36846, 0x0, 0xc048a880b0658b57, 0x0, + 0x4052e07af3c1e7c9, 0x0, 0xc0421d224e615cd6, 0x0, 0xc0587ffa39725bce, + 0x0, 0xc05a1ada84ea4b00, 0x0, 0x4053ca3dfc6ae106, 0x0, + 0xc0595957f2bf0c64); +}; + +int main(void) { + enable_vec(); + enable_fp(); + // Change RM to RTZ since there are issues with FDIV + RNE in fpnew + // Update: there are issues also with RTZ... + CHANGE_RM(RM_RTZ); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c new file mode 100644 index 000000000..fd9615af4 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c @@ -0,0 +1,48 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() +{ + VSET(4, e32, m1); + VLOAD_32(v2, 3); + VLOAD_32(v0, 2, 0, 0, 0); + volatile uint32_t scalar = 1337; + volatile uint32_t OUP[] = {0}; + __asm__ volatile( + "vfirst.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); + XCMP(1, OUP[0], 1); +} + +void TEST_CASE2() +{ + VSET(4, e32, m1); + VLOAD_32(v2, 1, 2, 3, 4); + VLOAD_32(v0, 0, 0, 0, 0); + volatile int32_t scalar = 1337; + volatile int32_t OUP[] = {0}; + __asm__ volatile( + "vfirst.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); + XCMP(2, OUP[0], -1); +} + +int main(void) +{ + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c new file mode 100644 index 000000000..70503aa4c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmacc.c @@ -0,0 +1,356 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.6353, -0.2290, 0.6870, -0.1031, 0.5410, 0.4211, -0.4939, + // -0.8779, -0.3213, -0.6846, 0.9229, 0.0103, -0.5068, 0.8706, + // 0.6309, -0.3054 + VLOAD_16(v4, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, + 0xb524, 0xb97a, 0x3b62, 0x2142, 0xb80e, 0x3af7, 0x390c, 0xb4e3); + // -0.8042, -0.9463, 0.4431, 0.3757, -0.5259, -0.1290, 0.4697, + // 0.0952, -0.9995, 0.8823, -0.6128, -0.5010, -0.9976, 0.0081, + // 0.9746, -0.7734 + VLOAD_16(v6, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, + 0xbbff, 0x3b0f, 0xb8e7, 0xb802, 0xbbfb, 0x2022, 0x3bcc, 0xba30); + // 0.6509, 0.3452, 0.9360, 0.3616, -0.4258, -0.0945, -0.7295, + // -0.7734, 0.3411, -0.1519, -0.3557, 0.6060, 0.2598, + // -0.0171, -0.8042, -0.4419 + VLOAD_16(v2, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, + 0x3575, 0xb0dc, 0xb5b1, 0x38d9, 0x3428, 0xa45e, 0xba6f, 0xb712); + asm volatile("vfmacc.vv v2, v4, v6"); + // 1.1621, 0.5620, 1.2402, 0.3228, -0.7100, -0.1489, -0.9614, + // -0.8569, 0.6621, -0.7559, -0.9209, 0.6006, 0.7651, + // -0.0100, -0.1895, -0.2057 + VCMP_U16(1, v2, 0x3ca6, 0x387f, 0x3cf6, 0x352a, 0xb9af, 0xb0c4, 0xbbb1, + 0xbadb, 0x394c, 0xba0c, 0xbb5f, 0x38ce, 0x3a1f, 0xa123, 0xb20f, + 0xb295); + + VSET(16, e32, m4); + // 0.72754014, 0.34003398, 0.70107144, -0.41727209, + // -0.52331781, -0.11821542, -0.16069038, 0.30835113, + // -0.59407759, -0.53240144, -0.92390168, 0.33251825, + // -0.45979658, 0.32465541, -0.99342769, -0.16221718 + VLOAD_32(v8, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, + 0xbdf21aed, 0xbe248c05, 0x3e9de033, 0xbf181578, 0xbf084b76, + 0xbf6c84d2, 0x3eaa3fd5, 0xbeeb6a75, 0x3ea6393c, 0xbf7e5147, + 0xbe261c43); + // 0.95104939, -0.11575679, 0.13276713, 0.22784369, + // 0.93318671, 
-0.32301557, 0.41414812, 0.81797487, + // -0.21847244, -0.00211347, -0.72070456, -0.58624452, + // 0.07381243, -0.16745377, 0.55389816, -0.23427610 + VLOAD_32(v12, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, + 0xbea5624c, 0x3ed40b39, 0x3f5166cd, 0xbe5fb73d, 0xbb0a8224, + 0xbf388018, 0xbf16141f, 0x3d972af9, 0xbe2b7900, 0x3f0dcc45, + 0xbe6fe613); + // -0.07459558, -0.00461283, -0.97654468, 0.94394064, + // 0.24971253, 0.97819000, 0.55116856, -0.97427863, 0.61764765, + // 0.86367106, 0.48787504, -0.26353455, -0.22228357, 0.40454853, + // 0.64000225, -0.51787829 + VLOAD_32(v4, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, + 0x3f7a6aa9, 0x3f0d1962, 0xbf796a53, 0x3f1e1e28, 0x3f5d198c, + 0x3ef9cac2, 0xbe86ee00, 0xbe639e4e, 0x3ecf20fc, 0x3f23d730, + 0xbf0493ac); + asm volatile("vfmacc.vv v4, v8, v12"); + // 0.61733103, -0.04397407, -0.88346541, 0.84886783, + // -0.23864070, 1.01637542, 0.48461893, -0.72205520, + // 0.74743724, 0.86479628, 1.15373516, -0.45847154, + // -0.25622228, 0.35018376, 0.08974451, -0.47987467 + VCMP_U32(2, v4, 0x3f1e0968, 0xbd341e29, 0xbf622aca, 0x3f594f67, 0xbe745e3a, + 0x3f821897, 0x3ef81ff9, 0xbf38d89b, 0x3f3f580c, 0x3f5d634a, + 0x3f93ad98, 0xbeeabcc8, 0xbe832f91, 0x3eb34b49, 0x3db7cbf5, + 0xbef5b222); + + VSET(16, e64, m8); + // -0.8992497708533775, 0.5795977429472710, -0.9421852470430045, + // 0.3407052467776674, -0.1137141395145149, 0.3284679540868891, + // 0.9781857174570949, 0.6033619236526551, -0.1287683269222892, + // 0.6555379481826638, 0.6785468173738887, 0.6923267883951645, + // 0.2185923779321672, -0.1310544396012536, -0.7596952716763763, + // -0.4011231994121780, + VLOAD_64(v16, 0xbfecc6a774980626, 0x3fe28c1090d967fc, 0xbfee2661acda592c, + 0x3fd5ce1d611f1590, 0xbfbd1c5eae4ec060, 0x3fd5059e742594fc, + 0x3fef4d4c223c8f84, 0x3fe34ebdaa37ac76, 0xbfc07b7b047228c0, + 0x3fe4fa2ab8176850, 0x3fe5b6a7d0ad9fa2, 0x3fe6278a8249a986, + 0x3fcbfad5c52fcfd8, 0xbfc0c664520a9f78, 0xbfe84f6c7558d3f0, + 0xbfd9ac00a3c919a8); + 
// 0.3028184794479449, 0.5016121947684244, 0.1900289524299839, + // 0.3294240614689632, 0.5945396967575391, -0.8758223026547887, + // 0.3719808177193829, 0.9159354723876536, 0.0805670751146079, + // 0.1775335284298603, -0.7021940272509897, 0.9279338928738479, + // -0.7358371767028979, 0.2529700403354449, + // -0.8333759771774525, -0.4016540133317048, + VLOAD_64(v24, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, + 0x3fd51548a8a19488, 0x3fe306781d37ea9a, 0xbfec06bc7e604fb8, + 0x3fd7ce88a1b60584, 0x3fed4f57e864d750, 0x3fb4a00b38c069f0, + 0x3fc6b96b2d465dc0, 0xbfe6785f9bcfaa42, 0x3fedb1a26b57c7d6, + 0xbfe78bfa6823d662, 0x3fd030a94086f244, 0xbfeaab0418e7f974, + 0xbfd9b4b308e446c8); + // -0.0664052564688480, -0.6742544994800144, 0.4321518669568931, + // -0.1627512425330113, 0.0193121553139675, -0.3517684494272582, + // -0.4834881433176264, 0.8328623424117183, 0.0264604353835154, + // 0.0322804237161178, -0.8345203693668675, 0.7175251091228996, + // -0.7419013213335950, -0.2977694001417877, 0.4556506623709609, + // -0.7832443836668095, + VLOAD_64(v8, 0xbfb0ffef54d0f220, 0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, + 0xbfc4d508600804d8, 0x3f93c690cdf47e40, 0xbfd6835fd0838044, + 0xbfdef17840e363cc, 0x3feaa6ceed574e1a, 0x3f9b1871c270c340, + 0x3fa0870f4852d0c0, 0xbfeab4640fc8d962, 0x3fe6f5f737b7bbe2, + 0xbfe7bda7d6ff9552, 0xbfd30ea762d6f1ec, 0x3fdd296165522d4c, + 0xbfe910568693fcea); + asm volatile("vfmacc.vv v8, v16, v24"); + // -0.3387147047225807, -0.3835212035574087, 0.2531093914663254, + // -0.0505147363757267, -0.0482954147100367, -0.6394480093239447, + // -0.1196218202565150, 1.3855029309732363, 0.0160859479159849, + // 0.1486603886766570, -1.3109918917369803, 1.3599586010192732, + // -0.9027497195599737, -0.3309222470138559, 1.0887624517613512, + // -0.6221316407824544, + VCMP_U64(3, v8, 0xbfd5ad8070dd4c48, 0xbfd88b9c84a68118, 0x3fd032f1bbaa2211, + 0xbfa9dd1149664d37, 0xbfa8ba2d3573e621, 0xbfe4765babf13c96, + 0xbfbe9f891de3c4d6, 0x3ff62b051f10acd5, 
0x3f9078d5b0e5b2ba, + 0x3fc3074db9c9d78e, 0xbff4f9d2a2454dd5, 0x3ff5c263f334aac4, + 0xbfece353613f76db, 0xbfd52dd4811c5fc3, 0x3ff16b922d36d831, + 0xbfe3e8809d5ef572); +}; + +// Simple random test with similar values (masked, the numbers are taken from +// TEST_CASE1) +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0xb915, 0xb354, 0x397f, 0xae9a, 0x3854, 0x36bd, 0xb7e7, 0xbb06, + 0xb524, 0xb97a, 0x3b62, 0x2142, 0xb80e, 0x3af7, 0x390c, 0xb4e3); + VLOAD_16(v6, 0xba6f, 0xbb92, 0x3717, 0x3603, 0xb835, 0xb021, 0x3784, 0x2e17, + 0xbbff, 0x3b0f, 0xb8e7, 0xb802, 0xbbfb, 0x2022, 0x3bcc, 0xba30); + VLOAD_16(v2, 0x3935, 0x3586, 0x3b7d, 0x35c9, 0xb6d0, 0xae0d, 0xb9d6, 0xba30, + 0x3575, 0xb0dc, 0xb5b1, 0x38d9, 0x3428, 0xa45e, 0xba6f, 0xb712); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0x3935, 0x387f, 0x3b7d, 0x352a, 0xb6d0, 0xb0c4, 0xb9d6, + 0xbadb, 0x3575, 0xba0c, 0xb5b1, 0x38ce, 0x3428, 0xa123, 0xba6f, + 0xb295); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f3a4012, 0x3eae18ef, 0x3f33796b, 0xbed5a4b0, 0xbf05f828, + 0xbdf21aed, 0xbe248c05, 0x3e9de033, 0xbf181578, 0xbf084b76, + 0xbf6c84d2, 0x3eaa3fd5, 0xbeeb6a75, 0x3ea6393c, 0xbf7e5147, + 0xbe261c43); + VLOAD_32(v12, 0x3f7377f9, 0xbded11e6, 0x3e07f41b, 0x3e694fdb, 0x3f6ee553, + 0xbea5624c, 0x3ed40b39, 0x3f5166cd, 0xbe5fb73d, 0xbb0a8224, + 0xbf388018, 0xbf16141f, 0x3d972af9, 0xbe2b7900, 0x3f0dcc45, + 0xbe6fe613); + VLOAD_32(v4, 0xbd98c591, 0xbb97273a, 0xbf79fed5, 0x3f71a618, 0x3e7fb4a4, + 0x3f7a6aa9, 0x3f0d1962, 0xbf796a53, 0x3f1e1e28, 0x3f5d198c, + 0x3ef9cac2, 0xbe86ee00, 0xbe639e4e, 0x3ecf20fc, 0x3f23d730, + 0xbf0493ac); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0xbd98c591, 0xbd341e29, 0xbf79fed5, 0x3f594f67, 0x3e7fb4a4, + 0x3f821897, 0x3f0d1962, 0xbf38d89b, 0x3f1e1e28, 0x3f5d634a, + 0x3ef9cac2, 0xbeeabcc8, 0xbe639e4e, 0x3eb34b49, 0x3f23d730, + 0xbef5b222); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xbfecc6a774980626, 
0x3fe28c1090d967fc, 0xbfee2661acda592c, + 0x3fd5ce1d611f1590, 0xbfbd1c5eae4ec060, 0x3fd5059e742594fc, + 0x3fef4d4c223c8f84, 0x3fe34ebdaa37ac76, 0xbfc07b7b047228c0, + 0x3fe4fa2ab8176850, 0x3fe5b6a7d0ad9fa2, 0x3fe6278a8249a986, + 0x3fcbfad5c52fcfd8, 0xbfc0c664520a9f78, 0xbfe84f6c7558d3f0, + 0xbfd9ac00a3c919a8); + VLOAD_64(v16, 0x3fd36160c2769da4, 0x3fe00d350479c3ea, 0x3fc852de63fd6e08, + 0x3fd51548a8a19488, 0x3fe306781d37ea9a, 0xbfec06bc7e604fb8, + 0x3fd7ce88a1b60584, 0x3fed4f57e864d750, 0x3fb4a00b38c069f0, + 0x3fc6b96b2d465dc0, 0xbfe6785f9bcfaa42, 0x3fedb1a26b57c7d6, + 0xbfe78bfa6823d662, 0x3fd030a94086f244, 0xbfeaab0418e7f974, + 0xbfd9b4b308e446c8); + VLOAD_64(v8, 0xbfb0ffef54d0f220, 0xbfe5937e2c0e5202, 0x3fdba8604ddf0d80, + 0xbfc4d508600804d8, 0x3f93c690cdf47e40, 0xbfd6835fd0838044, + 0xbfdef17840e363cc, 0x3feaa6ceed574e1a, 0x3f9b1871c270c340, + 0x3fa0870f4852d0c0, 0xbfeab4640fc8d962, 0x3fe6f5f737b7bbe2, + 0xbfe7bda7d6ff9552, 0xbfd30ea762d6f1ec, 0x3fdd296165522d4c, + 0xbfe910568693fcea); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0xbfb0ffef54d0f220, 0xbfd88b9c84a68118, 0x3fdba8604ddf0d80, + 0xbfa9dd1149664d37, 0x3f93c690cdf47e40, 0xbfe4765babf13c96, + 0xbfdef17840e363cc, 0x3ff62b051f10acd5, 0x3f9b1871c270c340, + 0x3fc3074db9c9d78e, 0xbfeab4640fc8d962, 0x3ff5c263f334aac4, + 0xbfe7bda7d6ff9552, 0xbfd52dd4811c5fc3, 0x3fdd296165522d4c, + 0xbfe3e8809d5ef572); +}; + +// Simple random test with similar values (with scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 0.7407, -0.1365, 0.0000, -0.8525, -0.0812, 0.9609, -0.3740, + // 0.2800, 0.9692, 0.4045, 0.0205, -0.5503, 0.6499, 0.4470, + // -0.9360, -0.4426 + VLOAD_16(v4, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, + 0x3bc1, 0x3679, 0x253e, 0xb867, 0x3933, 0x3727, 0xbb7d, 0xb715); + double dscalar_16; + // 0.5757 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x389b); + // -0.1472, -0.8906, 0.2247, 0.6118, -0.0908, -0.6450, -0.5415, + // 0.0505, -0.4595, 
0.1157, -0.3494, 0.6670, -0.9658, -0.2944, + // -0.8096, -0.3364 + VLOAD_16(v2, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, + 0xb75a, 0x2f68, 0xb597, 0x3956, 0xbbba, 0xb4b6, 0xba7a, 0xb562); + asm volatile("vfmacc.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // 0.2793, -0.9692, 0.2247, 0.1210, -0.1375, -0.0918, -0.7568, + // 0.2118, 0.0986, 0.3486, -0.3376, 0.3501, -0.5918, -0.0371, + // -1.3486, -0.5913 + VCMP_U16(7, v2, 0x3478, 0xbbc1, 0x3331, 0x2fbf, 0xb067, 0xade0, 0xba0e, + 0x32c6, 0x2e4e, 0x3594, 0xb567, 0x359a, 0xb8bc, 0xa8bf, 0xbd65, + 0xb8bb); + + VSET(16, e32, m4); + // -0.79164708, -0.13258822, -0.94492996, -0.93729085, + // 0.80344391, 0.77393818, 0.31253836, -0.42539355, + // -0.20085664, -0.63946086, 0.24876182, -0.45639724, + // 0.92842573, 0.39117134, -0.70563781, 0.13946204 + VLOAD_32(v8, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, + 0x3f4620d0, 0x3ea00507, 0xbed9cd2f, 0xbe4dad5d, 0xbf23b3b5, + 0x3e7ebb6b, 0xbee9ace6, 0x3f6dad4f, 0x3ec8479c, 0xbf34a4ae, + 0x3e0ecf23); + double dscalar_32; + // 0.97630060 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f79eed6); + // -0.43768027, -0.74227923, 0.60234988, 0.43624315, + // 0.34759882, 0.65410614, 0.99296939, -0.31534156, + // -0.89647168, 0.47623411, -0.68185741, 0.77072626, + // 0.19827089, -0.16254151, 0.81625229, -0.24369264 + VLOAD_32(v4, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, + 0x3f277380, 0x3f7e333e, 0xbea17473, 0xbf657f2b, 0x3ef3d4f5, + 0xbf2e8e35, 0x3f454e51, 0x3e4b0786, 0xbe267148, 0x3f50f5e9, + 0xbe798a90); + asm volatile("vfmacc.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -1.21056581, -0.87172520, -0.32018578, + // -0.47883448, 1.13200164, 1.40970242, 1.29810071, + // -0.73065352, -1.09256816, -0.14807191, -0.43899110, + // 0.32514536, 1.10469353, 0.21935931, 0.12733769, -0.10753576 + VCMP_U32(8, v4, 0xbf9af3d2, 0xbf5f2962, 0xbea3ef65, 0xbef529cb, 0x3f90e56e, + 0x3fb47121, 0x3fa6282b, 0xbf3b0c1c, 0xbf8bd946, 0xbe17a02a, + 0xbee0c371, 0x3ea67974, 
0x3f8d6699, 0x3e609fb9, 0x3e0264cf, + 0xbddc3bb6); + + VSET(16, e64, m8); + // -0.1981785436218435, 0.2324321764718080, 0.3529425082887112, + // -0.4889737836823891, 0.1335009259637479, -0.7964186221277452, + // -0.2707335519445100, 0.8070543770008602, -0.1237072120160827, + // -0.2357903062216291, -0.0812498320849093, 0.8656662449573254, + // 0.7178262144151533, -0.3106178959409680, -0.1410836751949509, + // 0.6904294937898030 + VLOAD_64(v16, 0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, + 0xbfdf4b58b2611a74, 0x3fc1168eef800078, 0xbfe97c42e7fed97a, + 0xbfd153b2d1e20588, 0x3fe9d363b369fec4, 0xbfbfab469de36f10, + 0xbfce2e6072f7c5c0, 0xbfb4ccc9fb9c3490, 0x3febb389b26af886, + 0x3fe6f86eae63fc74, 0xbfd3e129e2279a3c, 0xbfc20f07a57b1c48, + 0x3fe617ff9800ac5a); + double dscalar_64; + // 0.8738839355493300 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3febf6db7175e482); + // -0.9433584234417285, -0.0696473591160720, -0.8171557896146857, + // -0.9495656113293445, -0.6353537919969880, 0.8159507202507001, + // 0.0288919190409849, -0.6024741558584952, -0.9583084411212592, + // 0.7665070398551490, -0.7817863527411446, -0.2155326059803253, + // -0.7807395886866346, 0.2528540140694266, -0.1740695080779533, + // 0.7247829241803623 + VLOAD_64(v8, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, + 0xbfee62d76bc21ae2, 0xbfe454d179c08866, 0x3fea1c44af53fb1a, + 0x3f9d95d7dd994d80, 0xbfe34777e1831e42, 0xbfeeaa7676c316f0, + 0x3fe88739c58a9cbe, 0xbfe90464d02f6f4c, 0xbfcb96928af41d88, + 0xbfe8fbd197034034, 0x3fd02ec29a45caf0, 0xbfc647e8de367aa0, + 0x3fe7316bf581b994); + asm volatile("vfmacc.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -1.1165434690834197, 0.1334713860074080, -0.5087250014486948, + // -1.3768719457941574, -0.5186894774163083, 0.1199732804009315, + // -0.2076977828175325, 0.1027976993173292, -1.0664141864137089, + // 0.5604536790898100, -0.8527892757662274, 0.5409592190351925, + // -0.1534427913930433, -0.0185899752875187, -0.2973602653990804, + // 
1.3281381674327271 + VCMP_U64(9, v8, 0xbff1dd5caf44692a, 0x3fc1159722ed4311, 0xbfe04779a77c2679, + 0xbff607aae09f73e1, 0xbfe0991aacc90937, 0x3fbeb691a3b74133, + 0xbfca95d7485395ec, 0x3fba50f334ac0644, 0xbff11008526a327e, + 0x3fe1ef3c8dd3a2b9, 0xbfeb4a0cbc397482, 0x3fe14f89b5473a2f, + 0xbfc3a4036d6b8775, 0xbf9309401f92c802, 0xbfd307f359c13629, + 0x3ff5400dce9b1643); +}; + +// Simple random test with similar values (masked with scalar, values taken from +// TEST_CASE3) +void TEST_CASE4(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 0x39ed, 0xb05e, 0x0000, 0xbad2, 0xad33, 0x3bb0, 0xb5fc, 0x347b, + 0x3bc1, 0x3679, 0x253e, 0xb867, 0x3933, 0x3727, 0xbb7d, 0xb715); + double dscalar_16; + BOX_HALF_IN_DOUBLE(dscalar_16, 0x389b); + VLOAD_16(v2, 0xb0b6, 0xbb20, 0x3331, 0x38e5, 0xadcf, 0xb929, 0xb855, 0x2a77, + 0xb75a, 0x2f68, 0xb597, 0x3956, 0xbbba, 0xb4b6, 0xba7a, 0xb562); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(10, v2, 0xb0b6, 0xbbc1, 0x3331, 0x2fbf, 0xadcf, 0xade0, 0xb855, + 0x32c6, 0xb75a, 0x3594, 0xb597, 0x359a, 0xbbba, 0xa8bf, 0xba7a, + 0xb8bb); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbf4aa962, 0xbe07c535, 0xbf71e6ee, 0xbf6ff24b, 0x3f4dae80, + 0x3f4620d0, 0x3ea00507, 0xbed9cd2f, 0xbe4dad5d, 0xbf23b3b5, + 0x3e7ebb6b, 0xbee9ace6, 0x3f6dad4f, 0x3ec8479c, 0xbf34a4ae, + 0x3e0ecf23); + double dscalar_32; + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f79eed6); + VLOAD_32(v4, 0xbee017a1, 0xbf3e0603, 0x3f1a339a, 0x3edf5b43, 0x3eb1f879, + 0x3f277380, 0x3f7e333e, 0xbea17473, 0xbf657f2b, 0x3ef3d4f5, + 0xbf2e8e35, 0x3f454e51, 0x3e4b0786, 0xbe267148, 0x3f50f5e9, + 0xbe798a90); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(11, v4, 0xbee017a1, 0xbf5f2962, 0x3f1a339a, 0xbef529cb, 0x3eb1f879, + 0x3fb47121, 0x3f7e333e, 0xbf3b0c1c, 0xbf657f2b, 0xbe17a02a, + 0xbf2e8e35, 0x3ea67974, 0x3e4b0786, 0x3e609fb9, 0x3f50f5e9, + 0xbddc3bb6); + + VSET(16, e64, m8); + VLOAD_64(v16, 
0xbfc95dea1dcff710, 0x3fcdc0566a3e04a0, 0x3fd6969c2c9df760, + 0xbfdf4b58b2611a74, 0x3fc1168eef800078, 0xbfe97c42e7fed97a, + 0xbfd153b2d1e20588, 0x3fe9d363b369fec4, 0xbfbfab469de36f10, + 0xbfce2e6072f7c5c0, 0xbfb4ccc9fb9c3490, 0x3febb389b26af886, + 0x3fe6f86eae63fc74, 0xbfd3e129e2279a3c, 0xbfc20f07a57b1c48, + 0x3fe617ff9800ac5a); + double dscalar_64; + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3febf6db7175e482); + VLOAD_64(v8, 0xbfee2ffe0122d3b6, 0xbfb1d468c9a80310, 0xbfea2623e6043a6c, + 0xbfee62d76bc21ae2, 0xbfe454d179c08866, 0x3fea1c44af53fb1a, + 0x3f9d95d7dd994d80, 0xbfe34777e1831e42, 0xbfeeaa7676c316f0, + 0x3fe88739c58a9cbe, 0xbfe90464d02f6f4c, 0xbfcb96928af41d88, + 0xbfe8fbd197034034, 0x3fd02ec29a45caf0, 0xbfc647e8de367aa0, + 0x3fe7316bf581b994); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vfmacc.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(12, v8, 0xbfee2ffe0122d3b6, 0x3fc1159722ed4311, 0xbfea2623e6043a6c, + 0xbff607aae09f73e1, 0xbfe454d179c08866, 0x3fbeb691a3b74133, + 0x3f9d95d7dd994d80, 0x3fba50f334ac0644, 0xbfeeaa7676c316f0, + 0x3fe1ef3c8dd3a2b9, 0xbfe90464d02f6f4c, 0x3fe14f89b5473a2f, + 0xbfe8fbd197034034, 0xbf9309401f92c802, 0xbfc647e8de367aa0, + 0x3ff5400dce9b1643); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c new file mode 100644 index 000000000..d7d816f0c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmadd.c @@ -0,0 +1,433 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.3501, -0.3289, -0.8853, -0.4082, -0.4346, -0.2659, 0.9316, + // 0.5444, -0.0538, 0.7686, 0.8203, -0.8623, 0.3059, 0.0372, + // 0.5337, -0.5815 + VLOAD_16(v4, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, + 0xaae4, 0x3a26, 0x3a90, 0xbae6, 0x34e5, 0x28c4, 0x3845, 0xb8a7); + // -0.8105, 0.5000, -0.8374, -0.8394, 0.3098, 0.1328, -0.2864, + // -0.4041, -0.1729, 0.0196, 0.2739, 0.8071, -0.1553, 0.2815, + // -0.9067, -0.2495 + VLOAD_16(v6, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, + 0xb188, 0x2502, 0x3462, 0x3a75, 0xb0f8, 0x3481, 0xbb41, 0xb3fc); + // -0.6558, -0.1006, 0.4558, -0.0784, 0.1539, 0.6748, 0.3347, + // -0.3416, 0.0614, 0.2289, -0.0829, 0.3838, -0.6348, 0.0843, + // -0.6890, -0.2598 + VLOAD_16(v2, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, + 0x2bdc, 0x3353, 0xad4f, 0x3624, 0xb914, 0x2d65, 0xb983, 0xb428); + asm volatile("vfmadd.vv v2, v4, v6"); + // -1.0400, 0.5332, -1.2412, -0.8071, 0.2429, -0.0466, 0.0254, + // -0.5898, -0.1761, 0.1954, 0.2058, 0.4761, -0.3496, 0.2847, + // -1.2744, -0.0984 + VCMP_U16(1, v2, 0xbc29, 0x3844, 0xbcf7, 0xba75, 0x33c6, 0xa9f7, 0x2684, + 0xb8b8, 0xb1a3, 0x3241, 0x3297, 0x379e, 0xb597, 0x348e, 0xbd19, + 0xae4d); + + VSET(16, e32, m4); + // -0.20637949, -0.63321692, 0.40850523, 0.58702314, + // -0.25534528, -0.22053087, 0.96057665, 0.85530519, + // 0.74252450, -0.87175107, -0.00987994, -0.52556008, 0.26113954, + // -0.71307814, 0.78942811, 0.48685852 + VLOAD_32(v8, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, + 0xbe61d2d8, 0x3f75e85a, 0x3f5af548, 0x3f3e1616, 0xbf5f2b14, + 0xbc21df78, 0xbf068b1b, 0x3e85b415, 0xbf368c4a, 0x3f4a17f6, + 0x3ef94585); + // -0.15712014, 0.83088422, 0.57509524, 0.85365236, + // 
-0.96695948, 0.71368766, 0.23281342, -0.67807233, + // 0.79363507, 0.62817359, 0.37205252, 0.27726358, + // -0.85021532, -0.16634122, -0.58148408, 0.06963744 + VLOAD_32(v12, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, + 0x3f36b43c, 0x3e6e66a4, 0xbf2d9626, 0x3f4b2bab, 0x3f20cffc, + 0x3ebe7dab, 0x3e8df57e, 0xbf59a7b6, 0xbe2a555a, 0xbf14dc24, + 0x3d8e9e13); + // -0.63061494, 0.57643133, 0.08198822, -0.06029604, + // -0.84276563, 0.00681775, 0.30881208, 0.27571887, + // 0.12349209, 0.29805747, -0.55497122, -0.52685922, 0.82809180, + // -0.83231467, 0.20959182, 0.15603130 + VLOAD_32(v4, 0xbf216ffb, 0x3f139101, 0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, + 0x3bdf676d, 0x3e9e1c9e, 0x3e8d2b06, 0x3dfce96c, 0x3e989afd, + 0xbf0e1298, 0xbf06e03f, 0x3f53fdd3, 0xbf551293, 0x3e569f3d, + 0x3e1fc6ab); + asm volatile("vfmadd.vv v4, v8, v12"); + // -0.02697416, 0.46587816, 0.60858786, 0.81825721, + // -0.75176322, 0.71218413, 0.52945113, -0.44224855, + // 0.88533098, 0.36834168, 0.37753561, 0.55415976, + // -0.63396782, 0.42716417, -0.41602641, 0.14560261 + VCMP_U32(2, v4, 0xbcdcf8e5, 0x3eee8795, 0x3f1bcc6a, 0x3f51794e, 0xbf40738e, + 0x3f3651b3, 0x3f078a1b, 0xbee26e67, 0x3f62a50d, 0x3ebc9748, + 0x3ec14c59, 0x3f0ddd6a, 0xbf224bb7, 0x3edab544, 0xbed5016a, + 0x3e1518d9); + + VSET(16, e64, m8); + // 0.0308264568094008, 0.5865382185158325, 0.4543411851187289, + // 0.0036656924511687, -0.3103508259554966, 0.9658177901158624, + // -0.3381631341283657, -0.2003719333831677, 0.8989532087589025, + // -0.8054516243685412, 0.8701363884969631, + // -0.3585976675814562, 0.4150155349314333, + // -0.6908185611649824, 0.8412555125501906, -0.3357469205066645 + VLOAD_64(v16, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, + 0x3f6e0783a63d2400, 0xbfd3dcc9b5f0fd10, 0x3feee7fab5ce29f4, + 0xbfd5a476fc72d40c, 0xbfc9a5c99a756020, 0x3fecc43985081eb2, + 0xbfe9c6427c2588e6, 0x3febd8284474eda0, 0xbfd6f343a1abca7c, + 0x3fda8f9d51773268, 0xbfe61b2f86f58c4a, 0x3feaeb90ae3f72fc, + 
0xbfd57ce0a6d3c3f8); + // 0.5881481456806663, -0.9882550591195853, -0.8483939717953815, + // -0.9684864200393222, -0.6743741213041285, 0.4372709704288931, + // -0.8339944484196176, -0.1519222509233684, + // -0.2540075520951832, 0.6661048539265222, 0.3013290199421905, + // -0.0367795249610035, -0.7178804756969177, 0.1577316726139908, + // -0.1242681642824526, -0.9006297759672148 + VLOAD_64(v24, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, + 0xbfeefdd73b960c5a, 0xbfe594790988a396, 0x3fdbfc3f615edda8, + 0xbfeab01520204008, 0xbfc3723035a012c8, 0xbfd041a8e44be49c, + 0x3fe550bb206a47d8, 0x3fd348f9837f3238, 0xbfa2d4c411bd66e0, + 0xbfe6f8e079b1c412, 0x3fc4308d2bb5d1f0, 0xbfbfd009d586ef50, + 0xbfecd1f58932a7e4); + // -0.8344616273245185, 0.7077884806720691, -0.1882041492960900, + // -0.2751607560371576, 0.2338395078923734, -0.9938305657796487, + // -0.5345602642671559, 0.0887204597208056, 0.3045224871958914, + // -0.3946645040604191, 0.6818539464440989, 0.9719861381061521, + // -0.8471643748461517, 0.8077493118513845, 0.2789872574353331, + // 0.7073875082318823 + VLOAD_64(v8, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, + 0xbfd19c3bdc149f5c, 0x3fcdee73f7748a88, 0xbfefcd75c2393d96, + 0xbfe11b1e209897fa, 0x3fb6b66250fca870, 0x3fd37d4be2d9c9a4, + 0xbfd9422ee8753844, 0x3fe5d1bf5e1407b4, 0x3fef1a82ac6a99b4, + 0xbfeb1bf876899dc0, 0x3fe9d91515b8951c, 0x3fd1daed5eabdf0c, + 0x3fe6a2eb20ae8e42); + asm volatile("vfmadd.vv v8, v16, v24"); + // 0.5624246503668447, -0.5731100645801621, -0.9339028680308291, + // -0.9694950747455855, -0.7469464057195535, + // -0.5225882703620045, -0.6532258740745487, + // -0.1696993409682697, 0.0197439149088051, 0.9839880198025914, + // 0.8946349503834604, -0.3853314870073767, -1.0694668518985466, + // -0.4002765447811873, 0.1104314039662806, -1.1381329534609521 + VCMP_U64(3, v8, 0x3fe1ff61faf9464f, 0xbfe256eaeb0c2af6, 0xbfede288447aa80f, + 0xbfef061a88f54aac, 0xbfe7e6fc260dc471, 0xbfe0b90b094f4be7, + 0xbfe4e739f2c1a370, 
0xbfc5b8b53fce44b1, 0x3f9437bfb3503463, + 0x3fef7cd47196a75e, 0x3feca0d979b82d6d, 0xbfd8a94565b434f4, + 0xbff11c894610f720, 0xbfd99e21834c3aa4, 0x3fbc453b847c8ddf, + 0xbff235cae659d5ba); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.3501, -0.3289, -0.8853, -0.4082, -0.4346, -0.2659, 0.9316, + // 0.5444, -0.0538, 0.7686, 0.8203, -0.8623, 0.3059, 0.0372, + // 0.5337, -0.5815 + VLOAD_16(v4, 0x359a, 0xb543, 0xbb15, 0xb688, 0xb6f4, 0xb441, 0x3b74, 0x385b, + 0xaae4, 0x3a26, 0x3a90, 0xbae6, 0x34e5, 0x28c4, 0x3845, 0xb8a7); + // -0.8105, 0.5000, -0.8374, -0.8394, 0.3098, 0.1328, -0.2864, + // -0.4041, -0.1729, 0.0196, 0.2739, 0.8071, -0.1553, 0.2815, + // -0.9067, -0.2495 + VLOAD_16(v6, 0xba7c, 0x3800, 0xbab3, 0xbab7, 0x34f5, 0x3040, 0xb495, 0xb677, + 0xb188, 0x2502, 0x3462, 0x3a75, 0xb0f8, 0x3481, 0xbb41, 0xb3fc); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.6558, -0.1006, 0.4558, -0.0784, 0.1539, 0.6748, 0.3347, + // -0.3416, 0.0614, 0.2289, -0.0829, 0.3838, -0.6348, 0.0843, + // -0.6890, -0.2598 + VLOAD_16(v2, 0xb93f, 0xae71, 0x374b, 0xad05, 0x30ed, 0x3966, 0x355b, 0xb577, + 0x2bdc, 0x3353, 0xad4f, 0x3624, 0xb914, 0x2d65, 0xb983, 0xb428); + asm volatile("vfmadd.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0xb93f, 0x3844, 0x374b, 0xba75, 0x30ed, 0xa9f7, 0x355b, + 0xb8b8, 0x2bdc, 0x3241, 0xad4f, 0x379e, 0xb914, 0x348e, 0xb983, + 0xae4d); + + VSET(16, e32, m4); + // -0.20637949, -0.63321692, 0.40850523, 0.58702314, + // -0.25534528, -0.22053087, 0.96057665, 0.85530519, + // 0.74252450, -0.87175107, -0.00987994, -0.52556008, 0.26113954, + // -0.71307814, 0.78942811, 0.48685852 + VLOAD_32(v8, 0xbe535525, 0xbf221a81, 0x3ed12799, 0x3f164726, 0xbe82bc9e, + 0xbe61d2d8, 0x3f75e85a, 0x3f5af548, 0x3f3e1616, 0xbf5f2b14, + 0xbc21df78, 0xbf068b1b, 0x3e85b415, 0xbf368c4a, 0x3f4a17f6, + 0x3ef94585); + // -0.15712014, 0.83088422, 0.57509524, 0.85365236, + // -0.96695948, 
0.71368766, 0.23281342, -0.67807233, + // 0.79363507, 0.62817359, 0.37205252, 0.27726358, + // -0.85021532, -0.16634122, -0.58148408, 0.06963744 + VLOAD_32(v12, 0xbe20e41a, 0x3f54b4d4, 0x3f133971, 0x3f5a88f6, 0xbf778aa8, + 0x3f36b43c, 0x3e6e66a4, 0xbf2d9626, 0x3f4b2bab, 0x3f20cffc, + 0x3ebe7dab, 0x3e8df57e, 0xbf59a7b6, 0xbe2a555a, 0xbf14dc24, + 0x3d8e9e13); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.63061494, 0.57643133, 0.08198822, -0.06029604, + // -0.84276563, 0.00681775, 0.30881208, 0.27571887, + // 0.12349209, 0.29805747, -0.55497122, -0.52685922, 0.82809180, + // -0.83231467, 0.20959182, 0.15603130 + VLOAD_32(v4, 0xbf216ffb, 0x3f139101, 0x3da7e970, 0xbd76f8fa, 0xbf57bf7d, + 0x3bdf676d, 0x3e9e1c9e, 0x3e8d2b06, 0x3dfce96c, 0x3e989afd, + 0xbf0e1298, 0xbf06e03f, 0x3f53fdd3, 0xbf551293, 0x3e569f3d, + 0x3e1fc6ab); + asm volatile("vfmadd.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0xbf216ffb, 0x3eee8795, 0x3da7e970, 0x3f51794e, 0xbf57bf7d, + 0x3f3651b3, 0x3e9e1c9e, 0xbee26e67, 0x3dfce96c, 0x3ebc9748, + 0xbf0e1298, 0x3f0ddd6a, 0x3f53fdd3, 0x3edab544, 0x3e569f3d, + 0x3e1518d9); + + VSET(16, e64, m8); + // 0.0308264568094008, 0.5865382185158325, 0.4543411851187289, + // 0.0036656924511687, -0.3103508259554966, 0.9658177901158624, + // -0.3381631341283657, -0.2003719333831677, 0.8989532087589025, + // -0.8054516243685412, 0.8701363884969631, + // -0.3585976675814562, 0.4150155349314333, + // -0.6908185611649824, 0.8412555125501906, -0.3357469205066645 + VLOAD_64(v16, 0x3f9f90f87f644880, 0x3fe2c4ebcc4c25b4, 0x3fdd13ed0cd3e484, + 0x3f6e0783a63d2400, 0xbfd3dcc9b5f0fd10, 0x3feee7fab5ce29f4, + 0xbfd5a476fc72d40c, 0xbfc9a5c99a756020, 0x3fecc43985081eb2, + 0xbfe9c6427c2588e6, 0x3febd8284474eda0, 0xbfd6f343a1abca7c, + 0x3fda8f9d51773268, 0xbfe61b2f86f58c4a, 0x3feaeb90ae3f72fc, + 0xbfd57ce0a6d3c3f8); + // 0.5881481456806663, -0.9882550591195853, -0.8483939717953815, + // -0.9684864200393222, -0.6743741213041285, 0.4372709704288931, + // -0.8339944484196176, -0.1519222509233684, + // 
-0.2540075520951832, 0.6661048539265222, 0.3013290199421905, + // -0.0367795249610035, -0.7178804756969177, 0.1577316726139908, + // -0.1242681642824526, -0.9006297759672148 + VLOAD_64(v24, 0x3fe2d21c0f5cd922, 0xbfef9fc912e0ce28, 0xbfeb260b1d5f82be, + 0xbfeefdd73b960c5a, 0xbfe594790988a396, 0x3fdbfc3f615edda8, + 0xbfeab01520204008, 0xbfc3723035a012c8, 0xbfd041a8e44be49c, + 0x3fe550bb206a47d8, 0x3fd348f9837f3238, 0xbfa2d4c411bd66e0, + 0xbfe6f8e079b1c412, 0x3fc4308d2bb5d1f0, 0xbfbfd009d586ef50, + 0xbfecd1f58932a7e4); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.8344616273245185, 0.7077884806720691, -0.1882041492960900, + // -0.2751607560371576, 0.2338395078923734, -0.9938305657796487, + // -0.5345602642671559, 0.0887204597208056, 0.3045224871958914, + // -0.3946645040604191, 0.6818539464440989, 0.9719861381061521, + // -0.8471643748461517, 0.8077493118513845, 0.2789872574353331, + // 0.7073875082318823 + VLOAD_64(v8, 0xbfeab3e8dee4061e, 0x3fe6a634071f1b28, 0xbfc81712d5195ee0, + 0xbfd19c3bdc149f5c, 0x3fcdee73f7748a88, 0xbfefcd75c2393d96, + 0xbfe11b1e209897fa, 0x3fb6b66250fca870, 0x3fd37d4be2d9c9a4, + 0xbfd9422ee8753844, 0x3fe5d1bf5e1407b4, 0x3fef1a82ac6a99b4, + 0xbfeb1bf876899dc0, 0x3fe9d91515b8951c, 0x3fd1daed5eabdf0c, + 0x3fe6a2eb20ae8e42); + asm volatile("vfmadd.vv v8, v16, v24, v0.t"); + // -0.8344616273245185, -0.5731100645801621, -0.1882041492960900, + // -0.9694950747455855, 0.2338395078923734, + // -0.5225882703620045, -0.5345602642671559, + // -0.1696993409682697, 0.3045224871958914, 0.9839880198025914, + // 0.6818539464440989, -0.3853314870073767, -0.8471643748461517, + // -0.4002765447811873, 0.2789872574353331, -1.1381329534609521 + VCMP_U64(6, v8, 0xbfeab3e8dee4061e, 0xbfe256eaeb0c2af6, 0xbfc81712d5195ee0, + 0xbfef061a88f54aac, 0x3fcdee73f7748a88, 0xbfe0b90b094f4be7, + 0xbfe11b1e209897fa, 0xbfc5b8b53fce44b1, 0x3fd37d4be2d9c9a4, + 0x3fef7cd47196a75e, 0x3fe5d1bf5e1407b4, 0xbfd8a94565b434f4, + 0xbfeb1bf876899dc0, 0xbfd99e21834c3aa4, 0x3fd1daed5eabdf0c, +
0xbff235cae659d5ba); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.6299 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x390a); + // -0.5352, 0.1115, 0.9541, -0.8857, -0.4143, 0.4045, 0.2949, + // -0.5479, 0.6733, 0.8965, 0.8882, 0.6294, 0.7568, 0.8735, + // -0.8569, 0.8271 + VLOAD_16(v4, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, + 0x3963, 0x3b2c, 0x3b1b, 0x3909, 0x3a0e, 0x3afd, 0xbadb, 0x3a9e); + // 0.2844, 0.1008, 0.3777, 0.9790, -0.8613, 0.4951, 0.4126, + // 0.5518, -0.6680, -0.8340, 0.2094, 0.5884, -0.6509, -0.9360, + // -0.1609, -0.2527 + VLOAD_16(v2, 0x348d, 0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, + 0xb958, 0xbaac, 0x32b3, 0x38b5, 0xb935, 0xbb7d, 0xb126, 0xb40b); + asm volatile("vfmadd.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.3560, 0.1750, 1.1924, -0.2690, -0.9570, 0.7163, 0.5547, + // -0.2002, 0.2527, 0.3711, 1.0195, 1.0000, 0.3469, 0.2842, + // -0.9580, 0.6680 + VCMP_U16(7, v2, 0xb5b2, 0x319a, 0x3cc5, 0xb44e, 0xbba8, 0x39bb, 0x3870, + 0xb269, 0x340b, 0x35f0, 0x3c15, 0x3c00, 0x358d, 0x348b, 0xbbab, + 0x3958); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80368215 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4dbe1d); + // 0.13072050, -0.19741143, 0.09370349, 0.41049519, + // -0.69910282, -0.90573430, 0.86481184, 0.33341369, + // 0.30657578, -0.90526944, -0.97891974, -0.50830764, + // 0.79750061, 0.96885878, 0.48752418, 0.64305341 + VLOAD_32(v8, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, + 0xbf67de34, 0x3f5d644f, 0x3eaab533, 0x3e9cf780, 0xbf67bfbd, + 0xbf7a9a7c, 0xbf022073, 0x3f4c2900, 0x3f780721, 0x3ef99cc5, + 0x3f249f26); + // -0.61117887, 0.81778014, -0.46267223, -0.30897874, + // -0.84296966, 0.50125730, 0.96147668, 0.65802389, + // 0.19629262, -0.73197508, -0.06948850, -0.60436314, + // -0.80817568, 0.72047287, -0.78180677, -0.40237895 + VLOAD_32(v4, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, + 
0x3f005266, 0x3f762356, 0x3f287441, 0x3e4900ef, 0xbf3b62b8, + 0xbd8e4ffc, 0xbf1ab78b, 0xbf4ee49a, 0x3f3870e9, 0xbf48247d, + 0xbece049d); + asm volatile("vfmadd.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.36047307, 0.45982391, -0.27813792, 0.16217449, + // -1.37658250, -0.50288272, 1.63753343, 0.86225569, + // 0.46433264, -1.49354482, -1.03476644, -0.99402350, 0.14798427, + // 1.54788995, -0.14079997, 0.31966865 + VCMP_U32(8, v4, 0xbeb88fed, 0x3eeb6e09, 0xbe8e6818, 0x3e261112, 0xbfb033db, + 0xbf00bced, 0x3fd19ab2, 0x3f5cbccb, 0x3eedbd02, 0xbfbf2c79, + 0xbf84733a, 0xbf7e7853, 0x3e17892e, 0x3fc62142, 0xbe102ddd, + 0x3ea3ab9c); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5717861827636179 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe24c128968c808); + // -0.0978999279373105, -0.8066508697961206, 0.1001298330116740, + // 0.6183499729699258, -0.7091158569428311, 0.4713309006442494, + // -0.9363173157138223, 0.6720957973877764, + // -0.1684678230133414, -0.0206783343753454, + // -0.6941474840922310, 0.4809970389518419, + // -0.4671263490725479, 0.5176516826232249, + // -0.9714116214357187, 0.0212574845134876 + VLOAD_64(v16, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, + 0x3fe3c985e1ec6d14, 0xbfe6b113bcd49f88, 0x3fde2a4914f71f28, + 0xbfedf64fbb356b82, 0x3fe581cf0bb1e7c4, 0xbfc5905a8722a398, + 0xbf952cb38782ee00, 0xbfe63674c8d8dba6, 0x3fdec8a7cdf1580c, + 0xbfdde565ea17744c, 0x3fe0909a42f2c184, 0xbfef15cdd320a9f0, + 0x3f95c485a31f6440); + // -0.8599787754583945, -0.8609648323347547, + // -0.5642848553711928, 0.8958493001041692, 0.3661374487395561, + // 0.1017901385891375, 0.2298954297904690, -0.4970717320718749, + // -0.5860844501192310, -0.6581386742527398, + // -0.8379133193505066, -0.6497652150347000, + // -0.4444119628309799, -0.8810041425660891, 0.4421772814931029, + // 0.0606105644967410 + VLOAD_64(v8, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, + 0x3fecaacc26c2f0d0, 0x3fd76ecbc40b5864, 0x3fba0eeb244baff0, + 0x3fcd6d36a4399740, 
0xbfdfd005f440f21c, 0xbfe2c1342d3e9986, + 0xbfe50f78d644befe, 0xbfead02f97efc88a, 0xbfe4cae06b94e0d2, + 0xbfdc713edf93dac0, 0xbfec312f997ec7ac, 0x3fdc4ca1f0c30314, + 0x3faf0859109c77c0); + asm volatile("vfmadd.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.5896239092143964, -1.2989386647705283, -0.2225204504323405, + // 1.1305842246079476, -0.4997635227612305, 0.5295330954311120, + // -0.8048662854791286, 0.3878770491466992, -0.5035828135241305, + // -0.3969929346554276, -1.1732547424504496, 0.1094702669545694, + // -0.7212349688741608, 0.0139056869464266, -0.7185807615459836, + // 0.0559137678222272 + VCMP_U64(9, v8, 0xbfe2de32f5e07f06, 0xbff4c873e8cb3071, 0xbfcc7b8cd4a627de, + 0x3ff216df7be108d1, 0xbfdffc20247f9130, 0x3fe0f1ef63e0d73b, + 0xbfe9c176f0b925be, 0x3fd8d2fa423d0f48, 0xbfe01d59b45d3de9, + 0xbfd968550dc5a3de, 0xbff2c5a6c3cb39b0, 0x3fbc063e50744ab8, + 0xbfe7145b5b8179a4, 0x3f8c7a95b54e78da, 0xbfe6fe9d14cbfe26, + 0x3faca0bab8629cdd); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.6299 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x390a); + // -0.5352, 0.1115, 0.9541, -0.8857, -0.4143, 0.4045, 0.2949, + // -0.5479, 0.6733, 0.8965, 0.8882, 0.6294, 0.7568, 0.8735, + // -0.8569, 0.8271 + VLOAD_16(v4, 0xb848, 0x2f23, 0x3ba2, 0xbb16, 0xb6a1, 0x3679, 0x34b8, 0xb862, + 0x3963, 0x3b2c, 0x3b1b, 0x3909, 0x3a0e, 0x3afd, 0xbadb, 0x3a9e); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.2844, 0.1008, 0.3777, 0.9790, -0.8613, 0.4951, 0.4126, + // 0.5518, -0.6680, -0.8340, 0.2094, 0.5884, -0.6509, + // -0.9360, -0.1609, -0.2527 + VLOAD_16(v2, 0x348d, 0x2e74, 0x360b, 0x3bd5, 0xbae4, 0x37ec, 0x369a, 0x386a, + 0xb958, 0xbaac, 0x32b3, 0x38b5, 0xb935, 0xbb7d, 0xb126, 0xb40b); + asm volatile("vfmadd.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + VCMP_U16(10, v2, 0x348d, 0x319a, 0x360b, 0xb44e, 0xbae4, 0x39bb, 0x369a, + 0xb269, 0xb958, 0x35f0, 0x32b3, 0x3c00, 0xb935, 0x348b, 0xb126, + 0x3958); + + 
VSET(16, e32, m4); + double dscalar_32; + // 0.80368215 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4dbe1d); + // 0.13072050, -0.19741143, 0.09370349, 0.41049519, + // -0.69910282, -0.90573430, 0.86481184, 0.33341369, + // 0.30657578, -0.90526944, -0.97891974, -0.50830764, + // 0.79750061, 0.96885878, 0.48752418, 0.64305341 + VLOAD_32(v8, 0x3e05db98, 0xbe4a2639, 0x3dbfe79e, 0x3ed22c6d, 0xbf32f867, + 0xbf67de34, 0x3f5d644f, 0x3eaab533, 0x3e9cf780, 0xbf67bfbd, + 0xbf7a9a7c, 0xbf022073, 0x3f4c2900, 0x3f780721, 0x3ef99cc5, + 0x3f249f26); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.61117887, 0.81778014, -0.46267223, -0.30897874, + // -0.84296966, 0.50125730, 0.96147668, 0.65802389, + // 0.19629262, -0.73197508, -0.06948850, -0.60436314, + // -0.80817568, 0.72047287, -0.78180677, -0.40237895 + VLOAD_32(v4, 0xbf1c7638, 0x3f515a0a, 0xbeece360, 0xbe9e3276, 0xbf57ccdc, + 0x3f005266, 0x3f762356, 0x3f287441, 0x3e4900ef, 0xbf3b62b8, + 0xbd8e4ffc, 0xbf1ab78b, 0xbf4ee49a, 0x3f3870e9, 0xbf48247d, + 0xbece049d); + asm volatile("vfmadd.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + VCMP_U32(11, v4, 0xbf1c7638, 0x3eeb6e09, 0xbeece360, 0x3e261112, 0xbf57ccdc, + 0xbf00bced, 0x3f762356, 0x3f5cbccb, 0x3e4900ef, 0xbfbf2c79, + 0xbd8e4ffc, 0xbf7e7853, 0xbf4ee49a, 0x3fc62142, 0xbf48247d, + 0x3ea3ab9c); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5717861827636179 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe24c128968c808); + // -0.0978999279373105, -0.8066508697961206, + // 0.1001298330116740, 0.6183499729699258, + // -0.7091158569428311, 0.4713309006442494, + // -0.9363173157138223, 0.6720957973877764, + // -0.1684678230133414, -0.0206783343753454, + // -0.6941474840922310, 0.4809970389518419, + // -0.4671263490725479, 0.5176516826232249, + // -0.9714116214357187, 0.0212574845134876 + VLOAD_64(v16, 0xbfb90ff83cc58160, 0xbfe9d0157c220eae, 0x3fb9a21bd6239ad0, + 0x3fe3c985e1ec6d14, 0xbfe6b113bcd49f88, 0x3fde2a4914f71f28, + 0xbfedf64fbb356b82, 0x3fe581cf0bb1e7c4, 0xbfc5905a8722a398, + 0xbf952cb38782ee00, 
0xbfe63674c8d8dba6, 0x3fdec8a7cdf1580c, + 0xbfdde565ea17744c, 0x3fe0909a42f2c184, 0xbfef15cdd320a9f0, + 0x3f95c485a31f6440); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.8599787754583945, -0.8609648323347547, + // -0.5642848553711928, 0.8958493001041692, + // 0.3661374487395561, 0.1017901385891375, 0.2298954297904690, + // -0.4970717320718749, -0.5860844501192310, + // -0.6581386742527398, -0.8379133193505066, + // -0.6497652150347000, -0.4444119628309799, + // -0.8810041425660891, 0.4421772814931029, 0.0606105644967410 + VLOAD_64(v8, 0xbfeb84f2357b2242, 0xbfeb8d061ebc48de, 0xbfe20e9f1cee50cc, + 0x3fecaacc26c2f0d0, 0x3fd76ecbc40b5864, 0x3fba0eeb244baff0, + 0x3fcd6d36a4399740, 0xbfdfd005f440f21c, 0xbfe2c1342d3e9986, + 0xbfe50f78d644befe, 0xbfead02f97efc88a, 0xbfe4cae06b94e0d2, + 0xbfdc713edf93dac0, 0xbfec312f997ec7ac, 0x3fdc4ca1f0c30314, + 0x3faf0859109c77c0); + asm volatile("vfmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + VCMP_U64(12, v8, 0xbfeb84f2357b2242, 0xbff4c873e8cb3071, 0xbfe20e9f1cee50cc, + 0x3ff216df7be108d1, 0x3fd76ecbc40b5864, 0x3fe0f1ef63e0d73b, + 0x3fcd6d36a4399740, 0x3fd8d2fa423d0f48, 0xbfe2c1342d3e9986, + 0xbfd968550dc5a3de, 0xbfead02f97efc88a, 0x3fbc063e50744ab8, + 0xbfdc713edf93dac0, 0x3f8c7a95b54e78da, 0x3fdc4ca1f0c30314, + 0x3faca0bab8629cdd); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c new file mode 100644 index 000000000..8612260a8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmax.c @@ -0,0 +1,351 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.0445, -0.4812, 0.5732, 0.0634, 0.2072, -0.6416, 0.7759, + // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.0304, 0.7920, + // 0.8179, -0.5659 + VLOAD_16(v4, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, + 0x38e9, 0x3a47, 0xb845, 0x3587, 0x27ca, 0x3a56, 0x3a8b, 0xb887); + // 0.6426, -0.4099, -0.1183, 0.2915, 0.5972, -0.1932, -0.0265, + // -0.5913, -0.8560, 0.5029, -0.8975, -0.7373, 0.3701, 0.9546, + // -0.2671, -0.6855 + VLOAD_16(v6, 0x3924, 0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, + 0xbad9, 0x3806, 0xbb2e, 0xb9e6, 0x35ec, 0x3ba3, 0xb446, 0xb97c); + asm volatile("vfmax.vv v2, v4, v6"); + // 0.6426, -0.4099, 0.5732, 0.2915, 0.5972, -0.1932, 0.7759, + // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.3701, 0.9546, + // 0.8179, -0.5659 + VCMP_U16(1, v2, 0x3924, 0xb68f, 0x3896, 0x34aa, 0x38c7, 0xb22f, 0x3a35, + 0x9c4d, 0x38e9, 0x3a47, 0xb845, 0x3587, 0x35ec, 0x3ba3, 0x3a8b, + 0xb887); + + VSET(16, e32, m4); + // -0.19589283, 0.64597517, -0.09556163, 0.96582597, + // 0.93413597, 0.78331935, -0.18831402, -0.29520443, 0.09486515, + // 0.96548969, 0.74523991, 0.81442171, 0.25644442, + // -0.92091519, 0.25139943, -0.77403748 + VLOAD_32(v8, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, + 0x3f48879e, 0xbe40d564, 0xbe972509, 0x3dc248a9, 0x3f772a55, + 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0xbf6bc119, 0x3e80b76d, + 0xbf462752); + // -0.58921623, 0.69345474, 0.64817399, -0.00869324, + // 0.15872470, -0.17028977, -0.99863762, -0.02739566, + // -0.08060763, 0.73060948, 0.62843031, 0.68798363, + // -0.35207590, 0.01353026, 0.25345275, -0.93635505 + VLOAD_32(v12, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, + 0xbe2e6071, 0xbf7fa6b7, 0xbce06cdd, 0xbda5159d, 0x3f3b0939, + 
0x3f20e0cf, 0x3f301fb2, 0xbeb4434b, 0x3c5dae02, 0x3e81c48f, + 0xbf6fb4f7); + asm volatile("vfmax.vv v4, v8, v12"); + // -0.19589283, 0.69345474, 0.64817399, 0.96582597, + // 0.93413597, 0.78331935, -0.18831402, -0.02739566, 0.09486515, + // 0.96548969, 0.74523991, 0.81442171, 0.25644442, 0.01353026, + // 0.25345275, -0.77403748 + VCMP_U32(2, v4, 0xbe489821, 0x3f318640, 0x3f25eebb, 0x3f77405f, 0x3f6f2389, + 0x3f48879e, 0xbe40d564, 0xbce06cdd, 0x3dc248a9, 0x3f772a55, + 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0x3c5dae02, 0x3e81c48f, + 0xbf462752); + + VSET(16, e64, m8); + // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, + // -0.2169778494878496, -0.8530745559533048, -0.1247477743553222, + // 0.5680045000966327, 0.9515829310663801, -0.9797693611753244, + // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, + // -0.1563664923399100, 0.9555697921812856, 0.4810293698835877, + // -0.1835757691555060 + VLOAD_64(v16, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + 0xbfcbc5ee1fc0dc58, 0xbfeb4c6302dbd036, 0xbfbfef785b1ada80, + 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0xbfef5a45467bddd8, + 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, + 0xbfc403d135652390, 0x3fee940719ceda38, 0x3fdec92f69043118, + 0xbfc77f692a6e3368); + // -0.5461826062085420, -0.4431702866722571, -0.7458438472286320, + // -0.8611805160192025, 0.5288841839862100, 0.4836992661145783, + // -0.5942889927274901, 0.5287333894552471, 0.3093279352228719, + // -0.5415645292681506, 0.0094485111801912, -0.2151605186231076, + // -0.0785069829906857, 0.6345480854408712, 0.4658290296396683, + // -0.5143497066150833 + VLOAD_64(v24, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, + 0xbfeb8eca710827f8, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, + 0xbfe3046a59846530, 0x3fe0eb6249006ebc, 0x3fd3cc0765615f4c, + 0xbfe1547f22bc2bc2, 0x3f8359bdb41e5580, 0xbfcb8a613f7035f0, + 0xbfb419089c73df20, 0x3fe44e37c956a792, 0x3fddd0248ff51b48, + 0xbfe0758d8413ceaa); + asm 
volatile("vfmax.vv v8, v16, v24"); + // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, + // -0.2169778494878496, 0.5288841839862100, 0.4836992661145783, + // 0.5680045000966327, 0.9515829310663801, 0.3093279352228719, + // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, + // -0.0785069829906857, 0.9555697921812856, 0.4810293698835877, + // -0.1835757691555060 + VCMP_U64(3, v8, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + 0xbfcbc5ee1fc0dc58, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, + 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0x3fd3cc0765615f4c, + 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, + 0xbfb419089c73df20, 0x3fee940719ceda38, 0x3fdec92f69043118, + 0xbfc77f692a6e3368); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.0445, -0.4812, 0.5732, 0.0634, 0.2072, -0.6416, 0.7759, + // -0.0042, 0.6138, 0.7847, -0.5337, 0.3455, 0.0304, 0.7920, + // 0.8179, -0.5659 + VLOAD_16(v4, 0x29b3, 0xb7b3, 0x3896, 0x2c0f, 0x32a1, 0xb922, 0x3a35, 0x9c4d, + 0x38e9, 0x3a47, 0xb845, 0x3587, 0x27ca, 0x3a56, 0x3a8b, 0xb887); + // 0.6426, -0.4099, -0.1183, 0.2915, 0.5972, -0.1932, -0.0265, + // -0.5913, -0.8560, 0.5029, -0.8975, -0.7373, 0.3701, 0.9546, + // -0.2671, -0.6855 + VLOAD_16(v6, 0x3924, 0xb68f, 0xaf93, 0x34aa, 0x38c7, 0xb22f, 0xa6c7, 0xb8bb, + 0xbad9, 0x3806, 0xbb2e, 0xb9e6, 0x35ec, 0x3ba3, 0xb446, 0xb97c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmax.vv v2, v4, v6, v0.t"); + // 0.0000, -0.4099, 0.0000, 0.2915, 0.0000, -0.1932, 0.0000, + // -0.0042, 0.0000, 0.7847, 0.0000, 0.3455, 0.0000, 0.9546, + // 0.0000, -0.5659 + VCMP_U16(4, v2, 0x0, 0xb68f, 0x0, 0x34aa, 0x0, 0xb22f, 0x0, 0x9c4d, 0x0, + 0x3a47, 0x0, 0x3587, 0x0, 0x3ba3, 0x0, 0xb887); + + VSET(16, e32, m4); + // -0.19589283, 0.64597517, -0.09556163, 0.96582597, + // 0.93413597, 0.78331935, -0.18831402, -0.29520443, 0.09486515, + // 
0.96548969, 0.74523991, 0.81442171, 0.25644442, + // -0.92091519, 0.25139943, -0.77403748 + VLOAD_32(v8, 0xbe489821, 0x3f255ea1, 0xbdc3b5d1, 0x3f77405f, 0x3f6f2389, + 0x3f48879e, 0xbe40d564, 0xbe972509, 0x3dc248a9, 0x3f772a55, + 0x3f3ec80b, 0x3f507df1, 0x3e834caf, 0xbf6bc119, 0x3e80b76d, + 0xbf462752); + // -0.58921623, 0.69345474, 0.64817399, -0.00869324, + // 0.15872470, -0.17028977, -0.99863762, -0.02739566, + // -0.08060763, 0.73060948, 0.62843031, 0.68798363, + // -0.35207590, 0.01353026, 0.25345275, -0.93635505 + VLOAD_32(v12, 0xbf16d6e0, 0x3f318640, 0x3f25eebb, 0xbc0e6e1c, 0x3e2288ba, + 0xbe2e6071, 0xbf7fa6b7, 0xbce06cdd, 0xbda5159d, 0x3f3b0939, + 0x3f20e0cf, 0x3f301fb2, 0xbeb4434b, 0x3c5dae02, 0x3e81c48f, + 0xbf6fb4f7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmax.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.69345474, 0.00000000, 0.96582597, + // 0.00000000, 0.78331935, 0.00000000, -0.02739566, + // 0.00000000, 0.96548969, 0.00000000, 0.81442171, + // 0.00000000, 0.01353026, 0.00000000, -0.77403748 + VCMP_U32(5, v4, 0x0, 0x3f318640, 0x0, 0x3f77405f, 0x0, 0x3f48879e, 0x0, + 0xbce06cdd, 0x0, 0x3f772a55, 0x0, 0x3f507df1, 0x0, 0x3c5dae02, 0x0, + 0xbf462752); + + VSET(16, e64, m8); + // -0.4061329687298849, -0.2985478109200665, 0.0070087316277823, + // -0.2169778494878496, -0.8530745559533048, -0.1247477743553222, + // 0.5680045000966327, 0.9515829310663801, -0.9797693611753244, + // 0.0055288881366042, 0.3717566019240965, 0.0982171502328268, + // -0.1563664923399100, 0.9555697921812856, 0.4810293698835877, + // -0.1835757691555060 + VLOAD_64(v16, 0xbfd9fe1522a16c7c, 0xbfd31b68470c6bc4, 0x3f7cb530120b5400, + 0xbfcbc5ee1fc0dc58, 0xbfeb4c6302dbd036, 0xbfbfef785b1ada80, + 0x3fe22d17c5fcaaf0, 0x3fee735e0c0b94e4, 0xbfef5a45467bddd8, + 0x3f76a5759bade800, 0x3fd7cadc33d5826c, 0x3fb924c2582803f0, + 0xbfc403d135652390, 0x3fee940719ceda38, 0x3fdec92f69043118, + 0xbfc77f692a6e3368); + // -0.5461826062085420, -0.4431702866722571, -0.7458438472286320, + // 
-0.8611805160192025, 0.5288841839862100, 0.4836992661145783, + // -0.5942889927274901, 0.5287333894552471, 0.3093279352228719, + // -0.5415645292681506, 0.0094485111801912, -0.2151605186231076, + // -0.0785069829906857, 0.6345480854408712, 0.4658290296396683, + // -0.5143497066150833 + VLOAD_64(v24, 0xbfe17a53f1e9e958, 0xbfdc5ce6e7f43e14, 0xbfe7ddf3ea78a228, + 0xbfeb8eca710827f8, 0x3fe0ec9e8632f518, 0x3fdef4edc443ec94, + 0xbfe3046a59846530, 0x3fe0eb6249006ebc, 0x3fd3cc0765615f4c, + 0xbfe1547f22bc2bc2, 0x3f8359bdb41e5580, 0xbfcb8a613f7035f0, + 0xbfb419089c73df20, 0x3fe44e37c956a792, 0x3fddd0248ff51b48, + 0xbfe0758d8413ceaa); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmax.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.2985478109200665, 0.0000000000000000, + // -0.2169778494878496, 0.0000000000000000, 0.4836992661145783, + // 0.0000000000000000, 0.9515829310663801, 0.0000000000000000, + // 0.0055288881366042, 0.0000000000000000, 0.0982171502328268, + // 0.0000000000000000, 0.9555697921812856, 0.0000000000000000, + // -0.1835757691555060 + VCMP_U64(6, v8, 0x0, 0xbfd31b68470c6bc4, 0x0, 0xbfcbc5ee1fc0dc58, 0x0, + 0x3fdef4edc443ec94, 0x0, 0x3fee735e0c0b94e4, 0x0, 0x3f76a5759bade800, + 0x0, 0x3fb924c2582803f0, 0x0, 0x3fee940719ceda38, 0x0, + 0xbfc77f692a6e3368); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.0368 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x28b5); + // -0.5518, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, + // 0.8203, -0.3535, -0.1597, -0.5244, 0.8696, 0.1744, 0.0793, + // -0.2445, -0.4031 + VLOAD_16(v4, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, + 0xb5a8, 0xb11c, 0xb832, 0x3af5, 0x3195, 0x2d14, 0xb3d3, 0xb673); + asm volatile("vfmax.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.0368, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, + // 0.8203, 0.0368, 0.0368, 0.0368, 0.8696, 0.1744, 0.0793, + // 0.0368, 0.0368 + VCMP_U16(7, v2, 
0x28b5, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, + 0x3a90, 0x28b5, 0x28b5, 0x28b5, 0x3af5, 0x3195, 0x2d14, 0x28b5, + 0x28b5); + + VSET(16, e32, m4); + double dscalar_32; + // -0.94383347 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf719f12); + // 0.51733643, 0.31252080, 0.47358772, 0.13738893, + // 0.11194360, -0.33637357, 0.83680850, 0.95792335, + // 0.41251704, 0.27496886, -0.06774041, -0.19357064, + // -0.48802575, -0.53921199, 0.32722279, 0.28428423 + VLOAD_32(v8, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, + 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, + 0x3e918db4); + asm volatile("vfmax.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 0.51733643, 0.31252080, 0.47358772, 0.13738893, + // 0.11194360, -0.33637357, 0.83680850, 0.95792335, + // 0.41251704, 0.27496886, -0.06774041, -0.19357064, + // -0.48802575, -0.53921199, 0.32722279, 0.28428423 + VCMP_U32(8, v4, 0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, + 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, + 0x3e918db4); + + VSET(16, e64, m8); + double dscalar_64; + // -0.8274885128397702 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfea7ac9308eccb6); + // 0.9632225672084347, 0.4671677538923853, + // -0.1749283847947720, -0.0938698612480795, + // 0.3438198935172891, 0.2938331380713377, + // -0.3607699326176230, 0.6841623039857032, + // -0.6959644979744999, 0.4712155929452235, + // 0.1886883982201846, 0.9268486384654282, + // -0.9639662652720637, -0.2101071651393955, + // 0.0859470276611187, -0.7001184217853196 + VLOAD_64(v16, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, + 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, + 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, + 0xbfeed8cfc7f94e06, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, + 
0xbfe6675ebf9ca482); + asm volatile("vfmax.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.9632225672084347, 0.4671677538923853, -0.1749283847947720, + // -0.0938698612480795, 0.3438198935172891, 0.2938331380713377, + // -0.3607699326176230, 0.6841623039857032, + // -0.6959644979744999, 0.4712155929452235, 0.1886883982201846, + // 0.9268486384654282, -0.8274885128397702, -0.2101071651393955, + // 0.0859470276611187, -0.7001184217853196 + VCMP_U64(9, v8, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, + 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, + 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, + 0xbfea7ac9308eccb6, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, + 0xbfe6675ebf9ca482); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.0368 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x28b5); + // -0.5518, 0.6772, 0.2756, 0.4421, 0.2081, 0.6250, 0.4136, + // 0.8203, -0.3535, -0.1597, -0.5244, 0.8696, 0.1744, 0.0793, + // -0.2445, -0.4031 + VLOAD_16(v4, 0xb86a, 0x396b, 0x3469, 0x3713, 0x32a9, 0x3900, 0x369e, 0x3a90, + 0xb5a8, 0xb11c, 0xb832, 0x3af5, 0x3195, 0x2d14, 0xb3d3, 0xb673); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmax.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.6772, 0.0000, 0.4421, 0.0000, 0.6250, 0.0000, + // 0.8203, 0.0000, 0.0368, 0.0000, 0.8696, 0.0000, 0.0793, + // 0.0000, 0.0368 + VCMP_U16(10, v2, 0x0, 0x396b, 0x0, 0x3713, 0x0, 0x3900, 0x0, 0x3a90, 0x0, + 0x28b5, 0x0, 0x3af5, 0x0, 0x2d14, 0x0, 0x28b5); + + VSET(16, e32, m4); + double dscalar_32; + // -0.94383347 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf719f12); + // 0.51733643, 0.31252080, 0.47358772, 0.13738893, + // 0.11194360, -0.33637357, 0.83680850, 0.95792335, + // 0.41251704, 0.27496886, -0.06774041, -0.19357064, + // -0.48802575, -0.53921199, 0.32722279, 0.28428423 + VLOAD_32(v8, 
0x3f047029, 0x3ea002ba, 0x3ef27a17, 0x3e0cafaf, 0x3de542b0, + 0xbeac3928, 0x3f563915, 0x3f753a77, 0x3ed3356f, 0x3e8cc8b8, + 0xbd8abb7c, 0xbe463762, 0xbef9de83, 0xbf0a09cc, 0x3ea789bf, + 0x3e918db4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmax.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.31252080, 0.00000000, 0.13738893, + // 0.00000000, -0.33637357, 0.00000000, 0.95792335, + // 0.00000000, 0.27496886, 0.00000000, -0.19357064, + // 0.00000000, -0.53921199, 0.00000000, 0.28428423 + VCMP_U32(11, v4, 0x0, 0x3ea002ba, 0x0, 0x3e0cafaf, 0x0, 0xbeac3928, 0x0, + 0x3f753a77, 0x0, 0x3e8cc8b8, 0x0, 0xbe463762, 0x0, 0xbf0a09cc, 0x0, + 0x3e918db4); + + VSET(16, e64, m8); + double dscalar_64; + // -0.8274885128397702 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfea7ac9308eccb6); + // 0.9632225672084347, 0.4671677538923853, + // -0.1749283847947720, -0.0938698612480795, + // 0.3438198935172891, 0.2938331380713377, + // -0.3607699326176230, 0.6841623039857032, + // -0.6959644979744999, 0.4712155929452235, + // 0.1886883982201846, 0.9268486384654282, + // -0.9639662652720637, -0.2101071651393955, + // 0.0859470276611187, -0.7001184217853196 + VLOAD_64(v16, 0x3feed2b8221dbd8e, 0x3fdde613942dab28, 0xbfc6640da5eaf690, + 0xbfb807daf023fbb0, 0x3fd601252797bdcc, 0x3fd2ce29819fd630, + 0xbfd716dac57e4298, 0x3fe5e4a85818c992, 0xbfe6455756bf47f8, + 0x3fde2865724428b0, 0x3fc826f101bec2b8, 0x3feda8be79d1a2f4, + 0xbfeed8cfc7f94e06, 0xbfcae4caa576e8a8, 0x3fb6009fd8fe2f80, + 0xbfe6675ebf9ca482); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmax.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.4671677538923853, 0.0000000000000000, + // -0.0938698612480795, 0.0000000000000000, + // 0.2938331380713377, 0.0000000000000000, 0.6841623039857032, + // 0.0000000000000000, 0.4712155929452235, 0.0000000000000000, + // 0.9268486384654282, 0.0000000000000000, + // -0.2101071651393955, 0.0000000000000000, + // -0.7001184217853196 + 
VCMP_U64(12, v8, 0x0, 0x3fdde613942dab28, 0x0, 0xbfb807daf023fbb0, 0x0, + 0x3fd2ce29819fd630, 0x0, 0x3fe5e4a85818c992, 0x0, 0x3fde2865724428b0, + 0x0, 0x3feda8be79d1a2f4, 0x0, 0xbfcae4caa576e8a8, 0x0, + 0xbfe6675ebf9ca482); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c new file mode 100644 index 000000000..7188a6008 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmerge.c @@ -0,0 +1,94 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.1481, -0.1797, -0.5454, 0.3228, 0.3237, -0.7212, -0.5195, + // -0.4500, 0.2681, 0.7300, 0.5059, 0.5830, 0.3198, -0.1713, + // -0.6431, 0.4841 + VLOAD_16(v4, 0xb0bd, 0xb1c0, 0xb85d, 0x352a, 0x352e, 0xb9c5, 0xb828, 0xb733, + 0x344a, 0x39d7, 0x380c, 0x38aa, 0x351e, 0xb17b, 0xb925, 0x37bf); + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + VLOAD_8(v0, 0x0F, 0xAA); + asm volatile("vfmerge.vfm v2, v4, %[A], v0" ::[A] "f"(dscalar_16)); + // -0.9380, -0.9380, -0.9380, -0.9380, 0.3237, -0.7212, + // -0.5195, -0.4500, 0.2681, -0.9380, 0.5059, -0.9380, 0.3198, + // -0.9380, -0.6431, -0.9380 + VCMP_U16(1, v2, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0x352e, 0xb9c5, 0xb828, + 0xb733, 0x344a, 0xbb81, 0x380c, 0xbb81, 0x351e, 0xbb81, 0xb925, + 0xbb81); + + VSET(16, e32, m4); + // 0.86539453, -0.53925377, -0.47128764, 0.99265540, + // 0.32128176, -0.47335613, -0.30028856, 0.44394016, + // -0.72540921, -0.26464799, 0.77351445, -0.21725702, + // -0.25191557, -0.53123665, 0.80404943, 0.81841671 + VLOAD_32(v8, 
0x3f5d8a7f, 0xbf0a0c89, 0xbef14c9d, 0x3f7e1eaa, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbe877ff1, + 0x3f46050b, 0xbe5e78a0, 0xbe80fb14, 0xbf07ff20, 0x3f4dd62f, + 0x3f5183c2); + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + VLOAD_8(v0, 0x0F, 0xAA); + asm volatile("vfmerge.vfm v4, v8, %[A], v0" ::[A] "f"(dscalar_32)); + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // 0.32128176, -0.47335613, -0.30028856, 0.44394016, + // -0.72540921, -0.96056187, 0.77351445, -0.96056187, + // -0.25191557, -0.96056187, 0.80404943, -0.96056187 + VCMP_U32(2, v4, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0x3ea47f0b, + 0xbef25bbc, 0xbe99bf6c, 0x3ee34c20, 0xbf39b46b, 0xbf75e762, + 0x3f46050b, 0xbf75e762, 0xbe80fb14, 0xbf75e762, 0x3f4dd62f, + 0xbf75e762); + + VSET(16, e64, m8); + // -0.3488917150781869, -0.4501495513738740, 0.8731197104152684, + // 0.3256432550932964, 0.6502591178769535, -0.3169358689246526, + // -0.5396694979141685, -0.5417807430937591, + // -0.7971574213160249, -0.1764794100111047, 0.3564275916066595, + // -0.3754449946313438, 0.6580947137446858, + // -0.3328857144699515, 0.1761214464164236, 0.1429774118511240 + VLOAD_64(v16, 0xbfd6543dea86cb60, 0xbfdccf40105d6e5c, 0x3febf098bf37400c, + 0x3fd4d756ceb279f4, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0xbfc696e097352100, 0x3fd6cfb5ac55edec, 0xbfd8074a7158dd78, + 0x3fe50f1ca5268668, 0xbfd54dffe23d0eec, 0x3fc68b25c63dcaf0, + 0x3fc24d1575fbd080); + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + VLOAD_8(v0, 0x0F, 0xAA); + asm volatile("vfmerge.vfm v8, v16, %[A], v0" ::[A] "f"(dscalar_64)); + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.6502591178769535, + // -0.3169358689246526, -0.5396694979141685, + // -0.5417807430937591, -0.7971574213160249, + // 0.9108707261227378, 0.3564275916066595, 
0.9108707261227378, + // 0.6580947137446858, 0.9108707261227378, 0.1761214464164236, + // 0.9108707261227378 + VCMP_U64(3, v8, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fe4ceec35a6a266, 0xbfd448ad61fd7c88, + 0xbfe144f8f7861540, 0xbfe1564491a616b8, 0xbfe9825047ca1cd6, + 0x3fed25da5d7296fe, 0x3fd6cfb5ac55edec, 0x3fed25da5d7296fe, + 0x3fe50f1ca5268668, 0x3fed25da5d7296fe, 0x3fc68b25c63dcaf0, + 0x3fed25da5d7296fe); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c new file mode 100644 index 000000000..401879889 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmin.c @@ -0,0 +1,348 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.9390, 0.9619, 0.9121, 0.6265, 0.1193, -0.4492, -0.3562, + // 0.2365, -0.9897, 0.8638, -0.0379, -0.6201, 0.1809, 0.9824, + // -0.9922, -0.6851 + VLOAD_16(v4, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, + 0xbbeb, 0x3ae9, 0xa8da, 0xb8f6, 0x31ca, 0x3bdc, 0xbbf0, 0xb97b); + // 0.9795, -0.1069, 0.7070, -0.7305, 0.0516, -0.1321, 0.3828, + // 0.0230, -0.9424, -0.8652, -0.3865, -0.1719, -0.7021, 0.1664, + // 0.7026, -0.8535 + VLOAD_16(v6, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, + 0xbb8a, 0xbaec, 0xb62f, 0xb180, 0xb99e, 0x3153, 0x399f, 0xbad4); + asm volatile("vfmin.vv v2, v4, v6"); + // 0.9390, -0.1069, 0.7070, -0.7305, 0.0516, -0.4492, -0.3562, + // 0.0230, -0.9897, -0.8652, -0.3865, -0.6201, -0.7021, 0.1664, + // -0.9922, -0.8535 + VCMP_U16(1, v2, 0x3b83, 0xaed8, 
0x39a8, 0xb9d8, 0x2a9a, 0xb730, 0xb5b3, + 0x25e2, 0xbbeb, 0xbaec, 0xb62f, 0xb8f6, 0xb99e, 0x3153, 0xbbf0, + 0xbad4); + + VSET(16, e32, m4); + // 0.33477312, -0.14129849, -0.94871885, 0.83600986, + // -0.28163233, -0.47814348, 0.77408481, -0.54823470, + // -0.72419900, 0.27495387, -0.76835793, 0.71516198, + // 0.32305571, -0.76598656, -0.36499983, -0.52954155 + VLOAD_32(v8, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + 0xbef4cf39, 0x3f462a6c, 0xbf0c591c, 0xbf39651b, 0x3e8cc6c1, + 0xbf44b31b, 0x3f3714db, 0x3ea5678f, 0xbf4417b2, 0xbebae142, + 0xbf079009); + // 0.87184614, 0.40005061, 0.40118238, 0.97373396, + // 0.74085194, -0.99458516, -0.73125440, -0.46319291, + // -0.76140571, -0.82557100, 0.15205561, 0.39971715, + // -0.32876521, -0.53106725, 0.84727478, 0.21940185 + VLOAD_32(v12, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, + 0xbf7e9d22, 0xbf3b337d, 0xbeed279f, 0xbf42eb7c, 0xbf53589f, + 0x3e1bb477, 0x3ecca7ba, 0xbea853ea, 0xbf07f406, 0x3f58e700, + 0x3e60aae1); + asm volatile("vfmin.vv v4, v8, v12"); + // 0.33477312, -0.14129849, -0.94871885, 0.83600986, + // -0.28163233, -0.99458516, -0.73125440, -0.54823470, + // -0.76140571, -0.82557100, -0.76835793, 0.39971715, + // -0.32876521, -0.76598656, -0.36499983, -0.52954155 + VCMP_U32(2, v4, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + 0xbf7e9d22, 0xbf3b337d, 0xbf0c591c, 0xbf42eb7c, 0xbf53589f, + 0xbf44b31b, 0x3ecca7ba, 0xbea853ea, 0xbf4417b2, 0xbebae142, + 0xbf079009); + + VSET(16, e64, m8); + // 0.9387726994461698, 0.8517969615002949, -0.8864275043807637, + // 0.3621349692771021, 0.5392486258321831, -0.1288714247798126, + // -0.9149173505741688, -0.9378576380992047, + // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, + // 0.5394596797016060, 0.7861587828590215, 0.0194772848204161, + // -0.9126826319328591, 0.3997583898469530 + VLOAD_64(v16, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, + 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 
0xbfc07edbdd68bb68, + 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, + 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fe14340f23a8770, + 0x3fe9283676baf718, 0x3f93f1da754635c0, 0xbfed34b234f8ec38, + 0x3fd995a436ac6f1c); + // 0.4808082103120717, 0.7218925128932789, -0.9454618185734458, + // 0.7335258472548418, 0.9800819535502201, -0.6873536121819364, + // -0.7090903925273744, 0.7813319828098306, 0.6810234154055235, + // 0.1176441554686278, 0.4929731878752270, 0.0942028280153233, + // 0.9496420237972776, -0.4549651855719854, -0.9663401540020158, + // 0.4114236885680320 + VLOAD_64(v24, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + 0x3fe7790b32975e1a, 0x3fef5cd4d43cbc4e, 0xbfe5fecd00a37bfa, + 0xbfe6b0de55ba0314, 0x3fe900abee2f95f8, 0x3fe5caf19e1f4324, + 0x3fbe1ded684da4d0, 0x3fdf8cdf69eea758, 0x3fb81dad31843b10, + 0x3fee6377ab63bade, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, + 0x3fda54c405ccc2c0); + asm volatile("vfmin.vv v8, v16, v24"); + // 0.4808082103120717, 0.7218925128932789, -0.9454618185734458, + // 0.3621349692771021, 0.5392486258321831, -0.6873536121819364, + // -0.9149173505741688, -0.9378576380992047, + // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, + // 0.0942028280153233, 0.7861587828590215, -0.4549651855719854, + // -0.9663401540020158, 0.3997583898469530 + VCMP_U64(3, v8, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 0xbfe5fecd00a37bfa, + 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, + 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fb81dad31843b10, + 0x3fe9283676baf718, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, + 0x3fd995a436ac6f1c); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.9390, 0.9619, 0.9121, 0.6265, 0.1193, -0.4492, -0.3562, + // 0.2365, -0.9897, 0.8638, -0.0379, -0.6201, 0.1809, 0.9824, + // -0.9922, -0.6851 + 
VLOAD_16(v4, 0x3b83, 0x3bb2, 0x3b4c, 0x3903, 0x2fa2, 0xb730, 0xb5b3, 0x3391, + 0xbbeb, 0x3ae9, 0xa8da, 0xb8f6, 0x31ca, 0x3bdc, 0xbbf0, 0xb97b); + // 0.9795, -0.1069, 0.7070, -0.7305, 0.0516, -0.1321, 0.3828, + // 0.0230, -0.9424, -0.8652, -0.3865, -0.1719, -0.7021, 0.1664, + // 0.7026, -0.8535 + VLOAD_16(v6, 0x3bd6, 0xaed8, 0x39a8, 0xb9d8, 0x2a9a, 0xb03a, 0x3620, 0x25e2, + 0xbb8a, 0xbaec, 0xb62f, 0xb180, 0xb99e, 0x3153, 0x399f, 0xbad4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmin.vv v2, v4, v6, v0.t"); + // 0.0000, -0.1069, 0.0000, -0.7305, 0.0000, -0.4492, 0.0000, + // 0.0230, 0.0000, -0.8652, 0.0000, -0.6201, 0.0000, 0.1664, + // 0.0000, -0.8535 + VCMP_U16(4, v2, 0x0, 0xaed8, 0x0, 0xb9d8, 0x0, 0xb730, 0x0, 0x25e2, 0x0, + 0xbaec, 0x0, 0xb8f6, 0x0, 0x3153, 0x0, 0xbad4); + + VSET(16, e32, m4); + // 0.33477312, -0.14129849, -0.94871885, 0.83600986, + // -0.28163233, -0.47814348, 0.77408481, -0.54823470, + // -0.72419900, 0.27495387, -0.76835793, 0.71516198, + // 0.32305571, -0.76598656, -0.36499983, -0.52954155 + VLOAD_32(v8, 0x3eab6762, 0xbe10b08d, 0xbf72df3d, 0x3f5604be, 0xbe90321d, + 0xbef4cf39, 0x3f462a6c, 0xbf0c591c, 0xbf39651b, 0x3e8cc6c1, + 0xbf44b31b, 0x3f3714db, 0x3ea5678f, 0xbf4417b2, 0xbebae142, + 0xbf079009); + // 0.87184614, 0.40005061, 0.40118238, 0.97373396, + // 0.74085194, -0.99458516, -0.73125440, -0.46319291, + // -0.76140571, -0.82557100, 0.15205561, 0.39971715, + // -0.32876521, -0.53106725, 0.84727478, 0.21940185 + VLOAD_32(v12, 0x3f5f314f, 0x3eccd36f, 0x3ecd67c7, 0x3f7946a1, 0x3f3da879, + 0xbf7e9d22, 0xbf3b337d, 0xbeed279f, 0xbf42eb7c, 0xbf53589f, + 0x3e1bb477, 0x3ecca7ba, 0xbea853ea, 0xbf07f406, 0x3f58e700, + 0x3e60aae1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmin.vv v4, v8, v12, v0.t"); + // 0.00000000, -0.14129849, 0.00000000, 0.83600986, + // 0.00000000, -0.99458516, 0.00000000, -0.54823470, + // 0.00000000, -0.82557100, 0.00000000, 0.39971715, + // 0.00000000, -0.76598656, 0.00000000, -0.52954155 + 
VCMP_U32(5, v4, 0x0, 0xbe10b08d, 0x0, 0x3f5604be, 0x0, 0xbf7e9d22, 0x0, + 0xbf0c591c, 0x0, 0xbf53589f, 0x0, 0x3ecca7ba, 0x0, 0xbf4417b2, 0x0, + 0xbf079009); + + VSET(16, e64, m8); + // 0.9387726994461698, 0.8517969615002949, -0.8864275043807637, + // 0.3621349692771021, 0.5392486258321831, -0.1288714247798126, + // -0.9149173505741688, -0.9378576380992047, + // -0.2263428385339852, 0.1016628884386184, 0.4783549203499486, + // 0.5394596797016060, 0.7861587828590215, 0.0194772848204161, + // -0.9126826319328591, 0.3997583898469530 + VLOAD_64(v16, 0x3fee0a6d0b4ff74a, 0x3feb41ebb38f3ae2, 0xbfec5d9d36b2e38c, + 0x3fd72d3826721e9c, 0x3fe14186558b96e0, 0xbfc07edbdd68bb68, + 0xbfed4700c06849e8, 0xbfee02ee057e1390, 0xbfccf8cd5897f8a0, + 0x3fba06943d0f8e20, 0x3fde9d5df4b22860, 0x3fe14340f23a8770, + 0x3fe9283676baf718, 0x3f93f1da754635c0, 0xbfed34b234f8ec38, + 0x3fd995a436ac6f1c); + // 0.4808082103120717, 0.7218925128932789, -0.9454618185734458, + // 0.7335258472548418, 0.9800819535502201, -0.6873536121819364, + // -0.7090903925273744, 0.7813319828098306, 0.6810234154055235, + // 0.1176441554686278, 0.4929731878752270, 0.0942028280153233, + // 0.9496420237972776, -0.4549651855719854, -0.9663401540020158, + // 0.4114236885680320 + VLOAD_64(v24, 0x3fdec58fccbc12a4, 0x3fe719be53c35314, 0xbfee413924cc77e4, + 0x3fe7790b32975e1a, 0x3fef5cd4d43cbc4e, 0xbfe5fecd00a37bfa, + 0xbfe6b0de55ba0314, 0x3fe900abee2f95f8, 0x3fe5caf19e1f4324, + 0x3fbe1ded684da4d0, 0x3fdf8cdf69eea758, 0x3fb81dad31843b10, + 0x3fee6377ab63bade, 0xbfdd1e264c366a78, 0xbfeeec422fc80224, + 0x3fda54c405ccc2c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmin.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.7218925128932789, 0.0000000000000000, + // 0.3621349692771021, 0.0000000000000000, -0.6873536121819364, + // 0.0000000000000000, -0.9378576380992047, 0.0000000000000000, + // 0.1016628884386184, 0.0000000000000000, 0.0942028280153233, + // 0.0000000000000000, -0.4549651855719854, 0.0000000000000000, 
+ // 0.3997583898469530 + VCMP_U64(6, v8, 0x0, 0x3fe719be53c35314, 0x0, 0x3fd72d3826721e9c, 0x0, + 0xbfe5fecd00a37bfa, 0x0, 0xbfee02ee057e1390, 0x0, 0x3fba06943d0f8e20, + 0x0, 0x3fb81dad31843b10, 0x0, 0xbfdd1e264c366a78, 0x0, + 0x3fd995a436ac6f1c); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.4434 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3718); + // -0.2537, 0.5449, 0.2070, -0.5752, -0.3008, 0.0165, -0.8447, + // 0.6279, 0.6802, 0.7300, 0.7720, -0.8525, 0.5264, -0.5249, + // -0.9839, 0.4875 + VLOAD_16(v4, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, + 0x3971, 0x39d7, 0x3a2d, 0xbad2, 0x3836, 0xb833, 0xbbdf, 0x37cd); + asm volatile("vfmin.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -0.2537, 0.4434, 0.2070, -0.5752, -0.3008, 0.0165, -0.8447, + // 0.4434, 0.4434, 0.4434, 0.4434, -0.8525, 0.4434, -0.5249, + // -0.9839, 0.4434 + VCMP_U16(7, v2, 0xb40f, 0x3718, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, + 0x3718, 0x3718, 0x3718, 0x3718, 0xbad2, 0x3718, 0xb833, 0xbbdf, + 0x3718); + + VSET(16, e32, m4); + double dscalar_32; + // 0.59499639 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f1851af); + // -0.94244474, -0.52559608, -0.72424960, -0.67824948, + // 0.22003150, -0.67564118, -0.90376341, 0.16465612, + // -0.15494362, -0.01763406, 0.97777683, -0.91671157, + // 0.81712914, -0.10151604, 0.03442690, -0.14597759 + VLOAD_32(v8, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, + 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, + 0x3f7a4f95, 0xbf6aad9c, 0x3f512f60, 0xbdcfe7a4, 0x3d0d0338, + 0xbe157b26); + asm volatile("vfmin.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.94244474, -0.52559608, -0.72424960, -0.67824948, + // 0.22003150, -0.67564118, -0.90376341, 0.16465612, + // -0.15494362, -0.01763406, 0.59499639, -0.91671157, + // 0.59499639, -0.10151604, 0.03442690, -0.14597759 + VCMP_U32(8, v4, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, 
+ 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, + 0x3f1851af, 0xbf6aad9c, 0x3f1851af, 0xbdcfe7a4, 0x3d0d0338, + 0xbe157b26); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8631130564395617 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3feb9e9f45c51298); + // -0.0203711476424431, 0.4824066080711997, 0.5516514149436702, + // -0.0992829085793798, 0.7425996730256406, 0.3080149644930992, + // -0.6753031265127754, -0.3309631180416657, + // -0.7695072044924456, -0.6726760621514143, + // -0.9995830020822822, 0.2485224245452053, 0.7025040357726613, + // -0.6452676560401207, 0.5090044889036880, 0.0801949752856408 + VLOAD_64(v16, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, + 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, + 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, + 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, + 0x3fb487a86c27c560); + asm volatile("vfmin.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.0203711476424431, 0.4824066080711997, 0.5516514149436702, + // -0.0992829085793798, 0.7425996730256406, 0.3080149644930992, + // -0.6753031265127754, -0.3309631180416657, -0.7695072044924456, + // -0.6726760621514143, -0.9995830020822822, 0.2485224245452053, + // 0.7025040357726613, -0.6452676560401207, 0.5090044889036880, + // 0.0801949752856408 + VCMP_U64(9, v8, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, + 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, + 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, + 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, + 0x3fb487a86c27c560); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.4434 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3718); + // -0.2537, 0.5449, 0.2070, -0.5752, -0.3008, 0.0165, + // 
-0.8447, 0.6279, 0.6802, 0.7300, 0.7720, -0.8525, 0.5264, + // -0.5249, -0.9839, 0.4875 + VLOAD_16(v4, 0xb40f, 0x385c, 0x32a0, 0xb89a, 0xb4d0, 0x2437, 0xbac2, 0x3906, + 0x3971, 0x39d7, 0x3a2d, 0xbad2, 0x3836, 0xb833, 0xbbdf, 0x37cd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmin.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.4434, 0.0000, -0.5752, 0.0000, 0.0165, 0.0000, + // 0.4434, 0.0000, 0.4434, 0.0000, -0.8525, 0.0000, + // -0.5249, 0.0000, 0.4434 + VCMP_U16(10, v2, 0x0, 0x3718, 0x0, 0xb89a, 0x0, 0x2437, 0x0, 0x3718, 0x0, + 0x3718, 0x0, 0xbad2, 0x0, 0xb833, 0x0, 0x3718); + + VSET(16, e32, m4); + double dscalar_32; + // 0.59499639 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f1851af); + // -0.94244474, -0.52559608, -0.72424960, -0.67824948, + // 0.22003150, -0.67564118, -0.90376341, 0.16465612, + // -0.15494362, -0.01763406, 0.97777683, -0.91671157, + // 0.81712914, -0.10151604, 0.03442690, -0.14597759 + VLOAD_32(v8, 0xbf71440f, 0xbf068d77, 0xbf39686c, 0xbf2da1c2, 0x3e614ff0, + 0xbf2cf6d2, 0xbf675d0a, 0x3e289b9d, 0xbe1ea98a, 0xbc90754e, + 0x3f7a4f95, 0xbf6aad9c, 0x3f512f60, 0xbdcfe7a4, 0x3d0d0338, + 0xbe157b26); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmin.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.52559608, 0.00000000, -0.67824948, + // 0.00000000, -0.67564118, 0.00000000, 0.16465612, + // 0.00000000, -0.01763406, 0.00000000, -0.91671157, + // 0.00000000, -0.10151604, 0.00000000, -0.14597759 + VCMP_U32(11, v4, 0x0, 0xbf068d77, 0x0, 0xbf2da1c2, 0x0, 0xbf2cf6d2, 0x0, + 0x3e289b9d, 0x0, 0xbc90754e, 0x0, 0xbf6aad9c, 0x0, 0xbdcfe7a4, 0x0, + 0xbe157b26); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8631130564395617 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3feb9e9f45c51298); + // -0.0203711476424431, 0.4824066080711997, + // 0.5516514149436702, -0.0992829085793798, 0.7425996730256406, + // 0.3080149644930992, -0.6753031265127754, + // -0.3309631180416657, -0.7695072044924456, + // 
-0.6726760621514143, -0.9995830020822822, + // 0.2485224245452053, 0.7025040357726613, + // -0.6452676560401207, 0.5090044889036880, 0.0801949752856408 + VLOAD_64(v16, 0xbf94dc2c93a00800, 0x3fdedfbff74290e0, 0x3fe1a720de3f34c0, + 0xbfb96a9acd667320, 0x3fe7c36063b54b1e, 0x3fd3b68465cb4b28, + 0xbfe59c154d684914, 0xbfd52e7fee0af3fc, 0xbfe89fcd92aa9b24, + 0xbfe5868ff2f7c1a4, 0xbfeffc957df296c0, 0x3fcfcf9532df44b8, + 0x3fe67ae9be5e7376, 0xbfe4a6085afb7c12, 0x3fe049c3c82b791e, + 0x3fb487a86c27c560); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmin.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.4824066080711997, 0.0000000000000000, + // -0.0992829085793798, 0.0000000000000000, + // 0.3080149644930992, 0.0000000000000000, + // -0.3309631180416657, 0.0000000000000000, + // -0.6726760621514143, 0.0000000000000000, + // 0.2485224245452053, 0.0000000000000000, + // -0.6452676560401207, 0.0000000000000000, 0.0801949752856408 + VCMP_U64(12, v8, 0x0, 0x3fdedfbff74290e0, 0x0, 0xbfb96a9acd667320, 0x0, + 0x3fd3b68465cb4b28, 0x0, 0xbfd52e7fee0af3fc, 0x0, 0xbfe5868ff2f7c1a4, + 0x0, 0x3fcfcf9532df44b8, 0x0, 0xbfe4a6085afb7c12, 0x0, + 0x3fb487a86c27c560); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c new file mode 100644 index 000000000..dedcbaf58 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsac.c @@ -0,0 +1,454 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.9175, 0.0740, -0.0362, 0.2961, 0.6226, -0.8032, 0.7749, + // 0.8188, 0.2019, 0.4885, 0.5669, -0.1743, 0.4404, 0.0618, + // 0.8252, -0.5947 + VLOAD_16(v4, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, + 0x3276, 0x37d1, 0x3889, 0xb194, 0x370c, 0x2bea, 0x3a9a, 0xb8c2); + // 0.2812, -0.6733, 0.3289, 0.9609, -0.6841, 0.5488, -0.1401, + // 0.5591, 0.2759, -0.6973, 0.1418, 0.2389, -0.3308, 0.8867, + // 0.8936, 0.3611 + VLOAD_16(v6, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, + 0x346a, 0xb994, 0x308a, 0x33a5, 0xb54b, 0x3b18, 0x3b26, 0x35c7); + // 0.4780, -0.1810, 0.2800, 0.7529, 0.6484, -0.9526, + // 0.5947, 1.0000, -0.1874, 0.2949, -0.4280, 0.6934, 0.5444, + // 0.8823, -0.8911, 0.7603 + VLOAD_16(v2, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, + 0xb1ff, 0x34b8, 0xb6d9, 0x398c, 0x385b, 0x3b0f, 0xbb21, 0x3a15); + asm volatile("vfmsac.vv v2, v4, v6"); + // -0.2200, 0.1312, -0.2920, -0.4683, -1.0742, 0.5117, -0.7031, + // -0.5420, 0.2430, -0.6357, 0.5083, -0.7349, -0.6904, -0.8276, + // 1.6289, -0.9751 + VCMP_U16(1, v2, 0xb30a, 0x3033, 0xb4ac, 0xb77e, 0xbc4c, 0x3818, 0xb9a0, + 0xb856, 0x33c7, 0xb916, 0x3811, 0xb9e1, 0xb985, 0xba9f, 0x3e84, + 0xbbcd); + + VSET(16, e32, m4); + // -0.90310860, 0.30282700, 0.54854167, -0.38732994, + // 0.92121714, 0.99595129, -0.10263380, 0.83759040, + // -0.23468767, 0.03914077, -0.46234205, 0.38326120, + // 0.36417511, -0.50103557, 0.36991179, 0.44718841 + VLOAD_32(v8, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 0x3f6bd4e3, + 0x3f7ef6aa, 0xbdd231ab, 0x3f566c53, 0xbe7051f7, 0x3d205212, + 0xbeecb819, 0x3ec43ad0, 0x3eba7529, 0xbf0043de, 0x3ebd6514, + 0x3ee4f5e1); + // 0.84989786, -0.04543342, -0.74596256, -0.30687407, + // -0.30795863, 0.57084304, 
0.51653886, -0.97366458, + // 0.49300706, 0.62932760, 0.45846274, -0.73850167, + // -0.42686453, -0.31419462, -0.47245970, -0.87721694 + VLOAD_32(v12, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, + 0x3f1222c5, 0x3f043be4, 0xbf794215, 0x3efc6b6c, 0x3f211b9d, + 0x3eeabba1, 0xbf3d0e72, 0xbeda8dfd, 0xbea0de1e, 0xbef1e63d, + 0xbf60914a); + // -0.76813585, 0.87161541, -0.67958647, -0.98584491, + // 0.12284227, -0.04006640, -0.93113720, -0.93526161, + // -0.27461481, 0.64110506, 0.61687475, -0.43741968, + // 0.70502371, 0.37014356, -0.98105848, 0.77993429 + VLOAD_32(v4, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, + 0xbd241caa, 0xbf6e5f02, 0xbf6f6d4e, 0xbe8c9a50, 0x3f241f76, + 0x3f1deb81, 0xbedff579, 0x3f347c6f, 0x3ebd8375, 0xbf7b26a6, + 0x3f47a9c6); + asm volatile("vfmsac.vv v4, v8, v12"); + // 0.00058579, -0.88537389, 0.27039492, 1.10470641, + // -0.40653905, 0.60859829, 0.87812287, 0.11972952, + // 0.15891212, -0.61647266, -0.82884133, 0.15438065, + // -0.86047715, -0.21272089, 0.80629003, -1.17221546 + VCMP_U32(2, v4, 0x3a198f11, 0xbf62a7dd, 0x3e8a7134, 0x3f8d6705, 0xbed025e3, + 0x3f1bcd19, 0x3f60cca9, 0x3df534be, 0x3e22b9dd, 0xbf1dd127, + 0xbf542ef3, 0x3e1e15f6, 0xbf5c483b, 0xbe59d381, 0x3f4e6907, + 0xbf960b29); + + VSET(16, e64, m8); + // 0.0971325394189311, 0.6403859199401045, 0.3478142243141771, + // -0.4702414117546168, 0.8862438155310881, 0.6157878617136987, + // -0.9954501284062294, -0.2761157935600853, + // -0.7189549700888722, -0.2302799669824283, 0.0093666993515229, + // 0.9188774299961215, -0.4297410504980956, 0.2729294776457381, + // -0.1419575372981836, -0.8472908703054822 + VLOAD_64(v16, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, + 0xbfde186f6f2d8030, 0x3fec5c1bfd7f9ffe, 0x3fe3b488beeab100, + 0xbfefdaba3a49b85e, 0xbfd1abe193cffa54, 0xbfe701adda7a81f6, + 0xbfcd79d05f8e86d0, 0x3f832ed91b170d00, 0x3fed6771a3dbb538, + 0xbfdb80e09b68d514, 0x3fd177ad33269468, 0xbfc22baa220ee628, + 0xbfeb1d01be452a62); + // 
-0.2416734667201210, -0.2737616510555549, 0.6084509432766920, + // -0.4000545529138850, 0.5985258122916897, -0.9559409603601607, + // 0.5010970610326939, 0.5772808284477746, -0.4551243154247406, + // 0.6584804564152213, 0.6542532086910551, -0.2215058802905889, + // 0.3203723346938081, 0.0696368102348055, 0.8882580549203218, + // 0.7725843936650791 + VLOAD_64(v24, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, + 0xbfd99a7e695862ec, 0x3fe3271f9ab3593a, 0xbfee97117f34eb4c, + 0x3fe008fcb4283a76, 0x3fe27915a4d94fb2, 0xbfdd20c1bc974608, + 0x3fe512459b2b7912, 0x3fe4efa46cd43256, 0xbfcc5a4dffdc2170, + 0x3fd480faf7036588, 0x3fb1d3b7ce8e6640, 0x3fec6c9c280952c6, + 0x3fe8b902e80620ce); + // -0.8982912058335177, 0.5582779858188844, -0.3988318240568800, + // 0.0267896464795028, 0.8241806039831361, 0.2839220639224551, + // -0.4781090814672235, -0.1240154287362147, 0.4586341020154134, + // -0.6132901056934972, 0.1207753636997857, 0.4420874266235846, + // 0.8256868703569773, 0.0642541522901756, -0.3012484644971416, + // 0.7323810741358745 + VLOAD_64(v8, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, + 0x3f9b6ebebe00e300, 0x3fea5fb000835cf4, 0x3fd22bc772ca399c, + 0xbfde9956d534a0f8, 0xbfbfbf79a29f1810, 0x3fdd5a42d93f2348, + 0xbfe3a012925d3f1c, 0x3fbeeb225d40ca30, 0x3fdc4b290fd48cd4, + 0x3fea6c06df1d6f14, 0x3fb072f5cab7f020, 0xbfd347a7a3bf1174, + 0x3fe76faa6f33ef10); + asm volatile("vfmsac.vv v8, v16, v24"); + // 0.8748168483008159, -0.7335910925744179, 0.6104597169258920, + // 0.1613325712615846, -0.2937408044039052, -0.8725789038271781, + // -0.0207080522817558, -0.0353809253176660, + // -0.1314202134325006, 0.4616552479316257, + // -0.1146471705942074, -0.6456241806340295, + // -0.9633640140188219, -0.0452482140478748, 0.1751535385353780, + // -1.3869847774287927 + VCMP_U64(3, v8, 0x3febfe7fe72e2334, 0xbfe7799406e7cf1f, 0x3fe388e2d0f71ba3, + 0x3fc4a68bb2ac8e62, 0xbfd2cca63b1a97a5, 0xbfebec2a97e3c096, + 0xbf95347ddd418906, 0xbfa21d72da487f01, 
0xbfc0d260a75ceb46, + 0x3fdd8bc273f9289e, 0xbfbd59845847f323, 0xbfe4a8f40aaa8efa, + 0xbfeed3e0c4cb54ff, 0xbfa72ac61f1378a7, 0x3fc66b6e5fe4e141, + 0xbff63116f331b43d); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.9175, 0.0740, -0.0362, 0.2961, 0.6226, -0.8032, 0.7749, + // 0.8188, 0.2019, 0.4885, 0.5669, -0.1743, 0.4404, 0.0618, + // 0.8252, -0.5947 + VLOAD_16(v4, 0x3b57, 0x2cbc, 0xa8a3, 0x34bd, 0x38fb, 0xba6d, 0x3a33, 0x3a8d, + 0x3276, 0x37d1, 0x3889, 0xb194, 0x370c, 0x2bea, 0x3a9a, 0xb8c2); + // 0.2812, -0.6733, 0.3289, 0.9609, -0.6841, 0.5488, -0.1401, + // 0.5591, 0.2759, -0.6973, 0.1418, 0.2389, -0.3308, 0.8867, + // 0.8936, 0.3611 + VLOAD_16(v6, 0x3480, 0xb963, 0x3543, 0x3bb0, 0xb979, 0x3864, 0xb07c, 0x3879, + 0x346a, 0xb994, 0x308a, 0x33a5, 0xb54b, 0x3b18, 0x3b26, 0x35c7); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.4780, -0.1810, 0.2800, 0.7529, 0.6484, -0.9526, + // 0.5947, 1.0000, -0.1874, 0.2949, -0.4280, 0.6934, 0.5444, + // 0.8823, -0.8911, 0.7603 + VLOAD_16(v2, 0x37a6, 0xb1cb, 0x347b, 0x3a06, 0x3930, 0xbb9f, 0x38c2, 0x3c00, + 0xb1ff, 0x34b8, 0xb6d9, 0x398c, 0x385b, 0x3b0f, 0xbb21, 0x3a15); + asm volatile("vfmsac.vv v2, v4, v6, v0.t"); + // 0.4780, 0.1312, 0.2800, -0.4683, 0.6484, 0.5117, 0.5947, + // -0.5420, -0.1874, -0.6357, -0.4280, -0.7349, 0.5444, + // -0.8276, -0.8911, -0.9751 + VCMP_U16(4, v2, 0x37a6, 0x3033, 0x347b, 0xb77e, 0x3930, 0x3818, 0x38c2, + 0xb856, 0xb1ff, 0xb916, 0xb6d9, 0xb9e1, 0x385b, 0xba9f, 0xbb21, + 0xbbcd); + + VSET(16, e32, m4); + // -0.90310860, 0.30282700, 0.54854167, -0.38732994, + // 0.92121714, 0.99595129, -0.10263380, 0.83759040, + // -0.23468767, 0.03914077, -0.46234205, 0.38326120, + // 0.36417511, -0.50103557, 0.36991179, 0.44718841 + VLOAD_32(v8, 0xbf673220, 0x3e9b0c24, 0x3f0c6d3a, 0xbec6501c, 0x3f6bd4e3, + 0x3f7ef6aa, 0xbdd231ab, 0x3f566c53, 0xbe7051f7, 0x3d205212, + 0xbeecb819, 0x3ec43ad0, 0x3eba7529, 0xbf0043de, 
0x3ebd6514, + 0x3ee4f5e1); + // 0.84989786, -0.04543342, -0.74596256, -0.30687407, + // -0.30795863, 0.57084304, 0.51653886, -0.97366458, + // 0.49300706, 0.62932760, 0.45846274, -0.73850167, + // -0.42686453, -0.31419462, -0.47245970, -0.87721694 + VLOAD_32(v12, 0x3f5992e8, 0xbd3a1866, 0xbf3ef767, 0xbe9d1e99, 0xbe9dacc1, + 0x3f1222c5, 0x3f043be4, 0xbf794215, 0x3efc6b6c, 0x3f211b9d, + 0x3eeabba1, 0xbf3d0e72, 0xbeda8dfd, 0xbea0de1e, 0xbef1e63d, + 0xbf60914a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.76813585, 0.87161541, -0.67958647, -0.98584491, + // 0.12284227, -0.04006640, -0.93113720, -0.93526161, + // -0.27461481, 0.64110506, 0.61687475, -0.43741968, + // 0.70502371, 0.37014356, -0.98105848, 0.77993429 + VLOAD_32(v4, 0xbf44a48d, 0x3f5f2230, 0xbf2df961, 0xbf7c6055, 0x3dfb94bb, + 0xbd241caa, 0xbf6e5f02, 0xbf6f6d4e, 0xbe8c9a50, 0x3f241f76, + 0x3f1deb81, 0xbedff579, 0x3f347c6f, 0x3ebd8375, 0xbf7b26a6, + 0x3f47a9c6); + asm volatile("vfmsac.vv v4, v8, v12, v0.t"); + // -0.76813585, -0.88537389, -0.67958647, 1.10470641, + // 0.12284227, 0.60859829, -0.93113720, 0.11972952, + // -0.27461481, -0.61647266, 0.61687475, 0.15438065, + // 0.70502371, -0.21272089, -0.98105848, -1.17221546 + VCMP_U32(5, v4, 0xbf44a48d, 0xbf62a7dd, 0xbf2df961, 0x3f8d6705, 0x3dfb94bb, + 0x3f1bcd19, 0xbf6e5f02, 0x3df534be, 0xbe8c9a50, 0xbf1dd127, + 0x3f1deb81, 0x3e1e15f6, 0x3f347c6f, 0xbe59d381, 0xbf7b26a6, + 0xbf960b29); + + VSET(16, e64, m8); + // 0.0971325394189311, 0.6403859199401045, 0.3478142243141771, + // -0.4702414117546168, 0.8862438155310881, 0.6157878617136987, + // -0.9954501284062294, -0.2761157935600853, + // -0.7189549700888722, -0.2302799669824283, 0.0093666993515229, + // 0.9188774299961215, -0.4297410504980956, 0.2729294776457381, + // -0.1419575372981836, -0.8472908703054822 + VLOAD_64(v16, 0x3fb8ddad982e8680, 0x3fe47e0a9cdec59e, 0x3fd6429697a0d4f8, + 0xbfde186f6f2d8030, 0x3fec5c1bfd7f9ffe, 0x3fe3b488beeab100, + 0xbfefdaba3a49b85e, 0xbfd1abe193cffa54, 0xbfe701adda7a81f6, + 
0xbfcd79d05f8e86d0, 0x3f832ed91b170d00, 0x3fed6771a3dbb538, + 0xbfdb80e09b68d514, 0x3fd177ad33269468, 0xbfc22baa220ee628, + 0xbfeb1d01be452a62); + // -0.2416734667201210, -0.2737616510555549, 0.6084509432766920, + // -0.4000545529138850, 0.5985258122916897, -0.9559409603601607, + // 0.5010970610326939, 0.5772808284477746, -0.4551243154247406, + // 0.6584804564152213, 0.6542532086910551, -0.2215058802905889, + // 0.3203723346938081, 0.0696368102348055, 0.8882580549203218, + // 0.7725843936650791 + VLOAD_64(v24, 0xbfceef27f9efdac8, 0xbfd1854f968baf44, 0x3fe3786e1cd2fff2, + 0xbfd99a7e695862ec, 0x3fe3271f9ab3593a, 0xbfee97117f34eb4c, + 0x3fe008fcb4283a76, 0x3fe27915a4d94fb2, 0xbfdd20c1bc974608, + 0x3fe512459b2b7912, 0x3fe4efa46cd43256, 0xbfcc5a4dffdc2170, + 0x3fd480faf7036588, 0x3fb1d3b7ce8e6640, 0x3fec6c9c280952c6, + 0x3fe8b902e80620ce); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.8982912058335177, 0.5582779858188844, -0.3988318240568800, + // 0.0267896464795028, 0.8241806039831361, 0.2839220639224551, + // -0.4781090814672235, -0.1240154287362147, 0.4586341020154134, + // -0.6132901056934972, 0.1207753636997857, 0.4420874266235846, + // 0.8256868703569773, 0.0642541522901756, -0.3012484644971416, + // 0.7323810741358745 + VLOAD_64(v8, 0xbfecbecd32eadc10, 0x3fe1dd69cb65674e, 0xbfd98675ea3b69b0, + 0x3f9b6ebebe00e300, 0x3fea5fb000835cf4, 0x3fd22bc772ca399c, + 0xbfde9956d534a0f8, 0xbfbfbf79a29f1810, 0x3fdd5a42d93f2348, + 0xbfe3a012925d3f1c, 0x3fbeeb225d40ca30, 0x3fdc4b290fd48cd4, + 0x3fea6c06df1d6f14, 0x3fb072f5cab7f020, 0xbfd347a7a3bf1174, + 0x3fe76faa6f33ef10); + asm volatile("vfmsac.vv v8, v16, v24, v0.t"); + // -0.8982912058335177, -0.7335910925744179, -0.3988318240568800, + // 0.1613325712615846, 0.8241806039831361, -0.8725789038271781, + // -0.4781090814672235, -0.0353809253176660, 0.4586341020154134, + // 0.4616552479316257, 0.1207753636997857, -0.6456241806340295, + // 0.8256868703569773, -0.0452482140478748, -0.3012484644971416, + // -1.3869847774287927 + VCMP_U64(6, v8, 
0xbfecbecd32eadc10, 0xbfe7799406e7cf1f, 0xbfd98675ea3b69b0, + 0x3fc4a68bb2ac8e62, 0x3fea5fb000835cf4, 0xbfebec2a97e3c096, + 0xbfde9956d534a0f8, 0xbfa21d72da487f01, 0x3fdd5a42d93f2348, + 0x3fdd8bc273f9289e, 0x3fbeeb225d40ca30, 0xbfe4a8f40aaa8efa, + 0x3fea6c06df1d6f14, 0xbfa72ac61f1378a7, 0xbfd347a7a3bf1174, + 0xbff63116f331b43d); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.3911 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3642); + // 0.3203, -0.8521, 0.0744, 0.9370, 0.2056, 0.2866, -0.8379, + // -0.2668, -0.0878, -0.5703, 0.1272, -0.6606, -0.6919, 0.9189, + // 0.6245, 0.9932 + VLOAD_16(v4, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, + 0xad9f, 0xb890, 0x3012, 0xb949, 0xb989, 0x3b5a, 0x38ff, 0x3bf2); + // 0.8706, 0.4900, -0.9497, 0.4727, -0.7168, 0.0167, -0.3606, + // -0.1565, -0.5142, 0.8271, -0.4783, 0.6318, 0.0842, + // -0.6646, 0.1454, -0.3020 + VLOAD_16(v2, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, + 0xb81d, 0x3a9e, 0xb7a7, 0x390e, 0x2d63, 0xb951, 0x30a7, 0xb4d5); + asm volatile("vfmsac.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.7451, -0.8232, 0.9790, -0.1062, 0.7974, 0.0955, 0.0330, + // 0.0521, 0.4797, -1.0498, 0.5278, -0.8901, -0.3547, 1.0234, + // 0.0989, 0.6904 + VCMP_U16(7, v2, 0xb9f6, 0xba96, 0x3bd5, 0xaecc, 0x3a61, 0x2e1b, 0x2836, + 0x2aac, 0x37ad, 0xbc33, 0x3839, 0xbb1f, 0xb5ad, 0x3c19, 0x2e54, + 0x3986); + + VSET(16, e32, m4); + double dscalar_32; + // -0.39704049 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbecb48e4); + // 0.43844241, 0.13734208, 0.15601240, 0.48965997, + // -0.41457745, -0.69918746, 0.38535324, 0.83301985, + // 0.79336989, -0.03326649, -0.85931808, 0.92554229, + // -0.77742523, 0.47821125, -0.53653014, -0.32442030 + VLOAD_32(v8, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, + 0xbf32fdf3, 0x3ec54d05, 0x3f5540ca, 0x3f4b1a4a, 0xbd084272, + 0xbf5bfc45, 0x3f6cf057, 0xbf470557, 0x3ef4d81b, 0xbf095a0a, + 
0xbea61a6b); + // -0.73119336, 0.87333083, -0.16325396, -0.30275631, + // 0.34779423, 0.22721651, 0.47497734, -0.58483958, + // -0.24916913, 0.13750601, -0.99799657, 0.66137350, + // 0.58565408, 0.68887448, -0.74538875, 0.99311894 + VLOAD_32(v4, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, + 0x3e68ab72, 0x3ef3303b, 0xbf15b80c, 0xbe7f2631, 0x3e0cce60, + 0xbf7f7cb4, 0x3f294fc6, 0x3f15ed6d, 0x3f305a14, 0xbf3ed1cc, + 0x3f7e3d0b); + asm volatile("vfmsac.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // 0.55711401, -0.92786121, 0.10131072, 0.10834149, + // -0.18319020, 0.05038923, -0.62797821, 0.25409698, + // -0.06583084, -0.12429786, 1.33918071, -1.02885127, + // -0.27698478, -0.87874371, 0.95841295, -0.86431098 + VCMP_U32(8, v4, 0x3f0e9f06, 0xbf6d8850, 0x3dcf7bff, 0x3ddde223, 0xbe3b9636, + 0x3d4e64ec, 0xbf20c32e, 0x3e821900, 0xbd86d252, 0xbdfe8fe1, + 0x3fab6a45, 0xbf83b166, 0xbe8dd0f3, 0xbf60f559, 0x3f755a8d, + 0xbf5d437b); + + VSET(16, e64, m8); + double dscalar_64; + // 0.0070730785066928 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7cf8a90de48000); + // -0.3542445595658672, 0.5861662785402695, 0.5713440701195280, + // 0.2841717566030781, 0.4022451154567073, -0.9804628417449213, + // -0.9703836833708208, 0.2288593539727362, 0.1806576644288407, + // 0.4892172254017777, 0.9508074316559227, -0.9022151172016701, + // -0.7929839752648156, 0.5513143449560454, 0.4823446191982377, + // -0.7486658065787619 + VLOAD_64(v16, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, + 0x3fd22fdebc43a768, 0x3fd9be624bf72ff4, 0xbfef5ff39c079aea, + 0xbfef0d621514ca4c, 0x3fcd4b43685929d8, 0x3fc71fca543f2eb8, + 0x3fdf4f55c3ef6448, 0x3fee6d03b4f830b0, 0xbfecdef23ccad0bc, + 0xbfe9601fee00766c, 0x3fe1a45dfb2cdc2e, 0x3fdedebbf736e98c, + 0xbfe7f511fe5c74b0); + // -0.7603855538897846, -0.0491604902215765, + // -0.3714656077097227, -0.6096204185796581, + // -0.2818689596683441, 0.3527700521309320, 0.1176602936422064, + // -0.2049443830034134, -0.6926950556538125, 0.7269529331298494, 
+ // -0.2107692441818434, 0.1746722346734710, + // -0.5298547863982788, 0.2397543330794352, + // -0.8347981409736787, -0.6198539479673024 + VLOAD_64(v8, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, + 0xbfe38202ae18c034, 0xbfd20a241ae21e00, 0x3fd693c8d73a46c0, + 0x3fbe1efc293b2500, 0xbfca3b9e173fd0c8, 0xbfe62a8ed24449ee, + 0x3fe74332cc30c46e, 0xbfcafa7c9161bf78, 0x3fc65ba8e7b88cc0, + 0xbfe0f4920666b5a4, 0x3fceb0451dd34270, 0xbfeab6aa9747cb24, + 0xbfe3d5d7f25a1d14); + asm volatile("vfmsac.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // 0.7578799543094064, 0.0533064903276678, 0.3755067691720115, + // 0.6116303877234964, 0.2847140709489031, -0.3597049427834890, + // -0.1245238936163020, 0.2065631231810536, 0.6939728614971534, + // -0.7234926612877562, 0.2174943797906927, + // -0.1810536730273635, 0.5242459484866814, + // -0.2358548434356951, 0.8382098023325486, 0.6145585759420944 + VCMP_U64(9, v8, 0x3fe8408d7641b126, 0x3fab4afd013e6639, 0x3fd8084d8b414e68, + 0x3fe39279e4106415, 0x3fd238c15ddbf0a4, 0xbfd70567e15dbc9b, + 0xbfbfe0cc42a710ce, 0x3fca70a9114fa5b7, 0x3fe63506930e2352, + 0xbfe726da14e40fb7, 0x3fcbd6db1821e5e6, 0xbfc72cc44a3c91ef, + 0x3fe0c69f7079f20d, 0xbfce307dd3946ada, 0x3fead29d5d068eb6, + 0x3fe3aa76bf24b95e); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.3911 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3642); + // 0.3203, -0.8521, 0.0744, 0.9370, 0.2056, 0.2866, + // -0.8379, -0.2668, -0.0878, -0.5703, 0.1272, -0.6606, + // -0.6919, 0.9189, 0.6245, 0.9932 + VLOAD_16(v4, 0x3520, 0xbad1, 0x2cc3, 0x3b7f, 0x3294, 0x3496, 0xbab4, 0xb445, + 0xad9f, 0xb890, 0x3012, 0xb949, 0xb989, 0x3b5a, 0x38ff, 0x3bf2); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.8706, 0.4900, -0.9497, 0.4727, -0.7168, 0.0167, + // -0.3606, -0.1565, -0.5142, 0.8271, -0.4783, 0.6318, + // 0.0842, -0.6646, 0.1454, -0.3020 + VLOAD_16(v2, 0x3af7, 0x37d7, 0xbb99, 0x3790, 0xb9bc, 0x2445, 0xb5c5, 0xb102, + 
0xb81d, 0x3a9e, 0xb7a7, 0x390e, 0x2d63, 0xb951, 0x30a7, 0xb4d5); + asm volatile("vfmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 0.8706, -0.8232, -0.9497, -0.1062, -0.7168, 0.0955, + // -0.3606, 0.0521, -0.5142, -1.0498, -0.4783, -0.8901, + // 0.0842, 1.0234, 0.1454, 0.6904 + VCMP_U16(10, v2, 0x3af7, 0xba96, 0xbb99, 0xaecc, 0xb9bc, 0x2e1b, 0xb5c5, + 0x2aac, 0xb81d, 0xbc33, 0xb7a7, 0xbb1f, 0x2d63, 0x3c19, 0x30a7, + 0x3986); + + VSET(16, e32, m4); + double dscalar_32; + // -0.39704049 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbecb48e4); + // 0.43844241, 0.13734208, 0.15601240, 0.48965997, + // -0.41457745, -0.69918746, 0.38535324, 0.83301985, + // 0.79336989, -0.03326649, -0.85931808, 0.92554229, + // -0.77742523, 0.47821125, -0.53653014, -0.32442030 + VLOAD_32(v8, 0x3ee07b86, 0x3e0ca367, 0x3e1fc1b7, 0x3efab4b6, 0xbed4437f, + 0xbf32fdf3, 0x3ec54d05, 0x3f5540ca, 0x3f4b1a4a, 0xbd084272, + 0xbf5bfc45, 0x3f6cf057, 0xbf470557, 0x3ef4d81b, 0xbf095a0a, + 0xbea61a6b); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.73119336, 0.87333083, -0.16325396, -0.30275631, + // 0.34779423, 0.22721651, 0.47497734, -0.58483958, + // -0.24916913, 0.13750601, -0.99799657, 0.66137350, + // 0.58565408, 0.68887448, -0.74538875, 0.99311894 + VLOAD_32(v4, 0xbf3b2f7d, 0x3f5f929c, 0xbe272c0c, 0xbe9b02e0, 0x3eb21216, + 0x3e68ab72, 0x3ef3303b, 0xbf15b80c, 0xbe7f2631, 0x3e0cce60, + 0xbf7f7cb4, 0x3f294fc6, 0x3f15ed6d, 0x3f305a14, 0xbf3ed1cc, + 0x3f7e3d0b); + asm volatile("vfmsac.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -0.73119336, -0.92786121, -0.16325396, 0.10834149, + // 0.34779423, 0.05038923, 0.47497734, 0.25409698, + // -0.24916913, -0.12429786, -0.99799657, -1.02885127, + // 0.58565408, -0.87874371, -0.74538875, -0.86431098 + VCMP_U32(11, v4, 0xbf3b2f7d, 0xbf6d8850, 0xbe272c0c, 0x3ddde223, 0x3eb21216, + 0x3d4e64ec, 0x3ef3303b, 0x3e821900, 0xbe7f2631, 0xbdfe8fe1, + 0xbf7f7cb4, 0xbf83b166, 0x3f15ed6d, 0xbf60f559, 0xbf3ed1cc, + 0xbf5d437b); + + VSET(16, e64, m8); + double dscalar_64; + // 
0.0070730785066928 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3f7cf8a90de48000); + // -0.3542445595658672, 0.5861662785402695, + // 0.5713440701195280, 0.2841717566030781, 0.4022451154567073, + // -0.9804628417449213, -0.9703836833708208, + // 0.2288593539727362, 0.1806576644288407, 0.4892172254017777, + // 0.9508074316559227, -0.9022151172016701, + // -0.7929839752648156, 0.5513143449560454, + // 0.4823446191982377, -0.7486658065787619 + VLOAD_64(v16, 0xbfd6abf15f87c3c4, 0x3fe2c1dfc88b26c8, 0x3fe248735bfda932, + 0x3fd22fdebc43a768, 0x3fd9be624bf72ff4, 0xbfef5ff39c079aea, + 0xbfef0d621514ca4c, 0x3fcd4b43685929d8, 0x3fc71fca543f2eb8, + 0x3fdf4f55c3ef6448, 0x3fee6d03b4f830b0, 0xbfecdef23ccad0bc, + 0xbfe9601fee00766c, 0x3fe1a45dfb2cdc2e, 0x3fdedebbf736e98c, + 0xbfe7f511fe5c74b0); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.7603855538897846, -0.0491604902215765, + // -0.3714656077097227, -0.6096204185796581, + // -0.2818689596683441, 0.3527700521309320, + // 0.1176602936422064, -0.2049443830034134, + // -0.6926950556538125, 0.7269529331298494, + // -0.2107692441818434, 0.1746722346734710, + // -0.5298547863982788, 0.2397543330794352, + // -0.8347981409736787, -0.6198539479673024 + VLOAD_64(v8, 0xbfe8551415c9d6cc, 0xbfa92b9053839560, 0xbfd7c617af2cedf8, + 0xbfe38202ae18c034, 0xbfd20a241ae21e00, 0x3fd693c8d73a46c0, + 0x3fbe1efc293b2500, 0xbfca3b9e173fd0c8, 0xbfe62a8ed24449ee, + 0x3fe74332cc30c46e, 0xbfcafa7c9161bf78, 0x3fc65ba8e7b88cc0, + 0xbfe0f4920666b5a4, 0x3fceb0451dd34270, 0xbfeab6aa9747cb24, + 0xbfe3d5d7f25a1d14); + asm volatile("vfmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // -0.7603855538897846, 0.0533064903276678, + // -0.3714656077097227, 0.6116303877234964, + // -0.2818689596683441, -0.3597049427834890, 0.1176602936422064, + // 0.2065631231810536, -0.6926950556538125, -0.7234926612877562, + // -0.2107692441818434, -0.1810536730273635, + // -0.5298547863982788, -0.2358548434356951, + // -0.8347981409736787, 0.6145585759420944 + VCMP_U64(12, v8, 0xbfe8551415c9d6cc, 
0x3fab4afd013e6639, 0xbfd7c617af2cedf8, + 0x3fe39279e4106415, 0xbfd20a241ae21e00, 0xbfd70567e15dbc9b, + 0x3fbe1efc293b2500, 0x3fca70a9114fa5b7, 0xbfe62a8ed24449ee, + 0xbfe726da14e40fb7, 0xbfcafa7c9161bf78, 0xbfc72cc44a3c91ef, + 0xbfe0f4920666b5a4, 0xbfce307dd3946ada, 0xbfeab6aa9747cb24, + 0x3fe3aa76bf24b95e); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c new file mode 100644 index 000000000..d54334f0a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmsub.c @@ -0,0 +1,453 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.6821, 0.7749, 0.0299, 0.0299, -0.5410, -0.1865, 0.1885, + // 0.0228, -0.5410, 0.1902, -0.9160, -0.3511, -0.9287, -0.9961, + // -0.6509, -0.3940 + VLOAD_16(v4, 0x3975, 0x3a33, 0x27a5, 0x27a5, 0xb854, 0xb1f8, 0x3208, 0x25d8, + 0xb854, 0x3216, 0xbb54, 0xb59e, 0xbb6e, 0xbbf8, 0xb935, 0xb64e); + // -0.9048, 0.1698, 0.2411, 0.2411, -0.6147, 0.9580, 0.5117, + // 0.8330, -0.6147, 0.5591, 0.7031, -0.1556, 0.2397, 0.8154, + // 0.1646, 0.2693 + VLOAD_16(v6, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, + 0xb8eb, 0x3879, 0x39a0, 0xb0fb, 0x33ac, 0x3a86, 0x3144, 0x344f); + // -0.2292, 0.7578, -0.7427, -0.7427, 0.1119, 0.2939, -0.2983, + // -0.3608, 0.1119, -0.5562, -0.0629, -0.1968, -0.8638, -0.2700, + // -0.7690, 0.4216 + VLOAD_16(v2, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 0xb5c6, + 0x2f29, 0xb873, 0xac06, 0xb24c, 0xbae9, 0xb452, 0xba27, 0x36bf); + asm volatile("vfmsub.vv v2, v4, v6"); + 
// 0.7485, 0.4175, -0.2632, -0.2632, 0.5542, -1.0127, -0.5679, + // -0.8413, 0.5542, -0.6650, -0.6455, 0.2247, 0.5625, + // -0.5464, 0.3359, -0.4355 + VCMP_U16(1, v2, 0x39fd, 0x36ae, 0xb436, 0xb436, 0x386f, 0xbc0d, 0xb88b, + 0xbabb, 0x386f, 0xb952, 0xb92a, 0x3331, 0x3880, 0xb85f, 0x3560, + 0xb6f8); + + VSET(16, e32, m4); + // -0.74553698, -0.16736358, -0.11869104, -0.85860848, + // -0.66138542, -0.68386567, -0.45389724, -0.12761629, + // -0.95652348, 0.71083277, 0.24187960, 0.01609672, + // -0.58867335, -0.55222940, -0.67417240, -0.06725668 + VLOAD_32(v8, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, + 0xbf2f11d2, 0xbee86538, 0xbe02add8, 0xbf74deb9, 0x3f35f923, + 0x3e77af49, 0x3c83dd45, 0xbf16b34c, 0xbf0d5ee8, 0xbf2c9690, + 0xbd89bddf); + // -0.17500710, -0.81537211, -0.31956050, 0.22762603, + // 0.49659184, -0.09389434, 0.05757815, -0.13087828, + // -0.73042232, -0.79662275, -0.96801740, 0.03017101, + // 0.70759267, -0.35606241, 0.18037270, -0.35372722 + VLOAD_32(v12, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, + 0xbdc04bad, 0x3d6bd711, 0xbe0604f5, 0xbf3afcf5, 0xbf4bef78, + 0xbf77cffd, 0x3cf72932, 0x3f3524cb, 0xbeb64dd0, 0x3e38b39f, + 0xbeb51bbc); + // 0.92876774, 0.18572871, -0.42147154, -0.79289448, + // 0.90907055, 0.07037155, 0.07339484, 0.17415307, + // -0.61978233, -0.04939311, 0.56138068, -0.51601994, + // -0.80625385, -0.31227911, 0.91474551, 0.78424871 + VLOAD_32(v4, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, + 0x3d901ef6, 0x3d965009, 0x3e32552f, 0xbf1eaa0e, 0xbd4a506e, + 0x3f0fb6a5, 0xbf0419e2, 0xbf4e66a7, 0xbe9fe30c, 0x3f6a2cc3, + 0x3f48c486); + asm volatile("vfmsub.vv v4, v8, v12"); + // -0.51742357, 0.78428787, 0.36958539, 0.45315993, + // -1.09783781, 0.04576965, -0.09089187, + // 0.10865352, 1.32325864, 0.76151252, 1.10380387, + // -0.03847724, -0.23297250, 0.52851212, -0.79706889, 0.30098125 + VCMP_U32(2, v4, 0xbf0475df, 0x3f48c717, 0x3ebd3a4c, 0x3ee80493, 0xbf8c85f3, + 0x3d3b78f5, 0xbdba2584, 
0x3dde85bc, 0x3fa9608a, 0x3f42f27c, + 0x3f8d4973, 0xbd1d9a4f, 0xbe6e9058, 0x3f074c92, 0xbf4c0cb5, + 0x3e9a1a37); + + VSET(16, e64, m8); + // 0.3304351537536074, -0.7528197595818080, -0.7530937950641439, + // -0.7994160811423281, 0.0797802827518117, + // -0.6361377214985149, 0.1748070414096887, 0.8251843575618585, + // 0.0699629848559165, -0.8195631240215655, -0.4843919596862658, + // -0.9206444585342115, 0.9791118581337512, 0.5143481050333210, + // 0.5856279779979670, -0.5536419150604011 + VLOAD_64(v16, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 0xbfe819582893df6e, + 0xbfe994d1088ce396, 0x3fb46c7b0948dfc0, 0xbfe45b3d7eb2d188, + 0x3fc66013befb8968, 0x3fea67e9069cc438, 0x3fb1e9181be2ff10, + 0xbfea39dc71d5c454, 0xbfdf00472253102c, 0xbfed75eb5e14bcf6, + 0x3fef54e26439ed98, 0x3fe0758a283c1602, 0x3fe2bd76e2a3f6ca, + 0xbfe1b76f3fdc22ac); + // 0.4156163852505284, -0.7806302214299039, -0.8826873649954201, + // 0.4810449553239884, 0.9337837820126544, -0.5377837408558668, + // -0.1434453653318362, 0.1199087999382409, + // -0.2601268153647489, -0.1813009025048657, + // -0.2492371358416354, 0.4131695659117063, + // -0.9085600854772706, -0.3952216110937696, 0.0817663443229741, + // 0.1439804529607418 + VLOAD_64(v24, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, + 0x3fdec970c7c16d8c, 0x3fede18e86a8f206, 0xbfe135863f697cd2, + 0xbfc25c6af06e7710, 0x3fbeb257d63cc310, 0xbfd0a5eaf1337874, + 0xbfc734de337f3100, 0xbfcfe700a1b1bb78, 0x3fda715ec352c558, + 0xbfed12ec99b26d4c, 0xbfd94b4f95947db0, 0x3fb4eea39ec7d8a0, + 0x3fc26df3945d6540); + // -0.2185765241217579, -0.9587275435281344, 0.1216903502931035, + // 0.7653655177934149, -0.5928258331230032, 0.9123074434439491, + // 0.1569052366565831, 0.2566745252901157, -0.9113595614847214, + // 0.1628442001087833, 0.2337303194688813, 0.1926350000139823, + // -0.6653994610877216, -0.6745212179353777, 0.8748797125997727, + // -0.7324641634418565 + VLOAD_64(v8, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, + 
0x3fe87ddfd38d1514, 0xbfe2f86de1af9792, 0x3fed319f612fa6f6, + 0x3fc4157886016dd8, 0x3fd06d5afcf59780, 0xbfed29db86ef2934, + 0x3fc4d81428e7be98, 0x3fcdeae00719eac8, 0x3fc8a84380900070, + 0xbfe54af3cf84bab0, 0xbfe595ad856fb278, 0x3febff03bd3198ce, + 0xbfe77058af6f3156); + asm volatile("vfmsub.vv v8, v16, v24"); + // -0.4878417526056305, 1.5023792602532113, 0.7910431172705017, + // -1.0928904581998689, -0.9810795946017860, -0.0425694375227023, + // 0.1708735055334605, 0.0918950033157782, 0.1963653801662987, + // 0.0478398011349183, 0.1360200483560070, -0.5905179111943168, + // 0.2570595827304748, 0.0482829008439404, 0.4305876927582732, + // 0.2615424092003222 + VCMP_U64(3, v8, 0xbfdf38cc9d4420dd, 0x3ff809bed5cf9e94, 0x3fe95039a7cce2e7, + 0xbff17c7ab4814324, 0xbfef650108b2cdb1, 0xbfa5cba94bf030ac, + 0x3fc5df2edb027178, 0x3fb7866e51e83656, 0x3fc9228032f0c004, + 0x3fa87e755aa4ab1c, 0x3fc1691adda50ab1, 0xbfe2e585d18904b2, + 0x3fd073aa093cd9d6, 0x3fa8b88950295616, 0x3fdb8ebfae9d3d83, + 0x3fd0bd1c5f821364); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.6821, 0.7749, 0.0299, 0.0299, -0.5410, -0.1865, 0.1885, + // 0.0228, 0.7217, 0.1902, -0.9160, -0.3511, -0.9287, -0.9961, + // -0.6509, -0.3940 + VLOAD_16(v4, 0x3975, 0x3a33, 0x27a5, 0x27a5, 0xb854, 0xb1f8, 0x3208, 0x25d8, + 0x39c6, 0x3216, 0xbb54, 0xb59e, 0xbb6e, 0xbbf8, 0xb935, 0xb64e); + // -0.9048, 0.1698, 0.2411, 0.2411, -0.6147, 0.9580, 0.5117, + // 0.8330, -0.8584, 0.5591, 0.7031, -0.1556, 0.2397, 0.8154, + // 0.1646, 0.2693 + VLOAD_16(v6, 0xbb3d, 0x316f, 0x33b7, 0x33b7, 0xb8eb, 0x3baa, 0x3818, 0x3aaa, + 0xbade, 0x3879, 0x39a0, 0xb0fb, 0x33ac, 0x3a86, 0x3144, 0x344f); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2292, 0.7578, -0.7427, -0.7427, 0.1119, 0.2939, -0.2983, + // -0.3608, 0.3169, -0.5562, -0.0629, -0.1968, -0.8638, -0.2700, + // -0.7690, 0.4216 + VLOAD_16(v2, 0xb356, 0x3a10, 0xb9f1, 0xb9f1, 0x2f29, 0x34b4, 0xb4c6, 
0xb5c6, + 0x3512, 0xb873, 0xac06, 0xb24c, 0xbae9, 0xb452, 0xba27, 0x36bf); + asm volatile("vfmsub.vv v2, v4, v6, v0.t"); + // -0.2292, 0.4175, -0.7427, -0.2632, 0.1119, -1.0127, -0.2983, + // -0.8413, 0.3169, -0.6650, -0.0629, 0.2247, -0.8638, -0.5464, + // -0.7690, -0.4355 + VCMP_U16(4, v2, 0xb356, 0x36ae, 0xb9f1, 0xb436, 0x2f29, 0xbc0d, 0xb4c6, + 0xbabb, 0x3512, 0xb952, 0xac06, 0x3331, 0xbae9, 0xb85f, 0xba27, + 0xb6f8); + + VSET(16, e32, m4); + // -0.74553698, -0.16736358, -0.11869104, -0.85860848, + // -0.66138542, -0.68386567, -0.45389724, -0.12761629, + // -0.95652348, 0.71083277, 0.24187960, 0.01609672, + // -0.58867335, -0.55222940, -0.67417240, -0.06725668 + VLOAD_32(v8, 0xbf3edb83, 0xbe2b615c, 0xbdf3144a, 0xbf5bcdc4, 0xbf29508e, + 0xbf2f11d2, 0xbee86538, 0xbe02add8, 0xbf74deb9, 0x3f35f923, + 0x3e77af49, 0x3c83dd45, 0xbf16b34c, 0xbf0d5ee8, 0xbf2c9690, + 0xbd89bddf); + // -0.17500710, -0.81537211, -0.31956050, 0.22762603, + // 0.49659184, -0.09389434, 0.05757815, -0.13087828, + // -0.73042232, -0.79662275, -0.96801740, 0.03017101, + // 0.70759267, -0.35606241, 0.18037270, -0.35372722 + VLOAD_32(v12, 0xbe333510, 0xbf50bc3a, 0xbea39d6f, 0x3e6916cc, 0x3efe4149, + 0xbdc04bad, 0x3d6bd711, 0xbe0604f5, 0xbf3afcf5, 0xbf4bef78, + 0xbf77cffd, 0x3cf72932, 0x3f3524cb, 0xbeb64dd0, 0x3e38b39f, + 0xbeb51bbc); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.92876774, 0.18572871, -0.42147154, -0.79289448, + // 0.90907055, 0.07037155, 0.07339484, 0.17415307, + // -0.61978233, -0.04939311, 0.56138068, -0.51601994, + // -0.80625385, -0.31227911, 0.91474551, 0.78424871 + VLOAD_32(v4, 0x3f6dc3b9, 0x3e3e2fab, 0xbed7cb1e, 0xbf4afb22, 0x3f68b8d9, + 0x3d901ef6, 0x3d965009, 0x3e32552f, 0xbf1eaa0e, 0xbd4a506e, + 0x3f0fb6a5, 0xbf0419e2, 0xbf4e66a7, 0xbe9fe30c, 0x3f6a2cc3, + 0x3f48c486); + asm volatile("vfmsub.vv v4, v8, v12, v0.t"); + // 0.92876774, 0.78428787, -0.42147154, 0.45315993, + // 0.90907055, 0.04576965, 0.07339484, 0.10865352, + // -0.61978233, 0.76151252, 0.56138068, -0.03847724, + // 
-0.80625385, 0.52851212, 0.91474551, 0.30098125 + VCMP_U32(5, v4, 0x3f6dc3b9, 0x3f48c717, 0xbed7cb1e, 0x3ee80493, 0x3f68b8d9, + 0x3d3b78f5, 0x3d965009, 0x3dde85bc, 0xbf1eaa0e, 0x3f42f27c, + 0x3f0fb6a5, 0xbd1d9a4f, 0xbf4e66a7, 0x3f074c92, 0x3f6a2cc3, + 0x3e9a1a37); + + VSET(16, e64, m8); + // 0.3304351537536074, -0.7528197595818080, -0.7530937950641439, + // -0.7994160811423281, 0.0797802827518117, + // -0.6361377214985149, 0.1748070414096887, 0.8251843575618585, + // 0.0699629848559165, -0.8195631240215655, -0.4843919596862658, + // -0.9206444585342115, 0.9791118581337512, 0.5143481050333210, + // 0.5856279779979670, -0.5536419150604011 + VLOAD_64(v16, 0x3fd525d97cb482ac, 0xbfe8171976e5f762, 0xbfe819582893df6e, + 0xbfe994d1088ce396, 0x3fb46c7b0948dfc0, 0xbfe45b3d7eb2d188, + 0x3fc66013befb8968, 0x3fea67e9069cc438, 0x3fb1e9181be2ff10, + 0xbfea39dc71d5c454, 0xbfdf00472253102c, 0xbfed75eb5e14bcf6, + 0x3fef54e26439ed98, 0x3fe0758a283c1602, 0x3fe2bd76e2a3f6ca, + 0xbfe1b76f3fdc22ac); + // 0.4156163852505284, -0.7806302214299039, -0.8826873649954201, + // 0.4810449553239884, 0.9337837820126544, -0.5377837408558668, + // -0.1434453653318362, 0.1199087999382409, + // -0.2601268153647489, -0.1813009025048657, + // -0.2492371358416354, 0.4131695659117063, + // -0.9085600854772706, -0.3952216110937696, 0.0817663443229741, + // 0.1439804529607418 + VLOAD_64(v24, 0x3fda997577954be0, 0xbfe8faec3ae9f10e, 0xbfec3ef992a7ed86, + 0x3fdec970c7c16d8c, 0x3fede18e86a8f206, 0xbfe135863f697cd2, + 0xbfc25c6af06e7710, 0x3fbeb257d63cc310, 0xbfd0a5eaf1337874, + 0xbfc734de337f3100, 0xbfcfe700a1b1bb78, 0x3fda715ec352c558, + 0xbfed12ec99b26d4c, 0xbfd94b4f95947db0, 0x3fb4eea39ec7d8a0, + 0x3fc26df3945d6540); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2185765241217579, -0.9587275435281344, 0.1216903502931035, + // 0.7653655177934149, -0.5928258331230032, 0.9123074434439491, + // 0.1569052366565831, 0.2566745252901157, -0.9113595614847214, + // 0.1628442001087833, 0.2337303194688813, 0.1926350000139823, + // 
-0.6653994610877216, -0.6745212179353777, 0.8748797125997727, + // -0.7324641634418565 + VLOAD_64(v8, 0xbfcbfa50c7635df8, 0xbfeeade562a749c2, 0x3fbf27194abf66e0, + 0x3fe87ddfd38d1514, 0xbfe2f86de1af9792, 0x3fed319f612fa6f6, + 0x3fc4157886016dd8, 0x3fd06d5afcf59780, 0xbfed29db86ef2934, + 0x3fc4d81428e7be98, 0x3fcdeae00719eac8, 0x3fc8a84380900070, + 0xbfe54af3cf84bab0, 0xbfe595ad856fb278, 0x3febff03bd3198ce, + 0xbfe77058af6f3156); + asm volatile("vfmsub.vv v8, v16, v24, v0.t"); + // -0.2185765241217579, 1.5023792602532113, 0.1216903502931035, + // -1.0928904581998689, -0.5928258331230032, -0.0425694375227023, + // 0.1569052366565831, 0.0918950033157782, -0.9113595614847214, + // 0.0478398011349183, 0.2337303194688813, -0.5905179111943168, + // -0.6653994610877216, 0.0482829008439404, 0.8748797125997727, + // 0.2615424092003222 + VCMP_U64(6, v8, 0xbfcbfa50c7635df8, 0x3ff809bed5cf9e94, 0x3fbf27194abf66e0, + 0xbff17c7ab4814324, 0xbfe2f86de1af9792, 0xbfa5cba94bf030ac, + 0x3fc4157886016dd8, 0x3fb7866e51e83656, 0xbfed29db86ef2934, + 0x3fa87e755aa4ab1c, 0x3fcdeae00719eac8, 0xbfe2e585d18904b2, + 0xbfe54af3cf84bab0, 0x3fa8b88950295616, 0x3febff03bd3198ce, + 0x3fd0bd1c5f821364); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1489 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x30c4); + // 0.8530, -0.3298, -0.1814, -0.2385, 0.9946, 0.6553, 0.8711, + // -0.6377, 0.4224, -0.1814, 0.7026, 0.2852, 0.0553, 0.7349, + // -0.8105, 0.0033 + VLOAD_16(v4, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, + 0x36c2, 0xb1ce, 0x399f, 0x3490, 0x2b15, 0x39e1, 0xba7c, 0x1abd); + // -0.2338, -0.2512, 0.0069, 0.0613, -0.1733, 0.8560, -0.2766, + // -0.0028, -0.1803, 0.0069, 0.7856, -0.0243, -0.1974, 0.6416, + // 0.7109, 0.0817 + VLOAD_16(v2, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, + 0xb1c5, 0x1f06, 0x3a49, 0xa639, 0xb251, 0x3922, 0x39b0, 0x2d3a); + asm volatile("vfmsub.vf v2, %[A], 
v4" ::[A] "f"(dscalar_16)); + // -0.8877, 0.2925, 0.1824, 0.2477, -1.0205, -0.5278, -0.9121, + // 0.6372, -0.4492, 0.1824, -0.5854, -0.2888, -0.0847, -0.6392, + // 0.9165, 0.0089 + VCMP_U16(7, v2, 0xbb1a, 0x34ae, 0x31d6, 0x33ed, 0xbc15, 0xb839, 0xbb4c, + 0x3919, 0xb730, 0x31d6, 0xb8af, 0xb49f, 0xad6c, 0xb91d, 0x3b55, + 0x208b); + + VSET(16, e32, m4); + double dscalar_32; + // -0.12857932 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe03aa4c); + // 0.31050768, -0.13843875, 0.23405042, -0.30545133, + // -0.28880060, 0.46233574, -0.51105869, -0.11776974, + // -0.39969075, 0.51141965, 0.88750082, -0.22310242, + // 0.60111052, 0.58466393, -0.14306845, -0.01826003 + VLOAD_32(v8, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, + 0x3eecb745, 0xbf02d4be, 0xbdf13143, 0xbecca444, 0x3f02ec66, + 0x3f633341, 0xbe6474f6, 0x3f19e261, 0x3f15ac89, 0xbe128089, + 0xbc95960e); + // -0.51789892, 0.77328473, -0.88433731, 0.40865302, + // -0.50454420, 0.30827177, -0.25503114, 0.07736996, + // 0.20596179, -0.42633566, 0.89622146, 0.03779412, 0.50878429, + // 0.67896879, -0.17667305, 0.06984760 + VLOAD_32(v4, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, + 0x3e9dd5cc, 0xbe829371, 0x3d9e7424, 0x3e52e7a6, 0xbeda48ab, + 0x3f656ec5, 0x3d1ace01, 0x3f023fb0, 0x3f2dd0e6, 0xbe34e9c8, + 0x3d8f0c42); + asm volatile("vfmsub.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.24391660, 0.03901032, -0.12034293, 0.25290701, + // 0.35367453, -0.50197309, 0.54385042, 0.10782156, 0.37320831, + // -0.45660171, -1.00273633, 0.21824288, -0.66652966, + // -0.67196524, 0.16578496, 0.00927907 + VCMP_U32(8, v4, 0xbe79c546, 0x3d1fc94a, 0xbdf6765b, 0x3e817d07, 0x3eb514d5, + 0xbf00814f, 0x3f0b39c8, 0x3ddcd18d, 0x3ebf1529, 0xbee9c7b3, + 0xbf8059aa, 0x3e5f7b10, 0xbf2aa1b0, 0xbf2c05eb, 0x3e29c388, + 0x3c18073f); + + VSET(16, e64, m8); + double dscalar_64; + // -0.6953502965951812 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe6404f43e47c8c); + // -0.8873182146436771, 0.3913246153259273, 0.8941416868753180, + // 
0.2630283463166789, -0.8096670634564123, 0.6449486037845993, + // 0.8384405697279889, -0.9956067461953679, -0.8936777193492917, + // -0.4464070291333477, 0.5599998966835931, + // -0.3406088963725078, 0.4908382567748615, + // -0.5194254665571632, 0.0909287222245825, 0.5893410930389467 + VLOAD_64(v16, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, + 0x3fd0d574d8567864, 0xbfe9e8cae6c6325c, 0x3fe4a36b411b6206, + 0x3fead4815153e1da, 0xbfefdc02add2c126, 0xbfec990204389c42, + 0xbfdc91eec9b5438c, 0x3fe1eb84e7409f04, 0xbfd5cc8941a96178, + 0x3fdf69e4dd1e50c8, 0xbfe09f2227f25264, 0x3fb7471ad038be10, + 0x3fe2dbe1da195142); + // -0.4387964890891065, -0.2425720412460179, + // -0.8909058709916624, -0.7961584351708695, 0.0353694444236163, + // 0.2992862865812480, -0.4186756300648600, -0.5421957392048740, + // 0.3780444269462682, -0.6731508364205383, 0.1263808806166760, + // 0.8571806635726140, 0.5149747658358419, 0.3530123248386567, + // -0.3756405874818076, -0.4529815298587780 + VLOAD_64(v8, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, + 0xbfe97a21412fca1a, 0x3fa21bf19e277c80, 0x3fd32781ab407ee0, + 0xbfdacb94deb0b06c, 0xbfe159aae0fd4b9a, 0x3fd831e1408ad588, + 0xbfe58a739f7670b4, 0x3fc02d3faa8b4d88, 0x3feb6e062499dac6, + 0x3fe07aac5c30f764, 0x3fd697c1019115dc, 0xbfd80a7ed19236bc, + 0xbfdcfda63e1bdf38); + asm volatile("vfmsub.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // 1.1924354834767115, -0.2226520744998102, -0.2746500252428773, + // 0.2905806577161405, 0.7850729097860438, -0.8530574119257405, + // -0.5473143461852141, 1.3726227142641205, 0.6308044149460488, + // 0.9144826628916634, -0.6478788795043589, -0.2554319322783636, + // -0.8489261129378481, 0.2739582417788488, 0.1702730716940806, + // -0.2743602518995064 + VCMP_U64(9, v8, 0x3ff314373ac1f573, 0xbfcc7fdcf92e7eaa, 0xbfd193ddb310e0ff, + 0x3fd298df9d6f6c70, 0x3fe91f5139103634, 0xbfeb4c3f0eba9b49, + 0xbfe18399602fe862, 0x3ff5f6433c382dac, 0x3fe42f8cbd8bb3c3, + 0x3fed4371253c1e34, 
0xbfe4bb6c7ce7b0ea, 0xbfd058ff2cdf5691, + 0xbfeb2a6718793b11, 0x3fd18888263a3a6e, 0x3fc5cb820d286398, + 0xbfd18f1e4d4ec3d2); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1489 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x30c4); + // 0.8530, -0.3298, -0.1814, -0.2385, 0.9946, 0.6553, 0.8711, + // -0.6377, 0.4224, -0.1814, 0.7026, 0.2852, 0.0553, + // 0.7349, -0.8105, 0.0033 + VLOAD_16(v4, 0x3ad3, 0xb547, 0xb1ce, 0xb3a2, 0x3bf5, 0x393e, 0x3af8, 0xb91a, + 0x36c2, 0xb1ce, 0x399f, 0x3490, 0x2b15, 0x39e1, 0xba7c, 0x1abd); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2338, -0.2512, 0.0069, 0.0613, -0.1733, 0.8560, + // -0.2766, -0.0028, -0.1803, 0.0069, 0.7856, -0.0243, + // -0.1974, 0.6416, 0.7109, 0.0817 + VLOAD_16(v2, 0xb37b, 0xb405, 0x1f06, 0x2bd8, 0xb18c, 0x3ad9, 0xb46d, 0x99d2, + 0xb1c5, 0x1f06, 0x3a49, 0xa639, 0xb251, 0x3922, 0x39b0, 0x2d3a); + asm volatile("vfmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // -0.2338, 0.2925, 0.0069, 0.2477, -0.1733, -0.5278, + // -0.2766, 0.6372, -0.1803, 0.1824, 0.7856, -0.2888, + // -0.1974, -0.6392, 0.7109, 0.0089 + VCMP_U16(10, v2, 0xb37b, 0x34ae, 0x1f06, 0x33ed, 0xb18c, 0xb839, 0xb46d, + 0x3919, 0xb1c5, 0x31d6, 0x3a49, 0xb49f, 0xb251, 0xb91d, 0x39b0, + 0x208b); + + VSET(16, e32, m4); + double dscalar_32; + // -0.12857932 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe03aa4c); + // 0.31050768, -0.13843875, 0.23405042, -0.30545133, + // -0.28880060, 0.46233574, -0.51105869, -0.11776974, + // -0.39969075, 0.51141965, 0.88750082, -0.22310242, + // 0.60111052, 0.58466393, -0.14306845, -0.01826003 + VLOAD_32(v8, 0x3e9efadd, 0xbe0dc2e3, 0x3e6faaea, 0xbe9c641e, 0xbe93ddac, + 0x3eecb745, 0xbf02d4be, 0xbdf13143, 0xbecca444, 0x3f02ec66, + 0x3f633341, 0xbe6474f6, 0x3f19e261, 0x3f15ac89, 0xbe128089, + 0xbc95960e); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.51789892, 0.77328473, -0.88433731, 0.40865302, + // -0.50454420, 0.30827177, -0.25503114, 0.07736996, + // 
0.20596179, -0.42633566, 0.89622146, 0.03779412, + // 0.50878429, 0.67896879, -0.17667305, 0.06984760 + VLOAD_32(v4, 0xbf049506, 0x3f45f5fd, 0xbf6263ee, 0x3ed13af8, 0xbf0129cf, + 0x3e9dd5cc, 0xbe829371, 0x3d9e7424, 0x3e52e7a6, 0xbeda48ab, + 0x3f656ec5, 0x3d1ace01, 0x3f023fb0, 0x3f2dd0e6, 0xbe34e9c8, + 0x3d8f0c42); + asm volatile("vfmsub.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -0.51789892, 0.03901032, -0.88433731, 0.25290701, + // -0.50454420, -0.50197309, -0.25503114, 0.10782156, + // 0.20596179, -0.45660171, 0.89622146, 0.21824288, + // 0.50878429, -0.67196524, -0.17667305, 0.00927907 + VCMP_U32(11, v4, 0xbf049506, 0x3d1fc94a, 0xbf6263ee, 0x3e817d07, 0xbf0129cf, + 0xbf00814f, 0xbe829371, 0x3ddcd18d, 0x3e52e7a6, 0xbee9c7b3, + 0x3f656ec5, 0x3e5f7b10, 0x3f023fb0, 0xbf2c05eb, 0xbe34e9c8, + 0x3c18073f); + + VSET(16, e64, m8); + double dscalar_64; + // -0.6953502965951812 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe6404f43e47c8c); + // -0.8873182146436771, 0.3913246153259273, + // 0.8941416868753180, 0.2630283463166789, + // -0.8096670634564123, 0.6449486037845993, + // 0.8384405697279889, -0.9956067461953679, + // -0.8936777193492917, -0.4464070291333477, + // 0.5599998966835931, -0.3406088963725078, 0.4908382567748615, + // -0.5194254665571632, 0.0909287222245825, 0.5893410930389467 + VLOAD_64(v16, 0xbfec64e92b21453c, 0x3fd90b76663c74f4, 0x3fec9ccf06e3d51a, + 0x3fd0d574d8567864, 0xbfe9e8cae6c6325c, 0x3fe4a36b411b6206, + 0x3fead4815153e1da, 0xbfefdc02add2c126, 0xbfec990204389c42, + 0xbfdc91eec9b5438c, 0x3fe1eb84e7409f04, 0xbfd5cc8941a96178, + 0x3fdf69e4dd1e50c8, 0xbfe09f2227f25264, 0x3fb7471ad038be10, + 0x3fe2dbe1da195142); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.4387964890891065, -0.2425720412460179, + // -0.8909058709916624, -0.7961584351708695, + // 0.0353694444236163, 0.2992862865812480, + // -0.4186756300648600, -0.5421957392048740, + // 0.3780444269462682, -0.6731508364205383, 0.1263808806166760, + // 0.8571806635726140, 0.5149747658358419, 0.3530123248386567, 
+ // -0.3756405874818076, -0.4529815298587780 + VLOAD_64(v8, 0xbfdc153dde8f3078, 0xbfcf0c99c409ad98, 0xbfec824d0777279c, + 0xbfe97a21412fca1a, 0x3fa21bf19e277c80, 0x3fd32781ab407ee0, + 0xbfdacb94deb0b06c, 0xbfe159aae0fd4b9a, 0x3fd831e1408ad588, + 0xbfe58a739f7670b4, 0x3fc02d3faa8b4d88, 0x3feb6e062499dac6, + 0x3fe07aac5c30f764, 0x3fd697c1019115dc, 0xbfd80a7ed19236bc, + 0xbfdcfda63e1bdf38); + asm volatile("vfmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // -0.4387964890891065, -0.2226520744998102, + // -0.8909058709916624, 0.2905806577161405, 0.0353694444236163, + // -0.8530574119257405, + // -0.4186756300648600, 1.3726227142641205, 0.3780444269462682, + // 0.9144826628916634, 0.1263808806166760, -0.2554319322783636, + // 0.5149747658358419, 0.2739582417788488, -0.3756405874818076, + // -0.2743602518995064 + VCMP_U64(12, v8, 0xbfdc153dde8f3078, 0xbfcc7fdcf92e7eaa, 0xbfec824d0777279c, + 0x3fd298df9d6f6c70, 0x3fa21bf19e277c80, 0xbfeb4c3f0eba9b49, + 0xbfdacb94deb0b06c, 0x3ff5f6433c382dac, 0x3fd831e1408ad588, + 0x3fed4371253c1e34, 0x3fc02d3faa8b4d88, 0xbfd058ff2cdf5691, + 0x3fe07aac5c30f764, 0x3fd18888263a3a6e, 0xbfd80a7ed19236bc, + 0xbfd18f1e4d4ec3d2); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c new file mode 100644 index 000000000..21f397e90 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmul.c @@ -0,0 +1,350 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.5522, 0.0462, -0.4255, 0.4131, 0.4658, 0.3931, -0.4868, + // 0.5503, 0.3516, -0.3025, -0.2155, 0.9307, 0.9775, 0.8394, + // 0.7446, 0.3909 + VLOAD_16(v4, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, + 0x35a0, 0xb4d7, 0xb2e5, 0x3b72, 0x3bd2, 0x3ab7, 0x39f5, 0x3641); + // 0.8247, 0.4902, 0.5796, -0.9561, -0.7676, 0.1672, -0.1094, + // -0.9395, 0.4885, -0.2739, 0.8691, -0.3394, -0.8032, + // -0.4922, 0.4456, 0.2050 + VLOAD_16(v6, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, + 0x37d1, 0xb462, 0x3af4, 0xb56e, 0xba6d, 0xb7e0, 0x3721, 0x328f); + asm volatile("vfmul.vv v2, v4, v6"); + // -0.4553, 0.0226, -0.2466, -0.3950, -0.3577, 0.0657, 0.0533, + // -0.5171, 0.1718, 0.0829, -0.1873, -0.3159, -0.7852, -0.4131, + // 0.3318, 0.0801 + VCMP_U16(1, v2, 0xb749, 0x25cb, 0xb3e4, 0xb652, 0xb5b9, 0x2c35, 0x2ad2, + 0xb823, 0x317f, 0x2d4e, 0xb1fe, 0xb50e, 0xba48, 0xb69c, 0x354f, + 0x2d21); + + VSET(16, e32, m4); + // 0.48805356, 0.30350628, -0.10483003, 0.61108905, + // -0.09161828, 0.83353645, -0.55006021, -0.78635991, + // 0.49253011, -0.03583150, -0.77662903, 0.57397723, + // -0.54674339, 0.86299890, 0.65402901, -0.16832402 + VLOAD_32(v8, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, + 0x3f5562a5, 0xbf0cd0bf, 0xbf494ee2, 0x3efc2ce8, 0xbd12c40e, + 0xbf46d129, 0x3f12f02c, 0xbf0bf760, 0x3f5ced7f, 0x3f276e72, + 0xbe2c5d22); + // 0.87142652, -0.32756421, 0.76706660, -0.54420376, + // -0.99424285, 0.31885657, 0.18092929, -0.68290263, + // 0.45391774, -0.45151946, -0.08929581, 0.80524033, + // 0.81978256, -0.28325567, -0.53026456, -0.21847765 + VLOAD_32(v12, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, + 0x3ea3412b, 0x3e394587, 0xbf2ed2b5, 0x3ee867e8, 0xbee72d8f, + 
0xbdb6e0b9, 0x3f4e243b, 0x3f51dd45, 0xbe9106e3, 0xbf07bf6b, + 0xbe5fb89b); + asm volatile("vfmul.vv v4, v8, v12"); + // 0.42530280, -0.09941780, -0.08041162, -0.33255696, + // 0.09109081, 0.26577857, -0.09952200, 0.53700727, + // 0.22356816, 0.01617862, 0.06934972, 0.46218961, + // -0.44821069, -0.24444933, -0.34680840, 0.03677504 + VCMP_U32(2, v4, 0x3ed9c14a, 0xbdcb9b8f, 0xbda4aed9, 0xbeaa44e8, 0x3dba8dd2, + 0x3e881421, 0xbdcbd231, 0x3f09794f, 0x3e64ef0d, 0x3c848907, + 0x3d8e073a, 0x3eeca41e, 0xbee57bdf, 0xbe7a50ed, 0xbeb190df, + 0x3d16a16c); + + VSET(16, e64, m8); + // -0.7493892241714462, 0.7026559207451004, 0.6475697152132245, + // 0.0771197585157644, -0.2238692303359540, 0.8998213782649329, + // -0.9446193329247832, 0.8596730101791072, -0.0254417293392082, + // 0.1965035124326171, -0.4709662077579637, -0.2875069600640039, + // -0.4671574223295827, 0.3105385724706418, 0.1703390668980564, + // 0.1487690137320270 + VLOAD_64(v16, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, + 0x3fb3be1ed8b35c30, 0xbfcca7bf376fd290, 0x3feccb5633fc770c, + 0xbfee3a5252c299d8, 0x3feb8270f8ff23f8, 0xbf9a0d658ddcc1c0, + 0x3fc92706efb93e80, 0xbfde244f72f5dcb4, 0xbfd2668397b639c0, + 0xbfdde5e83ebf4f58, 0x3fd3dfdd2d3a1b90, 0x3fc5cdaba8c776a8, + 0x3fc30adcf05190c8); + // 0.6932733143704406, -0.2687556191190688, 0.2528829246597466, + // 0.7287253758892476, -0.5682564905667424, 0.0092122398882537, + // -0.5132517188156311, -0.0178020357545405, 0.0816988280997786, + // 0.6297663200296084, 0.3637508978200528, 0.6003193921430929, + // -0.9089688764960682, 0.1595578103621622, 0.2113473996516566, + // -0.4586515678904381 + VLOAD_64(v24, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, + 0x3fe751b7e126b540, 0xbfe22f283c572a1e, 0x3f82dddde857f980, + 0xbfe06c8ede5db9be, 0xbf923ab26578ce40, 0x3fb4ea36e2cf6110, + 0x3fe4270bb294c832, 0x3fd747b1d881c6e4, 0x3fe335d1038d1808, + 0xbfed1645e5b43d3e, 0x3fc46c63eca9d670, 0x3fcb0d6e7ccc9be0, + 0xbfdd5a8c1b164ebc); + asm 
volatile("vfmul.vv v8, v16, v24"); + // -0.5195315511948315, -0.1888427270075288, 0.1637593235041994, + // 0.0561991250128884, 0.1272151431765869, 0.0082893703931556, + // 0.4848274962501199, -0.0153039296644220, -0.0020785594718451, + // 0.1237512938975817, -0.1713143809148648, -0.1725960035025313, + // 0.4246315573217200, 0.0495488546564072, 0.0360007188479938, + // -0.0682331414017083 + VCMP_U64(3, v8, 0xbfe0a000a1b3e706, 0xbfc82bff9c4ada77, 0x3fc4f610c56ecca8, + 0x3facc621b7fd0401, 0x3fc04895f7bfec49, 0x3f80fa0475f1bbe1, + 0x3fdf0769e826220a, 0xbf8f57aaab459580, 0xbf61070e1e8a29ae, + 0x3fbfae2a3020b759, 0xbfc5eda12fae9203, 0xbfc617a0373b59a7, + 0x3fdb2d29d6e2f72e, 0x3fa95e77ac9b67ce, 0x3fa26eafac2b53dd, + 0xbfb177ba26d2dcbe); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.5522, 0.0462, -0.4255, 0.4131, 0.4658, 0.3931, -0.4868, + // 0.5503, 0.3516, -0.3025, -0.2155, 0.9307, 0.9775, 0.8394, + // 0.7446, 0.3909 + VLOAD_16(v4, 0xb86b, 0x29e9, 0xb6cf, 0x369c, 0x3774, 0x364a, 0xb7ca, 0x3867, + 0x35a0, 0xb4d7, 0xb2e5, 0x3b72, 0x3bd2, 0x3ab7, 0x39f5, 0x3641); + // 0.8247, 0.4902, 0.5796, -0.9561, -0.7676, 0.1672, -0.1094, + // -0.9395, 0.4885, -0.2739, 0.8691, -0.3394, -0.8032, + // -0.4922, 0.4456, 0.2050 + VLOAD_16(v6, 0x3a99, 0x37d8, 0x38a3, 0xbba6, 0xba24, 0x315a, 0xaf01, 0xbb84, + 0x37d1, 0xb462, 0x3af4, 0xb56e, 0xba6d, 0xb7e0, 0x3721, 0x328f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmul.vv v2, v4, v6, v0.t"); + // 0.0000, 0.0226, 0.0000, -0.3950, 0.0000, 0.0657, 0.0000, + // -0.5171, 0.0000, 0.0829, 0.0000, -0.3159, 0.0000, + // -0.4131, 0.0000, 0.0801 + VCMP_U16(4, v2, 0x0, 0x25cb, 0x0, 0xb652, 0x0, 0x2c35, 0x0, 0xb823, 0x0, + 0x2d4e, 0x0, 0xb50e, 0x0, 0xb69c, 0x0, 0x2d21); + + VSET(16, e32, m4); + // 0.48805356, 0.30350628, -0.10483003, 0.61108905, + // -0.09161828, 0.83353645, -0.55006021, -0.78635991, + // 0.49253011, 
-0.03583150, -0.77662903, 0.57397723, + // -0.54674339, 0.86299890, 0.65402901, -0.16832402 + VLOAD_32(v8, 0x3ef9e228, 0x3e9b652d, 0xbdd6b121, 0x3f1c7055, 0xbdbba25d, + 0x3f5562a5, 0xbf0cd0bf, 0xbf494ee2, 0x3efc2ce8, 0xbd12c40e, + 0xbf46d129, 0x3f12f02c, 0xbf0bf760, 0x3f5ced7f, 0x3f276e72, + 0xbe2c5d22); + // 0.87142652, -0.32756421, 0.76706660, -0.54420376, + // -0.99424285, 0.31885657, 0.18092929, -0.68290263, + // 0.45391774, -0.45151946, -0.08929581, 0.80524033, + // 0.81978256, -0.28325567, -0.53026456, -0.21847765 + VLOAD_32(v12, 0x3f5f15cf, 0xbea7b67f, 0x3f445e7a, 0xbf0b50f0, 0xbf7e86b3, + 0x3ea3412b, 0x3e394587, 0xbf2ed2b5, 0x3ee867e8, 0xbee72d8f, + 0xbdb6e0b9, 0x3f4e243b, 0x3f51dd45, 0xbe9106e3, 0xbf07bf6b, + 0xbe5fb89b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmul.vv v4, v8, v12, v0.t"); + // 0.00000000, -0.09941780, 0.00000000, -0.33255696, + // 0.00000000, 0.26577857, 0.00000000, 0.53700727, + // 0.00000000, 0.01617862, 0.00000000, 0.46218961, + // 0.00000000, -0.24444933, 0.00000000, 0.03677504 + VCMP_U32(5, v4, 0x0, 0xbdcb9b8f, 0x0, 0xbeaa44e8, 0x0, 0x3e881421, 0x0, + 0x3f09794f, 0x0, 0x3c848907, 0x0, 0x3eeca41e, 0x0, 0xbe7a50ed, 0x0, + 0x3d16a16c); + + VSET(16, e64, m8); + // -0.7493892241714462, 0.7026559207451004, 0.6475697152132245, + // 0.0771197585157644, -0.2238692303359540, 0.8998213782649329, + // -0.9446193329247832, 0.8596730101791072, -0.0254417293392082, + // 0.1965035124326171, -0.4709662077579637, -0.2875069600640039, + // -0.4671574223295827, 0.3105385724706418, 0.1703390668980564, + // 0.1487690137320270 + VLOAD_64(v16, 0xbfe7faff1c39514c, 0x3fe67c2844fe1c76, 0x3fe4b8e41f971110, + 0x3fb3be1ed8b35c30, 0xbfcca7bf376fd290, 0x3feccb5633fc770c, + 0xbfee3a5252c299d8, 0x3feb8270f8ff23f8, 0xbf9a0d658ddcc1c0, + 0x3fc92706efb93e80, 0xbfde244f72f5dcb4, 0xbfd2668397b639c0, + 0xbfdde5e83ebf4f58, 0x3fd3dfdd2d3a1b90, 0x3fc5cdaba8c776a8, + 0x3fc30adcf05190c8); + // 0.6932733143704406, -0.2687556191190688, 0.2528829246597466, + // 
0.7287253758892476, -0.5682564905667424, 0.0092122398882537, + // -0.5132517188156311, -0.0178020357545405, 0.0816988280997786, + // 0.6297663200296084, 0.3637508978200528, 0.6003193921430929, + // -0.9089688764960682, 0.1595578103621622, 0.2113473996516566, + // -0.4586515678904381 + VLOAD_64(v24, 0x3fe62f4b848d2362, 0xbfd1334ac4aee374, 0x3fd02f3bdcc85930, + 0x3fe751b7e126b540, 0xbfe22f283c572a1e, 0x3f82dddde857f980, + 0xbfe06c8ede5db9be, 0xbf923ab26578ce40, 0x3fb4ea36e2cf6110, + 0x3fe4270bb294c832, 0x3fd747b1d881c6e4, 0x3fe335d1038d1808, + 0xbfed1645e5b43d3e, 0x3fc46c63eca9d670, 0x3fcb0d6e7ccc9be0, + 0xbfdd5a8c1b164ebc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmul.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.1888427270075288, 0.0000000000000000, + // 0.0561991250128884, 0.0000000000000000, 0.0082893703931556, + // 0.0000000000000000, -0.0153039296644220, 0.0000000000000000, + // 0.1237512938975817, 0.0000000000000000, -0.1725960035025313, + // 0.0000000000000000, 0.0495488546564072, 0.0000000000000000, + // -0.0682331414017083 + VCMP_U64(6, v8, 0x0, 0xbfc82bff9c4ada77, 0x0, 0x3facc621b7fd0401, 0x0, + 0x3f80fa0475f1bbe1, 0x0, 0xbf8f57aaab459580, 0x0, 0x3fbfae2a3020b759, + 0x0, 0xbfc617a0373b59a7, 0x0, 0x3fa95e77ac9b67ce, 0x0, + 0xbfb177ba26d2dcbe); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -0.8423, 0.9531, 0.3889, -0.3704, -0.9731, -0.4636, -0.4797, + // -0.5903, 0.2959, 0.4685, -0.3660, 0.3167, -0.9766, 0.0052, + // -0.6489, -0.0474 + VLOAD_16(v4, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, + 0x34bc, 0x377f, 0xb5db, 0x3511, 0xbbd0, 0x1d48, 0xb931, 0xaa11); + double dscalar_16; + // 0.2971 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34c1); + asm volatile("vfmul.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -0.2502, 0.2832, 0.1155, -0.1100, -0.2891, -0.1377, -0.1426, + // -0.1754, 0.0879, 0.1392, -0.1088, 0.0941, -0.2900, 0.0015, + // -0.1927, -0.0141 + 
VCMP_U16(7, v2, 0xb401, 0x3488, 0x2f65, 0xaf0b, 0xb4a0, 0xb068, 0xb090, + 0xb19d, 0x2da0, 0x3074, 0xaef6, 0x2e05, 0xb4a4, 0x1647, 0xb22b, + 0xa336); + + VSET(16, e32, m4); + // -0.11454447, -0.46133029, 0.06972761, 0.20429718, + // -0.97134608, -0.95719630, -0.11250938, 0.48455358, + // 0.59656250, 0.46462929, 0.13447689, -0.32035729, 0.75118428, + // 0.90634471, 0.73552424, -0.53555632 + VLOAD_32(v8, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 0x3e513348, 0xbf78aa23, + 0xbf750ad1, 0xbde66b52, 0x3ef81768, 0x3f18b852, 0x3eede3e4, + 0x3e09b44f, 0xbea405df, 0x3f404d9d, 0x3f680635, 0x3f3c4b51, + 0xbf091a38); + double dscalar_32; + // 0.94017404 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f70af3f); + asm volatile("vfmul.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.10769174, -0.43373078, 0.06555609, 0.19207491, + // -0.91323435, -0.89993113, -0.10577840, 0.45556471, + // 0.56087255, 0.43683240, 0.12643167, -0.30119160, 0.70624399, + // 0.85212177, 0.69152081, -0.50351614 + VCMP_U32(8, v4, 0xbddc8d7d, 0xbede11f6, 0x3d864246, 0x3e44af49, 0xbf69c9ba, + 0xbf6661e3, 0xbdd8a259, 0x3ee93fc7, 0x3f0f9558, 0x3edfa87f, + 0x3e01774e, 0xbe9a35c9, 0x3f34cc68, 0x3f5a24a7, 0x3f310782, + 0xbf00e66f); + + VSET(16, e64, m8); + // -0.3344965024132001, -0.2497404698970234, 0.3402338726452623, + // -0.5885400342262450, -0.7135559920290824, 0.1114442794173345, + // -0.9541638058007114, 0.1021679621951177, + // -0.1364702451627324, -0.9351295729000717, + // -0.2701320849999789, 0.3582375365191053, + // -0.6137661452178358, 0.6195430637830983, 0.2731869234335833, + // -0.4075196944877124 + VLOAD_64(v16, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, + 0xbfe2d551e8ec6e20, 0xbfe6d573603426e0, 0x3fbc879cbf6c7a10, + 0xbfee8882889e1c44, 0x3fba27adf853b5f0, 0xbfc177db63eceed0, + 0xbfedec94daa41aac, 0xbfd149d815ab3680, 0x3fd6ed5d21e3257c, + 0xbfe3a3f8e623486e, 0x3fe3d34bf9ad2f82, 0x3fd17be50175e4e8, + 0xbfda14cd7c133da0); + double dscalar_64; + // -0.7970907277742201 + 
BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe981c469f7860e); + asm volatile("vfmul.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.2666240605464688, 0.1990658129048941, -0.2711972651602534, + // 0.4691198042056620, 0.5687688649941168, -0.0888312017870367, + // 0.7605551223815086, -0.0814371353413154, 0.1087791670362886, + // 0.7453831118261137, 0.2153197802278006, -0.2855478187000574, + // 0.4892273033748624, -0.4938320315983399, -0.2177547636180751, + // 0.3248301698615385 + VCMP_U64(9, v8, 0x3fd1105e5d17ec76, 0x3fc97afd1216ce6e, 0xbfd15b4bc6282ffc, + 0x3fde060f123e080e, 0x3fe2335ac3443fa9, 0xbfb6bda4428a29bb, + 0x3fe85677b22de228, 0xbfb4d91068f88b49, 0x3fbbd8f394e82fe7, + 0x3fe7da2daf091575, 0x3fcb8f993b2151e0, 0xbfd2466a5bb0b251, + 0x3fdf4f8009138a1b, 0xbfdf9af1aa5ba7aa, 0xbfcbdf635a24d80a, + 0x3fd4ca047b13cdbf); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -0.8423, 0.9531, 0.3889, -0.3704, -0.9731, -0.4636, + // -0.4797, -0.5903, 0.2959, 0.4685, -0.3660, 0.3167, + // -0.9766, 0.0052, -0.6489, -0.0474 + VLOAD_16(v4, 0xbabd, 0x3ba0, 0x3639, 0xb5ed, 0xbbc9, 0xb76b, 0xb7ad, 0xb8b9, + 0x34bc, 0x377f, 0xb5db, 0x3511, 0xbbd0, 0x1d48, 0xb931, 0xaa11); + double dscalar_16; + // 0.2971 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34c1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfmul.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.2832, 0.0000, -0.1100, 0.0000, -0.1377, 0.0000, + // -0.1754, 0.0000, 0.1392, 0.0000, 0.0941, 0.0000, + // 0.0015, 0.0000, -0.0141 + VCMP_U16(10, v2, 0x0, 0x3488, 0x0, 0xaf0b, 0x0, 0xb068, 0x0, 0xb19d, 0x0, + 0x3074, 0x0, 0x2e05, 0x0, 0x1647, 0x0, 0xa336); + + VSET(16, e32, m4); + // -0.11454447, -0.46133029, 0.06972761, 0.20429718, + // -0.97134608, -0.95719630, -0.11250938, 0.48455358, + // 0.59656250, 0.46462929, 0.13447689, -0.32035729, + // 0.75118428, 0.90634471, 0.73552424, -0.53555632 + VLOAD_32(v8, 0xbdea964b, 0xbeec337c, 0x3d8ecd5a, 
0x3e513348, 0xbf78aa23, + 0xbf750ad1, 0xbde66b52, 0x3ef81768, 0x3f18b852, 0x3eede3e4, + 0x3e09b44f, 0xbea405df, 0x3f404d9d, 0x3f680635, 0x3f3c4b51, + 0xbf091a38); + double dscalar_32; + // 0.94017404 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f70af3f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfmul.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.43373078, 0.00000000, 0.19207491, + // 0.00000000, -0.89993113, 0.00000000, 0.45556471, + // 0.00000000, 0.43683240, 0.00000000, -0.30119160, + // 0.00000000, 0.85212177, 0.00000000, -0.50351614 + VCMP_U32(11, v4, 0x0, 0xbede11f6, 0x0, 0x3e44af49, 0x0, 0xbf6661e3, 0x0, + 0x3ee93fc7, 0x0, 0x3edfa87f, 0x0, 0xbe9a35c9, 0x0, 0x3f5a24a7, 0x0, + 0xbf00e66f); + + VSET(16, e64, m8); + // -0.3344965024132001, -0.2497404698970234, + // 0.3402338726452623, -0.5885400342262450, + // -0.7135559920290824, 0.1114442794173345, + // -0.9541638058007114, 0.1021679621951177, + // -0.1364702451627324, -0.9351295729000717, + // -0.2701320849999789, 0.3582375365191053, + // -0.6137661452178358, 0.6195430637830983, + // 0.2731869234335833, -0.4075196944877124 + VLOAD_64(v16, 0xbfd56864049f6dd8, 0xbfcff77ee7590278, 0x3fd5c6644b002e60, + 0xbfe2d551e8ec6e20, 0xbfe6d573603426e0, 0x3fbc879cbf6c7a10, + 0xbfee8882889e1c44, 0x3fba27adf853b5f0, 0xbfc177db63eceed0, + 0xbfedec94daa41aac, 0xbfd149d815ab3680, 0x3fd6ed5d21e3257c, + 0xbfe3a3f8e623486e, 0x3fe3d34bf9ad2f82, 0x3fd17be50175e4e8, + 0xbfda14cd7c133da0); + double dscalar_64; + // -0.7970907277742201 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfe981c469f7860e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfmul.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.1990658129048941, 0.0000000000000000, + // 0.4691198042056620, 0.0000000000000000, + // -0.0888312017870367, 0.0000000000000000, + // -0.0814371353413154, 0.0000000000000000, + // 0.7453831118261137, 0.0000000000000000, + // -0.2855478187000574, 0.0000000000000000, + // 
-0.4938320315983399, 0.0000000000000000, 0.3248301698615385 + VCMP_U64(12, v8, 0x0, 0x3fc97afd1216ce6e, 0x0, 0x3fde060f123e080e, 0x0, + 0xbfb6bda4428a29bb, 0x0, 0xbfb4d91068f88b49, 0x0, 0x3fe7da2daf091575, + 0x0, 0xbfd2466a5bb0b251, 0x0, 0xbfdf9af1aa5ba7aa, 0x0, + 0x3fd4ca047b13cdbf); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c new file mode 100644 index 000000000..942be7ad3 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmv.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.9380 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81); + VCLEAR(v2); + asm volatile("vfmv.v.f v2, %[A]" ::[A] "f"(dscalar_16)); + // -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, + // -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, -0.9380, + // -0.9380, -0.9380 + VCMP_U16(1, v2, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, + 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, 0xbb81, + 0xbb81); + + VSET(16, e32, m4); + double dscalar_32; + // -0.96056187 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762); + VCLEAR(v4); + asm volatile("vfmv.v.f v4, %[A]" ::[A] "f"(dscalar_32)); + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // -0.96056187, -0.96056187, -0.96056187, -0.96056187, + // -0.96056187, -0.96056187, -0.96056187, -0.96056187 + VCMP_U32(2, v4, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, + 0xbf75e762, 0xbf75e762, 0xbf75e762, 
0xbf75e762, 0xbf75e762, + 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, 0xbf75e762, + 0xbf75e762); + + VSET(16, e64, m8); + double dscalar_64; + // 0.9108707261227378 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe); + VCLEAR(v8); + asm volatile("vfmv.v.f v8, %[A]" ::[A] "f"(dscalar_64)); + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378, 0.9108707261227378, 0.9108707261227378, + // 0.9108707261227378 + VCMP_U64(3, v8, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, 0x3fed25da5d7296fe, + 0x3fed25da5d7296fe); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c new file mode 100644 index 000000000..e48726a90 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvfs.c @@ -0,0 +1,90 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +double scalar_16b; +float scalar_32b; +double scalar_64b; + +void TEST_CASE1() { + BOX_HALF_IN_DOUBLE(scalar_16b, 0); + VSET(16, e16, m2); + VLOAD_16(v2, 0xbb1e, 0xb573, 0x39dc, 0xb97a, 0xb4c0, 0xba31, 0x3897, 0x36ee, + 0x3b27, 0xb7d7, 0x36c0, 0x376c, 0x395b, 0x3703, 0x3057, 0x0001); + asm volatile("vfmv.f.s %0, v2" : "=f"(scalar_16b)); + XCMP(1, *((uint16_t *)&scalar_16b), 0xbb1e); + + scalar_32b = 0; + VSET(16, e32, m4); + VLOAD_32(v4, 0xbe9451b0, 0x3ece4bf7, 0x3eadc098, 0x3f09f4f0, 0x3ecc80cc, + 0xbe8a42c5, 0x3f47fd31, 0xbe201365, 0xbeffeb17, 0xbf314e2e, + 0xbd0a9c78, 0xbf1fb51f, 0x3b5e1209, 0x3eac9a73, 0xbeb187b6, + 0x3dea828d); + asm volatile("vfmv.f.s %0, v4" : "=f"(scalar_32b)); + XCMP(2, *((uint32_t *)&scalar_32b), 0xbe9451b0); + + scalar_64b = 0; + VSET(16, e64, m8); + VLOAD_64(v8, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + asm volatile("vfmv.f.s %0, v8" : "=f"(scalar_64b)); + XCMP(3, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); +} + +// Check special cases +void TEST_CASE2() { + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VSET(16, e64, m8); + asm volatile("vfmv.f.s %0, v1" : "=f"(scalar_64b)); + XCMP(4, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 
0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VSET_ZERO(e64, m1); + asm volatile("vfmv.f.s %0, v1" : "=f"(scalar_64b)); + XCMP(5, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 0xbfe8d9d3f67536d2, 0x3fdad9e3e9cdd5bc, 0xbfd90875fda29450, + 0x3fe62686e0339faa, 0x3fe2208e74273f2c, 0xbfc21587add90b50, + 0xbfc7a755744afe30, 0xbfdf67da0cc99808, 0xbfed4488f52c57bc, + 0xbfe6d19a966debbe, 0xbfe1a7778d7c344c, 0xbfdae653f20dd9d4, + 0x3fe4c26b0962c342, 0xbfe2053afd5a822c, 0xbfb9851b4a2e8ff0, + 0xbfdc0cda147fbe5c); + VSET_ZERO(e64, m8); + asm volatile("vfmv.f.s %0, v1" : "=f"(scalar_64b)); + XCMP(6, *((uint64_t *)&scalar_64b), 0xbfe8d9d3f67536d2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c new file mode 100644 index 000000000..5a751ede6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfmvsf.c @@ -0,0 +1,69 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +double scalar_16b; +float scalar_32b; +double scalar_64b; + +void TEST_CASE1() { + BOX_HALF_IN_DOUBLE(scalar_16b, 0xbb1e); + VSET(16, e16, m1); + VLOAD_16(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_16b)); + VCMP_U16(1, v1, *((uint16_t *)&scalar_16b)); + + scalar_32b = 0xbe9451b0; + VSET(16, e32, m1); + VLOAD_32(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_32b)); + VCMP_U32(2, v1, *((uint32_t *)&scalar_32b)); + + scalar_64b = 0xbfe8d9d3f67536d2; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VCMP_U64(3, v1, *((uint64_t *)&scalar_64b)); +} + +// Check special cases +void TEST_CASE2() { + scalar_64b = 0xbfe8d9d3f67536d2; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VSET(1, e64, m1); + VCMP_U64(4, v1, *((uint64_t *)&scalar_64b)); + + scalar_64b = 0xbfe8d9d3f67536d2; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m1); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VSET(1, e64, m1); + VCMP_U64(5, v1, 1); + + scalar_64b = 0xbfe8d9d3f67536d2; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m8); + asm volatile("vfmv.s.f v1, %0" ::"f"(scalar_64b)); + VSET(1, e64, m1); + VCMP_U64(6, v1, 1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c new file mode 100644 index 000000000..df1148f2f --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfncvt.c @@ -0,0 +1,793 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +///////////////// +// vfncvt.xu.f // +///////////////// + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 9165.669, 5488.131, 80154.047, -5022.386, 7163.093, + // -6826.076, -6976.746, 2675.899, 9587.624, -3671.810, + // 3611.960, -9086.531, -5333.617, -3284.205, 5676.141, + // -8293.472 + VLOAD_32(v4, 0x460f36ad, 0x45ab810c, 0x479c8d06, 0xc59cf316, 0x45dfd8be, + 0xc5d5509c, 0xc5da05f8, 0x45273e62, 0x4615ce7f, 0xc5657cf5, + 0x4561bf5b, 0xc60dfa20, 0xc5a6acf0, 0xc54d4347, 0x45b16120, + 0xc60195e3); + asm volatile("vfncvt.xu.f.w v8, v4"); + // 9166, 5488, 65535, 0, 7163, 0, + // 0, 2676, 9588, 0, 3612, 0, 0, + // 0, 5676, 0 + VCMP_U16(1, v8, 0x23ce, 0x1570, 0xffff, 0x0000, 0x1bfb, 0x0000, 0x0000, + 0x0a74, 0x2574, 0x0000, 0x0e1c, 0x0000, 0x0000, 0x0000, 0x162c, + 0x0000); + + VSET(16, e32, m4); + // -3508862.563, 1678202.418, -799491.756, 1707676.429, + // -5056868.769, 4282070.604, 458667.918, 8393053.957, + // -4485003.775, -5016427.098, -9086965.507, -6796529.257, + // -7756776.890, -1173384.460, 4850684.145, 8658279.578 + VLOAD_64(v8, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, + 0x413a0e9c6ddfa609, 0xc1534a59313a407b, 0x415055b5a6a655de, + 0x411bfeafabb28b3f, 0x4160022bbe9fc5e9, 0xc1511be2f1a1ac8b, + 0xc15322dac64b7c31, 0xc16154feb0372db3, 0xc159ed3c506ab6eb, + 0xc15d96fa38fb0400, 0xc131e78875bc4ace, 0x415280ff09493a97, + 0x416083acf280b61e); + asm volatile("vfncvt.xu.f.w v16, v8"); + // 0, 1678202, 0, 1707676, 0, + // 4282071, 458668, 8393054, 0, 0, 0, + // 0, 0, 0, 4850684, + // 8658280 + VCMP_U32(2, v16, 0x00000000, 0x00199b7a, 0x00000000, 0x001a0e9c, 
0x00000000, + 0x004156d7, 0x0006ffac, 0x0080115e, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x004a03fc, + 0x00841d68); +}; + +// Simple random test with similar values (masked) +// Same inputs as TEST_CASE1, except e16 element 2 (-1648.302, not 80154.047) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 9165.669, 5488.131, -1648.302, -5022.386, + // 7163.093, -6826.076, -6976.746, 2675.899, 9587.624, + // -3671.810, 3611.960, -9086.531, -5333.617, -3284.205, + // 5676.141, -8293.472 + VLOAD_32(v4, 0x460f36ad, 0x45ab810c, 0xc4ce09ad, 0xc59cf316, 0x45dfd8be, + 0xc5d5509c, 0xc5da05f8, 0x45273e62, 0x4615ce7f, 0xc5657cf5, + 0x4561bf5b, 0xc60dfa20, 0xc5a6acf0, 0xc54d4347, 0x45b16120, + 0xc60195e3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.xu.f.w v8, v4, v0.t"); + // 0, 5488, 0, 0, 0, 0, + // 0, 2676, 0, 0, 0, 0, + // 0, 0, 0, 0 + VCMP_U16(3, v8, 0x0000, 0x1570, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0a74, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000); + + VSET(16, e32, m4); + // -3508862.563, 1678202.418, -799491.756, + // 1707676.429, -5056868.769, 4282070.604, + // 458667.918, 8393053.957, -4485003.775, + // -5016427.098, -9086965.507, -6796529.257, + // -7756776.890, -1173384.460, 4850684.145, + // 8658279.578 + VLOAD_64(v8, 0xc14ac53f4813ac38, 0x41399b7a6ae9e42f, 0xc128660783332e44, + 0x413a0e9c6ddfa609, 0xc1534a59313a407b, 0x415055b5a6a655de, + 0x411bfeafabb28b3f, 0x4160022bbe9fc5e9, 0xc1511be2f1a1ac8b, + 0xc15322dac64b7c31, 0xc16154feb0372db3, 0xc159ed3c506ab6eb, + 0xc15d96fa38fb0400, 0xc131e78875bc4ace, 0x415280ff09493a97, + 0x416083acf280b61e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.xu.f.w v16, v8, v0.t"); + // 0, 1678202, 0, 1707676, 0, + // 4282071, 0, 8393054, 0, 0, 0, + // 0, 0, 0, 0, 8658280 + VCMP_U32(4, v16, 0x00000000, 0x00199b7a, 0x00000000, 0x001a0e9c, 0x00000000, + 0x004156d7, 0x00000000, 0x0080115e, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, + 0x00841d68); +}; + +//////////////// +// vfncvt.x.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -7808.056, 9317.408, 1685.891, 3975.596, -5978.108, + // 9676.333, 6963.966, 3589.870, -4334.772, -3261.309, + // -2340.480, 6085.075, 4043.322, 2827.902, 4389.497, + // -5196.684 + VLOAD_32(v4, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, + 0x46173155, 0x45d99fbb, 0x45605ded, 0xc587762e, 0xc54bd4f0, + 0xc51247af, 0x45be2899, 0x457cb528, 0x4530be6f, 0x45892bfa, + 0xc5a26578); + asm volatile("vfncvt.x.f.w v8, v4"); + // -7808, 9317, 1686, 3976, -5978, + // 9676, 6964, 3590, -4335, -3261, + // -2340, 6085, 4043, 2828, 4389, + // -5197 + VCMP_U16(5, v8, 0xe180, 0x2465, 0x0696, 0x0f88, 0xe8a6, 0x25cc, 0x1b34, + 0x0e06, 0xef11, 0xf343, 0xf6dc, 0x17c5, 0x0fcb, 0x0b0c, 0x1125, + 0xebb3); + + VSET(16, e32, m4); + // 5365665.770, -7563846.858, 8056193.411, -2468299.255, + // -9624608.750, -6974543.165, 5868078.422, -5387798.170, + // 3847378.080, 1368753.124, 4380497.931, -8044304.268, + // 1687738.849, 3753399.509, -3684410.483, -7416477.444 + VLOAD_64(v8, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, + 0xc142d4e5a0a1f367, 0xc1625b841802ee1d, 0xc15a9b13ca8c7bb6, + 0x4156628b9afacdc9, 0xc1548d858ae6df86, 0x414d5a690a2dbb5e, + 0x4134e2b11fa8e994, 0x4150b5d47b9c3df2, 0xc15eafc4112995f5, + 0x4139c0bad971859a, 0x414ca2dbc1288a12, 0xc14c1c1d3dcd1b39, + 0xc15c4aa75c6c5635); + asm volatile("vfncvt.x.f.w v16, v8"); + // 5365666, -7563847, 8056193, -2468299, + // -9624609, -6974543, 5868078, -5387798, + // 3847378, 1368753, 4380498, -8044304, + // 1687739, 3753400, -3684410, -7416477 + VCMP_U32(6, v16, 0x0051dfa2, 0xff8c95b9, 0x007aed81, 0xffda5635, 0xff6d23df, + 0xff9593b1, 0x00598a2e, 0xffadc9ea, 0x003ab4d2, 0x0014e2b1, + 0x0042d752, 0xff8540f0, 0x0019c0bb, 0x003945b8, 0xffc7c7c6, + 0xff8ed563); +}; + +// Simple random test with similar values 
(masked) +// The numbers are the same as in TEST_CASE3 +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -7808.056, 9317.408, 1685.891, 3975.596, -5978.108, + // 9676.333, 6963.966, 3589.870, -4334.772, -3261.309, + // -2340.480, 6085.075, 4043.322, 2827.902, 4389.497, + // -5196.684 + VLOAD_32(v4, 0xc5f40072, 0x461195a2, 0x44d2bc86, 0x4578798a, 0xc5bad0dd, + 0x46173155, 0x45d99fbb, 0x45605ded, 0xc587762e, 0xc54bd4f0, + 0xc51247af, 0x45be2899, 0x457cb528, 0x4530be6f, 0x45892bfa, + 0xc5a26578); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.x.f.w v8, v4, v0.t"); + // 0, 9317, 0, 3976, 0, + // 9676, 0, 3590, 0, -3261, + // 0, 6085, 0, 2828, 0, + // -5197 + VCMP_U16(7, v8, 0x0000, 0x2465, 0x0000, 0x0f88, 0x0000, 0x25cc, 0x0000, + 0x0e06, 0x0000, 0xf343, 0x0000, 0x17c5, 0x0000, 0x0b0c, 0x0000, + 0xebb3); + + VSET(16, e32, m4); + // 5365665.770, -7563846.858, 8056193.411, -2468299.255, + // -9624608.750, -6974543.165, 5868078.422, -5387798.170, + // 3847378.080, 1368753.124, 4380497.931, -8044304.268, + // 1687738.849, 3753399.509, -3684410.483, -7416477.444 + VLOAD_64(v8, 0x415477e8714aea69, 0xc15cda91b6eefd56, 0x415ebb605a479cd5, + 0xc142d4e5a0a1f367, 0xc1625b841802ee1d, 0xc15a9b13ca8c7bb6, + 0x4156628b9afacdc9, 0xc1548d858ae6df86, 0x414d5a690a2dbb5e, + 0x4134e2b11fa8e994, 0x4150b5d47b9c3df2, 0xc15eafc4112995f5, + 0x4139c0bad971859a, 0x414ca2dbc1288a12, 0xc14c1c1d3dcd1b39, + 0xc15c4aa75c6c5635); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.x.f.w v16, v8, v0.t"); + // 0, -7563847, 0, -2468299, 0, + // -6974543, 0, -5387798, 0, + // 1368753, 0, -8044304, 0, + // 3753400, 0, -7416477 + VCMP_U32(8, v16, 0x00000000, 0xff8c95b9, 0x00000000, 0xffda5635, 0x00000000, + 0xff9593b1, 0x00000000, 0xffadc9ea, 0x00000000, 0x0014e2b1, + 0x00000000, 0xff8540f0, 0x00000000, 0x003945b8, 0x00000000, + 0xff8ed563); +}; + +///////////////////// +// vfncvt.rtz.xu.f // +///////////////////// + +// Simple random test with 
similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -9750.252, -4363.736, -2345.615, 6996.062, -7115.004, + // 6670.171, -4079.234, -1773.082, 254.350, 53.058, + // -9041.926, -8137.022, 1522.146, 198.516, -920.430, + // 2857.583 + VLOAD_32(v4, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, + 0x45d0715e, 0xc57ef3bf, 0xc4dda29c, 0x437e5998, 0x42543afb, + 0xc60d47b4, 0xc5fe482e, 0x44be44af, 0x43468433, 0xc4661b8b, + 0x45329953); + asm volatile("vfncvt.rtz.xu.f.w v8, v4"); + // 0, 0, 0, 6996, 0, + // 6670, 0, 0, 254, 53, + // 0, 0, 1522, 198, 0, + // 2857 + VCMP_U16(9, v8, 0x0000, 0x0000, 0x0000, 0x1b54, 0x0000, 0x1a0e, 0x0000, + 0x0000, 0x00fe, 0x0035, 0x0000, 0x0000, 0x05f2, 0x00c6, 0x0000, + 0x0b29); + + VSET(16, e32, m4); + // -8404683.758, 3627605.540, -4368861.865, -2883871.623, + // 5750957.328, -7243911.338, -8202847.045, 5348152.868, + // 9957770.965, 8018962.598, -8478197.842, -9780786.953, + // 184470.081, 250336.923, -6517203.475, -7691903.192 + VLOAD_64(v8, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, + 0xc146008fcfc2093b, 0x4155f02b54fbd105, 0xc15ba221d5a4f5c3, + 0xc15f4a97c2e2daa6, 0x415466ce378e9269, 0x4162fe315edeecec, + 0x415e9704a64d845e, 0xc1602bbebaf40bb8, 0xc162a7c65e8002a9, + 0x410684b0a4ee482d, 0x410e8f07623ffd06, 0xc158dc74de617fbc, + 0xc15d579fcc41ba16); + asm volatile("vfncvt.rtz.xu.f.w v16, v8"); + // 0, 3627605, 0, 0, 5750957, 0, + // 0, 5348152, 9957770, 8018962, 0, 0, + // 184470, 250336, 0, 0 + VCMP_U32(10, v16, 0x00000000, 0x00375a55, 0x00000000, 0x00000000, 0x0057c0ad, + 0x00000000, 0x00000000, 0x00519b38, 0x0097f18a, 0x007a5c12, + 0x00000000, 0x00000000, 0x0002d096, 0x0003d1e0, 0x00000000, + 0x00000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -9750.252, -4363.736, -2345.615, 6996.062, -7115.004, + // 6670.171, -4079.234, -1773.082, 254.350, 53.058, + // -9041.926, -8137.022, 
1522.146, 198.516, -920.430, + // 2857.583 + VLOAD_32(v4, 0xc6185902, 0xc5885de3, 0xc51299d6, 0x45daa07e, 0xc5de5808, + 0x45d0715e, 0xc57ef3bf, 0xc4dda29c, 0x437e5998, 0x42543afb, + 0xc60d47b4, 0xc5fe482e, 0x44be44af, 0x43468433, 0xc4661b8b, + 0x45329953); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.rtz.xu.f.w v8, v4, v0.t"); + // 0, 0, 0, 6996, 0, + // 6670, 0, 0, 0, 53, + // 0, 0, 0, 198, 0, + // 2857 + VCMP_U16(11, v8, 0x0000, 0x0000, 0x0000, 0x1b54, 0x0000, 0x1a0e, 0x0000, + 0x0000, 0x0000, 0x0035, 0x0000, 0x0000, 0x0000, 0x00c6, 0x0000, + 0x0b29); + + VSET(16, e32, m4); + // -8404683.758, 3627605.540, -4368861.865, -2883871.623, + // 5750957.328, -7243911.338, -8202847.045, 5348152.868, + // 9957770.965, 8018962.598, -8478197.842, -9780786.953, + // 184470.081, 250336.923, -6517203.475, -7691903.192 + VLOAD_64(v8, 0xc16007d978438b7f, 0x414bad2ac51eade4, 0xc150aa777763eeda, + 0xc146008fcfc2093b, 0x4155f02b54fbd105, 0xc15ba221d5a4f5c3, + 0xc15f4a97c2e2daa6, 0x415466ce378e9269, 0x4162fe315edeecec, + 0x415e9704a64d845e, 0xc1602bbebaf40bb8, 0xc162a7c65e8002a9, + 0x410684b0a4ee482d, 0x410e8f07623ffd06, 0xc158dc74de617fbc, + 0xc15d579fcc41ba16); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.rtz.xu.f.w v16, v8, v0.t"); + // 0, 3627605, 0, 0, 0, 0, 0, + // 5348152, 0, 8018962, 0, 0, 0, + // 250336, 0, 0 + VCMP_U32(12, v16, 0x00000000, 0x00375a55, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00519b38, 0x00000000, 0x007a5c12, + 0x00000000, 0x00000000, 0x00000000, 0x0003d1e0, 0x00000000, + 0x00000000); +}; + +//////////////////// +// vfncvt.rtz.x.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE7(void) { + VSET(16, e16, m2); + // 9352.418, -5719.459, 4617.815, -3012.009, -3597.063, + // -5717.140, -3327.545, 1286.004, 1797.767, 3842.966, + // -2148.369, -7283.256, 8783.331, -7958.880, -6728.271, + // 4727.792 + VLOAD_32(v4, 0x461221ac, 0xc5b2bbac, 
0x45904e86, 0xc53c4026, 0xc560d104, + 0xc5b2a91e, 0xc54ff8b9, 0x44a0c01e, 0x44e0b88c, 0x45702f76, + 0xc50645e9, 0xc5e39a0c, 0x46093d53, 0xc5f8b70a, 0xc5d2422c, + 0x4593be56); + asm volatile("vfncvt.rtz.x.f.w v8, v4"); + // 9352, -5719, 4617, -3012, -3597, + // -5717, -3327, 1286, 1797, 3842, + // -2148, -7283, 8783, -7958, -6728, + // 4727 + VCMP_U16(13, v8, 0x2488, 0xe9a9, 0x1209, 0xf43c, 0xf1f3, 0xe9ab, 0xf301, + 0x0506, 0x0705, 0x0f02, 0xf79c, 0xe38d, 0x224f, 0xe0ea, 0xe5b8, + 0x1277); + + VSET(16, e32, m4); + // 1563546.261, -1988965.594, 6496092.888, 5054778.769, + // 9551708.952, -336377.787, -2352111.643, 4412162.570, + // 7087155.475, 338850.875, 2765611.498, 2723631.912, + // -3252079.308, 1096915.326, 5492109.280, -7265880.245 + VLOAD_64(v8, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, + 0x4153484eb13573ed, 0x416237eb9e79d2a8, 0xc11487e725f1ce50, + 0xc141f1f7d2451c3d, 0x4150d4c0a47be906, 0x415b090cde6b0575, + 0x4114ae8b8081532d, 0x41451995bfc3bc74, 0x4144c797f4b307dd, + 0xc148cfb7a76dea0f, 0x4130bcd353667e5d, 0x4154f36351f3a3c5, + 0xc15bb7960fb007a5); + asm volatile("vfncvt.rtz.x.f.w v16, v8"); + // 1563546, -1988965, 6496092, 5054778, + // 9551708, -336377, -2352111, 4412162, + // 7087155, 338850, 2765611, 2723631, + // -3252079, 1096915, 5492109, -7265880 + VCMP_U32(14, v16, 0x0017db9a, 0xffe1a69b, 0x00631f5c, 0x004d213a, 0x0091bf5c, + 0xfffade07, 0xffdc1c11, 0x00435302, 0x006c2433, 0x00052ba2, + 0x002a332b, 0x00298f2f, 0xffce6091, 0x0010bcd3, 0x0053cd8d, + 0xff9121a8); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE7 +void TEST_CASE8(void) { + VSET(16, e16, m2); + // 9352.418, -5719.459, 4617.815, -3012.009, -3597.063, + // -5717.140, -3327.545, 1286.004, 1797.767, 3842.966, + // -2148.369, -7283.256, 8783.331, -7958.880, -6728.271, + // 4727.792 + VLOAD_32(v4, 0x461221ac, 0xc5b2bbac, 0x45904e86, 0xc53c4026, 0xc560d104, + 0xc5b2a91e, 0xc54ff8b9, 0x44a0c01e, 0x44e0b88c, 0x45702f76, + 
0xc50645e9, 0xc5e39a0c, 0x46093d53, 0xc5f8b70a, 0xc5d2422c, + 0x4593be56); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.rtz.x.f.w v8, v4, v0.t"); + // 0, -5719, 0, -3012, 0, + // -5717, 0, 1286, 0, 3842, 0, + // -7283, 0, -7958, 0, 4727 + VCMP_U16(15, v8, 0x0000, 0xe9a9, 0x0000, 0xf43c, 0x0000, 0xe9ab, 0x0000, + 0x0506, 0x0000, 0x0f02, 0x0000, 0xe38d, 0x0000, 0xe0ea, 0x0000, + 0x1277); + + VSET(16, e32, m4); + // 1563546.261, -1988965.594, 6496092.888, 5054778.769, + // 9551708.952, -336377.787, -2352111.643, 4412162.570, + // 7087155.475, 338850.875, 2765611.498, 2723631.912, + // -3252079.308, 1096915.326, 5492109.280, -7265880.245 + VLOAD_64(v8, 0x4137db9a42b839bd, 0xc13e596598118127, 0x4158c7d738d1eec8, + 0x4153484eb13573ed, 0x416237eb9e79d2a8, 0xc11487e725f1ce50, + 0xc141f1f7d2451c3d, 0x4150d4c0a47be906, 0x415b090cde6b0575, + 0x4114ae8b8081532d, 0x41451995bfc3bc74, 0x4144c797f4b307dd, + 0xc148cfb7a76dea0f, 0x4130bcd353667e5d, 0x4154f36351f3a3c5, + 0xc15bb7960fb007a5); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.rtz.x.f.w v16, v8, v0.t"); + // 0, -1988965, 0, 5054778, 0, + // -336377, 0, 4412162, 0, + // 338850, 0, 2723631, 0, + // 1096915, 0, -7265880 + VCMP_U32(16, v16, 0x00000000, 0xffe1a69b, 0x00000000, 0x004d213a, 0x00000000, + 0xfffade07, 0x00000000, 0x00435302, 0x00000000, 0x00052ba2, + 0x00000000, 0x00298f2f, 0x00000000, 0x0010bcd3, 0x00000000, + 0xff9121a8); +}; + +///////////////// +// vfncvt.f.xu // +///////////////// + +// Simple random test with similar values +void TEST_CASE9(void) { + VSET(16, e16, m2); + // 4294964178, 5853, 4294962638, 4294962082, 4585, + // 1637, 3984, 4294964217, 9553, 4294962615, + // 4294962166, 9867, 4294958580, 4294966752, 5172, + // 7478 + VLOAD_32(v4, 0xfffff3d2, 0x000016dd, 0xffffedce, 0xffffeba2, 0x000011e9, + 0x00000665, 0x00000f90, 0xfffff3f9, 0x00002551, 0xffffedb7, + 0xffffebf6, 0x0000268b, 0xffffddf4, 0xfffffde0, 0x00001434, + 0x00001d36); + 
asm volatile("vfncvt.f.xu.w v8, v4"); + // inf, 5852.000, inf, inf, 4584.000, 1637.000, + // 3984.000, inf, 9552.000, inf, inf, 9864.000, + // inf, inf, 5172.000, 7480.000 + VCMP_U16(17, v8, 0x7c00, 0x6db7, 0x7c00, 0x7c00, 0x6c7a, 0x6665, 0x6bc8, + 0x7c00, 0x70aa, 0x7c00, 0x7c00, 0x70d1, 0x7c00, 0x7c00, 0x6d0d, + 0x6f4e); + + VSET(16, e32, m4); + // 18446744073704835106, 18446744073709117625, + // 18446744073705901616, 2086515, 18446744073699655996, + // 932771, 255753, 3148047, + // 18446744073705977615, 18446744073704792883, + // 18446744073704699584, 8685460, 18446744073709143843, + // 18446744073703142874, 3905530, 18446744073704152149 + VLOAD_64(v8, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, + 0x00000000001fd673, 0xffffffffff69013c, 0x00000000000e3ba3, + 0x000000000003e709, 0x000000000030090f, 0xffffffffffc9770f, + 0xffffffffffb76333, 0xffffffffffb5f6c0, 0x0000000000848794, + 0xfffffffffff9c723, 0xffffffffff9e35da, 0x00000000003b97fa, + 0xffffffffffad9c55); + asm volatile("vfncvt.f.xu.w v16, v8"); + // 18446744073709551616.000, 18446744073709551616.000, + // 18446744073709551616.000, 2086515.000, + // 18446744073709551616.000, 932771.000, 255753.000, + // 3148047.000, 18446744073709551616.000, + // 18446744073709551616.000, 18446744073709551616.000, + // 8685460.000, 18446744073709551616.000, + // 18446744073709551616.000, 3905530.000, + // 18446744073709551616.000 + VCMP_U32(18, v16, 0x5f800000, 0x5f800000, 0x5f800000, 0x49feb398, 0x5f800000, + 0x4963ba30, 0x4879c240, 0x4a40243c, 0x5f800000, 0x5f800000, + 0x5f800000, 0x4b048794, 0x5f800000, 0x5f800000, 0x4a6e5fe8, + 0x5f800000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE9 +void TEST_CASE10(void) { + VSET(16, e16, m2); + // 4294964178, 5853, 4294962638, 4294962082, 4585, + // 1637, 3984, 4294964217, 9553, 4294962615, + // 4294962166, 9867, 4294958580, 4294966752, 5172, + // 7478 + VLOAD_32(v4, 0xfffff3d2, 0x000016dd, 0xffffedce, 
0xffffeba2, 0x000011e9, + 0x00000665, 0x00000f90, 0xfffff3f9, 0x00002551, 0xffffedb7, + 0xffffebf6, 0x0000268b, 0xffffddf4, 0xfffffde0, 0x00001434, + 0x00001d36); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.f.xu.w v8, v4, v0.t"); + // 0.000, 5852.000, 0.000, inf, 0.000, 1637.000, + // 0.000, inf, 0.000, inf, 0.000, 9864.000, 0.000, + // inf, 0.000, 7480.000 + VCMP_U16(19, v8, 0x0, 0x6db7, 0x0, 0x7c00, 0x0, 0x6665, 0x0, 0x7c00, 0x0, + 0x7c00, 0x0, 0x70d1, 0x0, 0x7c00, 0x0, 0x6f4e); + + VSET(16, e32, m4); + // 18446744073704835106, 18446744073709117625, + // 18446744073705901616, 2086515, 18446744073699655996, + // 932771, 255753, 3148047, + // 18446744073705977615, 18446744073704792883, + // 18446744073704699584, 8685460, 18446744073709143843, + // 18446744073703142874, 3905530, 18446744073704152149 + VLOAD_64(v8, 0xffffffffffb80822, 0xfffffffffff960b9, 0xffffffffffc84e30, + 0x00000000001fd673, 0xffffffffff69013c, 0x00000000000e3ba3, + 0x000000000003e709, 0x000000000030090f, 0xffffffffffc9770f, + 0xffffffffffb76333, 0xffffffffffb5f6c0, 0x0000000000848794, + 0xfffffffffff9c723, 0xffffffffff9e35da, 0x00000000003b97fa, + 0xffffffffffad9c55); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.f.xu.w v16, v8, v0.t"); + // 0.000, 18446744073709551616.000, 0.000, 2086515.000, + // 0.000, 932771.000, 0.000, 3148047.000, 0.000, + // 18446744073709551616.000, 0.000, 8685460.000, 0.000, + // 18446744073709551616.000, 0.000, 18446744073709551616.000 + VCMP_U32(20, v16, 0x0, 0x5f800000, 0x0, 0x49feb398, 0x0, 0x4963ba30, 0x0, + 0x4a40243c, 0x0, 0x5f800000, 0x0, 0x4b048794, 0x0, 0x5f800000, 0x0, + 0x5f800000); +}; + +//////////////// +// vfncvt.f.x // +//////////////// + +// Simple random test with similar values +void TEST_CASE11(void) { + VSET(16, e16, m2); + // -6279, 3717, 9022, -8925, -5530, + // 3851, 5592, -3692, -2747, -748, + // -2621, -9352, 4018, 3174, -6975, + // -4466 + VLOAD_32(v4, 0xffffe779, 
0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, + 0x00000f0b, 0x000015d8, 0xfffff194, 0xfffff545, 0xfffffd14, + 0xfffff5c3, 0xffffdb78, 0x00000fb2, 0x00000c66, 0xffffe4c1, + 0xffffee8e); + asm volatile("vfncvt.f.x.w v8, v4"); + // -6280.000, 3716.000, 9024.000, -8928.000, -5528.000, + // 3852.000, 5592.000, -3692.000, -2748.000, -748.000, + // -2620.000, -9352.000, 4018.000, 3174.000, -6976.000, + // -4464.000 + VCMP_U16(21, v8, 0xee22, 0x6b42, 0x7068, 0xf05c, 0xed66, 0x6b86, 0x6d76, + 0xeb36, 0xe95e, 0xe1d8, 0xe91e, 0xf091, 0x6bd9, 0x6a33, 0xeed0, + 0xec5c); + + VSET(16, e32, m4); + // 757099, -9365555, 3016973, + // -9277105, -8350486, -650348, + // -1775160, 4659116, 148573, + // 4475248, -2937762, 3310433, + // 9151745, -2201488, -1506850, + // 1593161 + VLOAD_64(v8, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, + 0xffffffffff72714f, 0xffffffffff8094ea, 0xfffffffffff61394, + 0xffffffffffe4e9c8, 0x00000000004717ac, 0x000000000002445d, + 0x0000000000444970, 0xffffffffffd32c5e, 0x0000000000328361, + 0x00000000008ba501, 0xffffffffffde6870, 0xffffffffffe901de, + 0x0000000000184f49); + asm volatile("vfncvt.f.x.w v16, v8"); + // 757099.000, -9365555.000, 3016973.000, -9277105.000, + // -8350486.000, -650348.000, -1775160.000, 4659116.000, + // 148573.000, 4475248.000, -2937762.000, 3310433.000, + // 9151745.000, -2201488.000, -1506850.000, 1593161.000 + VCMP_U32(22, v16, 0x4938d6b0, 0xcb0ee833, 0x4a382434, 0xcb0d8eb1, 0xcafed62c, + 0xc91ec6c0, 0xc9d8b1c0, 0x4a8e2f58, 0x48111740, 0x4a8892e0, + 0xca334e88, 0x4a4a0d84, 0x4b0ba501, 0xca065e40, 0xc9b7f110, + 0x49c27a48); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE11 +void TEST_CASE12(void) { + VSET(16, e16, m2); + // -6279, 3717, 9022, -8925, -5530, + // 3851, 5592, -3692, -2747, -748, + // -2621, -9352, 4018, 3174, -6975, + // -4466 + VLOAD_32(v4, 0xffffe779, 0x00000e85, 0x0000233e, 0xffffdd23, 0xffffea66, + 0x00000f0b, 0x000015d8, 0xfffff194,
0xfffff545, 0xfffffd14, + 0xfffff5c3, 0xffffdb78, 0x00000fb2, 0x00000c66, 0xffffe4c1, + 0xffffee8e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.f.x.w v8, v4, v0.t"); + // 0.000, 3716.000, 0.000, -8928.000, 0.000, 3852.000, + // 0.000, -3692.000, 0.000, -748.000, 0.000, -9352.000, + // 0.000, 3174.000, 0.000, -4464.000 + VCMP_U16(23, v8, 0x0, 0x6b42, 0x0, 0xf05c, 0x0, 0x6b86, 0x0, 0xeb36, 0x0, + 0xe1d8, 0x0, 0xf091, 0x0, 0x6a33, 0x0, 0xec5c); + + VSET(16, e32, m4); + // 757099, -9365555, 3016973, -9277105, + // -8350486, -650348, -1775160, 4659116, + // 148573, 4475248, -2937762, 3310433, + // 9151745, -2201488, -1506850, 1593161 + VLOAD_64(v8, 0x00000000000b8d6b, 0xffffffffff7117cd, 0x00000000002e090d, + 0xffffffffff72714f, 0xffffffffff8094ea, 0xfffffffffff61394, + 0xffffffffffe4e9c8, 0x00000000004717ac, 0x000000000002445d, + 0x0000000000444970, 0xffffffffffd32c5e, 0x0000000000328361, + 0x00000000008ba501, 0xffffffffffde6870, 0xffffffffffe901de, + 0x0000000000184f49); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.f.x.w v16, v8, v0.t"); + // 0.000, -9365555.000, 0.000, -9277105.000, 0.000, + // -650348.000, 0.000, 4659116.000, 0.000, 4475248.000, + // 0.000, 3310433.000, 0.000, -2201488.000, 0.000, + // 1593161.000 + VCMP_U32(24, v16, 0x0, 0xcb0ee833, 0x0, 0xcb0d8eb1, 0x0, 0xc91ec6c0, 0x0, + 0x4a8e2f58, 0x0, 0x4a8892e0, 0x0, 0x4a4a0d84, 0x0, 0xca065e40, 0x0, + 0x49c27a48); +}; + +//////////////// +// vfncvt.f.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE13(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, + // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, + // 4171.873, 5266.611, 9163.839, 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, + 
0x45b1797f); + asm volatile("vfncvt.f.f.w v8, v4"); + // 909.000, -6788.000, -5788.000, 8056.000, 3948.000, 9600.000, + // 2474.000, 3094.000, 7684.000, -6852.000, -54.938, 7736.000, + // 4172.000, 5268.000, 9160.000, 5680.000 + VCMP_U16(25, v8, 0x631a, 0xeea1, 0xeda7, 0x6fde, 0x6bb6, 0x70b0, 0x68d5, + 0x6a0b, 0x6f81, 0xeeb1, 0xd2de, 0x6f8e, 0x6c13, 0x6d25, 0x7079, + 0x6d8c); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, 1049714.758, + // 7538298.328, -8731739.480, 537176.622, -3884944.157, + // 7612336.042, -2270131.404, -4976406.726, -5260237.163, + // -4947737.810, 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + asm volatile("vfncvt.f.f.w v16, v8"); + // 153431.766, -7796011.000, -6652812.000, 1049714.750, + // 7538298.500, -8731739.000, 537176.625, -3884944.250, + // 7612336.000, -2270131.500, -4976406.500, -5260237.000, + // -4947738.000, 3583352.250, 7648790.500, -9360989.000 + VCMP_U32(26, v16, 0x4815d5f1, 0xcaedea56, 0xcacb0718, 0x49802396, 0x4ae60cf5, + 0xcb053c5b, 0x4903258a, 0xca6d1e41, 0x4ae84f60, 0xca0a8ece, + 0xca97de2d, 0xcaa0879a, 0xca96fe34, 0x4a5ab5e1, 0x4ae96c2d, + 0xcb0ed65d); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE13 +void TEST_CASE14(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, + // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, + // 4171.873, 5266.611, 9163.839, 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, +
0x45b1797f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.f.f.w v8, v4, v0.t"); + // 0.000, -6788.000, 0.000, 8056.000, 0.000, 9600.000, 0.000, + // 3094.000, 0.000, -6852.000, 0.000, 7736.000, 0.000, 5268.000, + // 0.000, 5680.000 + VCMP_U16(27, v8, 0x0, 0xeea1, 0x0, 0x6fde, 0x0, 0x70b0, 0x0, 0x6a0b, 0x0, + 0xeeb1, 0x0, 0x6f8e, 0x0, 0x6d25, 0x0, 0x6d8c); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, 1049714.758, + // 7538298.328, -8731739.480, 537176.622, -3884944.157, + // 7612336.042, -2270131.404, -4976406.726, -5260237.163, + // -4947737.810, 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.f.f.w v16, v8, v0.t"); + // 0.000, -7796011.000, 0.000, 1049714.750, 0.000, -8731739.000, + // 0.000, -3884944.250, 0.000, -2270131.500, 0.000, -5260237.000, + // 0.000, 3583352.250, 0.000, -9360989.000 + VCMP_U32(28, v16, 0x0, 0xcaedea56, 0x0, 0x49802396, 0x0, 0xcb053c5b, 0x0, + 0xca6d1e41, 0x0, 0xca0a8ece, 0x0, 0xcaa0879a, 0x0, 0x4a5ab5e1, 0x0, + 0xcb0ed65d); +}; + +//////////////////// +// vfncvt.rod.f.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE15(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, + // 9596.856, 2474.506, 3094.286, 7684.992, -6850.149, + // -54.922, 7737.443, 4171.873, 5266.611, 9163.839, + // 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4, 0x460f2f5b, + 0x45b1797f); + 
asm volatile("vfncvt.rod.f.f.w v8, v4"); + // 909.000, -6788.000, -5788.000, 8056.000, 3948.000, + // 9600.000, 2474.000, 3094.000, 7684.000, -6852.000, + // -54.938, 7736.000, 4172.000, 5268.000, 9160.000, 5680.000 + VCMP_U16(29, v8, 0x6319, 0xeea1, 0xeda7, 0x6fdd, 0x6bb5, 0x70af, 0x68d5, + 0x6a0b, 0x6f81, 0xeeb1, 0xd2dd, 0x6f8f, 0x6c13, 0x6d25, 0x7079, + 0x6d8b); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, + // 1049714.758, 7538298.328, -8731739.480, 537176.622, + // -3884944.157, 7612336.042, -2270131.404, + // -4976406.726, -5260237.163, -4947737.810, + // 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + asm volatile("vfncvt.rod.f.f.w v16, v8"); + // 153431.766, -7796010.500, -6652812.500, + // 1049714.875, 7538298.500, -8731739.000, + // 537176.5625, -3884944.250, 7612336.500, + // -2270131.250, -4976406.500, -5260237.500, + // -4947737.500, 3583352.250, 7648790.500, + // -9360989.000 + VCMP_U32(30, v16, 0x4815d5f1, 0xcaedea55, 0xcacb0719, 0x49802397, 0x4ae60cf5, + 0xcb053c5b, 0x49032589, 0xca6d1e41, 0x4ae84f61, 0xca0a8ecd, + 0xca97de2d, 0xcaa0879b, 0xca96fe33, 0x4a5ab5e1, 0x4ae96c2d, + 0xcb0ed65d); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE15 +void TEST_CASE16(void) { + VSET(16, e16, m2); + // 908.994, -6788.630, -5789.335, 8054.104, 3947.551, 9596.856, + // 2474.506, 3094.286, 7684.992, -6850.149, -54.922, 7737.443, + // 4171.873, 5266.611, 9163.839, 5679.187 + VLOAD_32(v4, 0x44633fa3, 0xc5d4250b, 0xc5b4eaaf, 0x45fbb0d4, 0x4576b8d0, + 0x4615f36d, 0x451aa818, 0x45416494, 0x45f027ef, 0xc5d61131, + 0xc25bb026, 0x45f1cb8c, 0x45825efb, 0x45a494e4,
0x460f2f5b, + 0x45b1797f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfncvt.rod.f.f.w v8, v4, v0.t"); + // 0.000, -6788.000, 0.000, 8056.000, 0.000, 9600.000, 0.000, + // 3094.000, 0.000, -6852.000, 0.000, 7736.000, 0.000, 5268.000, + // 0.000, 5680.000 + VCMP_U16(31, v8, 0x0, 0xeea1, 0x0, 0x6fdd, 0x0, 0x70af, 0x0, 0x6a0b, 0x0, + 0xeeb1, 0x0, 0x6f8f, 0x0, 0x6d25, 0x0, 0x6d8b); + + VSET(16, e32, m4); + // 153431.766, -7796010.957, -6652812.196, 1049714.758, + // 7538298.328, -8731739.480, 537176.622, -3884944.157, + // 7612336.042, -2270131.404, -4976406.726, -5260237.163, + // -4947737.810, 3583352.355, 7648790.331, -9360989.228 + VLOAD_64(v8, 0x4102babe20435c2f, 0xc15dbd4abd4015a9, 0xc15960e30c871450, + 0x41300472c1efbd9f, 0x415cc19e94ffb79b, 0xc160a78b6f5bcd25, + 0x412064b13e931aa9, 0xc14da3c81425b914, 0x415d09ec02a8cc93, + 0xc14151d9b3c1ecaf, 0xc152fbc5ae718384, 0xc15410f34a6ddb48, + 0xc152dfc673d9ba53, 0x414b56bc2d765fad, 0x415d2d85952e8398, + 0xc161dacba74d791e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfncvt.rod.f.f.w v16, v8, v0.t"); + // 0.000, -7796011.000, 0.000, 1049714.750, 0.000, -8731739.000, + // 0.000, -3884944.250, 0.000, -2270131.500, 0.000, -5260237.000, + // 0.000, 3583352.250, 0.000, -9360989.000 + VCMP_U32(32, v16, 0x0, 0xcaedea55, 0x0, 0x49802397, 0x0, 0xcb053c5b, 0x0, + 0xca6d1e41, 0x0, 0xca0a8ecd, 0x0, 0xcaa0879b, 0x0, 0x4a5ab5e1, 0x0, + 0xcb0ed65d); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + TEST_CASE7(); + TEST_CASE8(); + + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + + TEST_CASE13(); + TEST_CASE14(); + + /* + vfncvt.rod.f.f is not supported yet + + // TEST_CASE15(); + // TEST_CASE16(); + */ + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c new file 
mode 100644 index 000000000..3d48cc3e8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmacc.c @@ -0,0 +1,456 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.6377, -0.2332, 0.9458, -0.2612, -0.6772, 0.4543, 0.1002, + // 0.7764, 0.7979, -0.8599, 0.7837, -0.2461, 0.4221, 0.2251, + // 0.7739, 0.1461 + VLOAD_16(v4, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 0x2e69, 0x3a36, + 0x3a62, 0xbae1, 0x3a45, 0xb3e0, 0x36c1, 0x3334, 0x3a31, 0x30ad); + // 0.9551, -0.6787, 0.5605, -0.7305, -0.7197, -0.1581, 0.7271, + // 0.6113, 0.2971, -0.8062, 0.9668, -0.5278, 0.3972, -0.1084, + // -0.3015, 0.9556 + VLOAD_16(v6, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, + 0x34c1, 0xba73, 0x3bbc, 0xb839, 0x365b, 0xaef0, 0xb4d3, 0x3ba5); + // 0.7402, 0.0935, 0.1455, -0.2771, 0.3347, 0.7964, 0.6543, + // -0.7534, 0.2476, 0.0338, 0.9980, 0.3284, 0.2239, + // -0.4551, 0.6694, -0.8550 + VLOAD_16(v2, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, + 0x33ec, 0x2853, 0x3bfc, 0x3541, 0x332a, 0xb748, 0x395b, 0xbad7); + asm volatile("vfnmacc.vv v2, v4, v6"); + // -0.1313, -0.2517, -0.6758, 0.0863, -0.8223, -0.7246, -0.7271, + // 0.2788, -0.4846, -0.7271, -1.7559, -0.4583, -0.3916, 0.4795, + // -0.4360, 0.7153 + VCMP_U16(1, v2, 0xb033, 0xb407, 0xb968, 0x2d86, 0xba94, 0xb9cc, 0xb9d1, + 0x3476, 0xb7c1, 0xb9d1, 0xbf06, 0xb755, 0xb644, 0x37ac, 0xb6fa, + 0x39b9); + + VSET(16, e32, m4); + // -0.17374928, -0.36242354, -0.18093164, 0.94970566, + // -0.45790458, -0.17780401, -0.51985794, -0.04832974, + // 0.13252106, 0.77533042, 0.42536697, -0.72199643, + // -0.25088808, 0.28798762, 0.66300607, -0.63549894 + VLOAD_32(v8, 
0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, + 0xbe361241, 0xbf051569, 0xbd45f569, 0x3e07b39a, 0x3f467c0e, + 0x3ed9c9b3, 0xbf38d4c2, 0xbe807467, 0x3e93731d, 0x3f29bac4, + 0xbf22b00f); + // -0.61242568, 0.71439523, -0.15632962, 0.10917858, + // 0.19637996, -0.88467985, 0.73412597, -0.98048240, 0.25438991, + // -0.02058743, -0.00876777, 0.21936898, -0.71130067, + // -0.29675287, -0.96093589, 0.24695934 + VLOAD_32(v12, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, + 0xbf627a61, 0x3f3befae, 0xbf7b00e5, 0x3e823f65, 0xbca8a6f9, + 0xbc0fa6af, 0x3e60a243, 0xbf3617cd, 0xbe97effe, 0xbf75ffe5, + 0x3e7ce2e9); + // 0.77600455, 0.02542816, -0.63618338, 0.11704731, + // 0.45613721, -0.90825689, 0.21235447, 0.35766414, + // 0.08650716, -0.98431164, 0.21029140, -0.92919809, + // 0.46440944, 0.70648551, -0.80876821, -0.19595607 + VLOAD_32(v4, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, + 0xbf688386, 0x3e597373, 0x3eb71fc1, 0x3db12aaa, 0xbf7bfbd9, + 0x3e5756a1, 0xbf6ddfed, 0x3eedc713, 0x3f34dc3c, 0xbf4f0b6f, + 0xbe48a8b5); + asm volatile("vfnmacc.vv v4, v8, v12"); + // -0.88241309, 0.23348548, 0.60789841, -0.22073483, + // -0.36621392, 0.75095725, 0.16928674, -0.40505061, + // -0.12021918, 1.00027370, -0.20656188, 1.08758175, + // -0.64286631, -0.62102437, 1.44587445, 0.35289848 + VCMP_U32(2, v4, 0xbf61e5d3, 0x3e6f16d2, 0x3f1b9f3b, 0xbe62084f, 0xbebb8064, + 0x3f403ebc, 0x3e2d5982, 0xbecf62cb, 0xbdf63579, 0x3f8008f8, + 0xbe5384f5, 0x3f8b35e1, 0xbf2492e3, 0xbf1efb74, 0x3fb9126b, + 0x3eb4af1c); + + VSET(16, e64, m8); + // -0.3252450595073633, 0.4758165631309326, -0.1595578232245429, + // -0.5062008461482019, -0.8497827573746595, -0.1941654045426651, + // 0.5653121187716577, -0.9852357785633095, -0.4238236947700038, + // 0.5852522737985073, 0.4009389814391957, -0.8725649196362917, + // -0.5946782335830663, 0.4175703122760628, -0.6355596052793091, + // -0.3469340725892474 + VLOAD_64(v16, 0xbfd4d0d0a77142c0, 0x3fde73c75062b7e8, 
0xbfc46c6408490198, + 0xbfe032cc1ded3ff0, 0xbfeb316b9bf41faa, 0xbfc8da6977433ee0, + 0x3fe2170970c503fe, 0xbfef870d2ef8e992, 0xbfdb1fed6b13a6c0, + 0x3fe2ba62f9fbf9aa, 0x3fd9a8fbf93e43f0, 0xbfebec0d442f3114, + 0xbfe3079aa59c3bf4, 0x3fdab978d4c06588, 0xbfe4568118eaaa68, + 0xbfd6342af7e8e3dc); + // 0.9024789401717532, 0.1750129013440402, 0.5031110880652467, + // -0.2303324647743561, -0.3880673069078899, + // -0.9441232974464955, -0.9718449040015202, 0.6713775626400460, + // -0.0912048565692380, -0.5347347522064834, + // -0.5209348837668262, 0.1676058792979986, + // -0.3611782231841894, 0.5839305722445856, + // -0.5690013462620132, -0.7273345685963009 + VLOAD_64(v24, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 0x3fe0197c6cafd8c4, + 0xbfcd7b88c1b4daf0, 0xbfd8d61841f43c54, 0xbfee36420fbd9482, + 0xbfef195a7bef10b4, 0x3fe57beccc59d47e, 0xbfb7593394338500, + 0xbfe11c8c0e185e4a, 0xbfe0ab7fa223f876, 0x3fc5741c0519e298, + 0xbfd71d8b44269f74, 0x3fe2af8f2add9a18, 0xbfe235424fb26902, + 0xbfe74653252be25a); + // -0.0769255470598902, -0.8447241112550155, -0.1913688167412757, + // 0.7663381230505260, 0.2058488268749510, -0.0251549939511286, + // 0.5275264461714482, -0.7602756587514194, 0.6498044022974587, + // -0.7128277097157256, -0.8385947434294139, 0.8834902787005550, + // 0.5936682304042178, 0.1532178226844403, -0.5096194622607613, + // -0.8578075287458693 + VLOAD_64(v8, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, + 0x3fe885d78705c2d4, 0x3fca59411dac8758, 0xbf99c23b11679a80, + 0x3fe0e17f24429b70, 0xbfe8542d9e4907ce, 0x3fe4cb329a1542de, + 0xbfe6cf7c0e9d2c04, 0xbfead5c4a4b40f1a, 0x3fec458d67ab4a36, + 0x3fe2ff5484485472, 0x3fc39ca440cc0820, 0xbfe04ecd797a151a, + 0xbfeb7328c6473c1e); + asm volatile("vfnmacc.vv v8, v16, v24"); + // 0.3704523636601942, 0.7614500740339213, 0.2716441267930978, + // -0.8829326116147059, -0.5356217329860959, + // -0.1581610880357251, 0.0218692556270895, 1.4217408543890222, + // -0.6884591815896014, 1.0257824393236514, 1.0474578451230310, + // 
-0.7372432681003268, -0.8084530581760619, + // -0.3970498940841519, 0.1479851912270807, 0.6054703847278113 + VCMP_U64(3, v8, 0x3fd7b57dd4a95f28, 0x3fe85dcc8bb06629, 0x3fd1629e0c2e846c, + 0xbfec40fbe46ea001, 0xbfe123d0304677d2, 0xbfc43e9f5e4e7ddd, + 0x3f9664e4e6d32991, 0x3ff6bf73568fcea3, 0xbfe607db8cb1dd4b, + 0x3ff0699ad8db4c8b, 0x3ff0c263284bdf71, 0xbfe7977f31b5fc6d, + 0xbfe9ded8f2a6f4b5, 0xbfd96943f57e3046, 0x3fc2f12dc24e6a42, + 0x3fe360036da34794); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same as in TEST_CASE1; masked-off elements keep the accumulator's previous value +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.6377, -0.2332, 0.9458, -0.2612, -0.6772, 0.4543, 0.1002, + // 0.7764, 0.7979, -0.8599, 0.7837, -0.2461, 0.4221, 0.2251, + // 0.7739, 0.1461 + VLOAD_16(v4, 0xb91a, 0xb376, 0x3b91, 0xb42e, 0xb96b, 0x3745, 0x2e69, 0x3a36, + 0x3a62, 0xbae1, 0x3a45, 0xb3e0, 0x36c1, 0x3334, 0x3a31, 0x30ad); + // 0.9551, -0.6787, 0.5605, -0.7305, -0.7197, -0.1581, 0.7271, + // 0.6113, 0.2971, -0.8062, 0.9668, -0.5278, 0.3972, -0.1084, + // -0.3015, 0.9556 + VLOAD_16(v6, 0x3ba4, 0xb96e, 0x387c, 0xb9d8, 0xb9c2, 0xb10f, 0x39d1, 0x38e4, + 0x34c1, 0xba73, 0x3bbc, 0xb839, 0x365b, 0xaef0, 0xb4d3, 0x3ba5); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.7402, 0.0935, 0.1455, -0.2771, 0.3347, 0.7964, 0.6543, + // -0.7534, 0.2476, 0.0338, 0.9980, 0.3284, 0.2239, + // -0.4551, 0.6694, -0.8550 + VLOAD_16(v2, 0x39ec, 0x2dfc, 0x30a8, 0xb46f, 0x355b, 0x3a5f, 0x393c, 0xba07, + 0x33ec, 0x2853, 0x3bfc, 0x3541, 0x332a, 0xb748, 0x395b, 0xbad7); + asm volatile("vfnmacc.vv v2, v4, v6, v0.t"); + // 0.7402, -0.2517, 0.1455, 0.0863, 0.3347, -0.7246, 0.6543, + // 0.2788, 0.2476, -0.7271, 0.9980, -0.4583, 0.2239, 0.4795, + // 0.6694, 0.7153 + VCMP_U16(4, v2, 0x39ec, 0xb407, 0x30a8, 0x2d86, 0x355b, 0xb9cc, 0x393c, + 0x3476, 0x33ec, 0xb9d1, 0x3bfc, 0xb755, 0x332a, 0x37ac, 0x395b, + 0x39b9); + + VSET(16, e32, m4); + // -0.17374928, -0.36242354, -0.18093164, 0.94970566, + // -0.45790458, -0.17780401, -0.51985794,
-0.04832974, + // 0.13252106, 0.77533042, 0.42536697, -0.72199643, + // -0.25088808, 0.28798762, 0.66300607, -0.63549894 + VLOAD_32(v8, 0xbe31eb55, 0xbeb98f94, 0xbe394625, 0x3f731fe9, 0xbeea7278, + 0xbe361241, 0xbf051569, 0xbd45f569, 0x3e07b39a, 0x3f467c0e, + 0x3ed9c9b3, 0xbf38d4c2, 0xbe807467, 0x3e93731d, 0x3f29bac4, + 0xbf22b00f); + // -0.61242568, 0.71439523, -0.15632962, 0.10917858, + // 0.19637996, -0.88467985, 0.73412597, -0.98048240, 0.25438991, + // -0.02058743, -0.00876777, 0.21936898, -0.71130067, + // -0.29675287, -0.96093589, 0.24695934 + VLOAD_32(v12, 0xbf1cc7ee, 0x3f36e29b, 0xbe2014df, 0x3ddf9905, 0x3e4917d4, + 0xbf627a61, 0x3f3befae, 0xbf7b00e5, 0x3e823f65, 0xbca8a6f9, + 0xbc0fa6af, 0x3e60a243, 0xbf3617cd, 0xbe97effe, 0xbf75ffe5, + 0x3e7ce2e9); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.77600455, 0.02542816, -0.63618338, 0.11704731, + // 0.45613721, -0.90825689, 0.21235447, 0.35766414, + // 0.08650716, -0.98431164, 0.21029140, -0.92919809, + // 0.46440944, 0.70648551, -0.80876821, -0.19595607 + VLOAD_32(v4, 0x3f46a83c, 0x3cd04eb8, 0xbf22dcea, 0x3defb680, 0x3ee98ad1, + 0xbf688386, 0x3e597373, 0x3eb71fc1, 0x3db12aaa, 0xbf7bfbd9, + 0x3e5756a1, 0xbf6ddfed, 0x3eedc713, 0x3f34dc3c, 0xbf4f0b6f, + 0xbe48a8b5); + asm volatile("vfnmacc.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.23348548, 0.00000000, -0.22073483, + // 0.00000000, 0.75095725, 0.00000000, -0.40505061, + // 0.00000000, 1.00027370, 0.00000000, 1.08758175, + // 0.00000000, -0.62102437, 0.00000000, 0.35289848 + VCMP_U32(5, v4, 0x3f46a83c, 0x3e6f16d2, 0xbf22dcea, 0xbe62084f, 0x3ee98ad1, + 0x3f403ebc, 0x3e597373, 0xbecf62cb, 0x3db12aaa, 0x3f8008f8, + 0x3e5756a1, 0x3f8b35e1, 0x3eedc713, 0xbf1efb74, 0xbf4f0b6f, + 0x3eb4af1c); + + VSET(16, e64, m8); + // -0.3252450595073633, 0.4758165631309326, -0.1595578232245429, + // -0.5062008461482019, -0.8497827573746595, -0.1941654045426651, + // 0.5653121187716577, -0.9852357785633095, -0.4238236947700038, + // 0.5852522737985073, 0.4009389814391957, -0.8725649196362917, + 
// -0.5946782335830663, 0.4175703122760628, -0.6355596052793091, + // -0.3469340725892474 + VLOAD_64(v16, 0xbfd4d0d0a77142c0, 0x3fde73c75062b7e8, 0xbfc46c6408490198, + 0xbfe032cc1ded3ff0, 0xbfeb316b9bf41faa, 0xbfc8da6977433ee0, + 0x3fe2170970c503fe, 0xbfef870d2ef8e992, 0xbfdb1fed6b13a6c0, + 0x3fe2ba62f9fbf9aa, 0x3fd9a8fbf93e43f0, 0xbfebec0d442f3114, + 0xbfe3079aa59c3bf4, 0x3fdab978d4c06588, 0xbfe4568118eaaa68, + 0xbfd6342af7e8e3dc); + // 0.9024789401717532, 0.1750129013440402, 0.5031110880652467, + // -0.2303324647743561, -0.3880673069078899, + // -0.9441232974464955, -0.9718449040015202, 0.6713775626400460, + // -0.0912048565692380, -0.5347347522064834, + // -0.5209348837668262, 0.1676058792979986, + // -0.3611782231841894, 0.5839305722445856, + // -0.5690013462620132, -0.7273345685963009 + VLOAD_64(v24, 0x3fece11b83abb9b8, 0x3fc666d29fd34b08, 0x3fe0197c6cafd8c4, + 0xbfcd7b88c1b4daf0, 0xbfd8d61841f43c54, 0xbfee36420fbd9482, + 0xbfef195a7bef10b4, 0x3fe57beccc59d47e, 0xbfb7593394338500, + 0xbfe11c8c0e185e4a, 0xbfe0ab7fa223f876, 0x3fc5741c0519e298, + 0xbfd71d8b44269f74, 0x3fe2af8f2add9a18, 0xbfe235424fb26902, + 0xbfe74653252be25a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.0769255470598902, -0.8447241112550155, -0.1913688167412757, + // 0.7663381230505260, 0.2058488268749510, -0.0251549939511286, + // 0.5275264461714482, -0.7602756587514194, 0.6498044022974587, + // -0.7128277097157256, -0.8385947434294139, 0.8834902787005550, + // 0.5936682304042178, 0.1532178226844403, -0.5096194622607613, + // -0.8578075287458693 + VLOAD_64(v8, 0xbfb3b16484d96110, 0xbfeb07fadbff7462, 0xbfc87ec5fcb06230, + 0x3fe885d78705c2d4, 0x3fca59411dac8758, 0xbf99c23b11679a80, + 0x3fe0e17f24429b70, 0xbfe8542d9e4907ce, 0x3fe4cb329a1542de, + 0xbfe6cf7c0e9d2c04, 0xbfead5c4a4b40f1a, 0x3fec458d67ab4a36, + 0x3fe2ff5484485472, 0x3fc39ca440cc0820, 0xbfe04ecd797a151a, + 0xbfeb7328c6473c1e); + asm volatile("vfnmacc.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.7614500740339213, 0.0000000000000000, + // 
-0.8829326116147059, 0.0000000000000000, + // -0.1581610880357251, 0.0000000000000000, 1.4217408543890222, + // 0.0000000000000000, 1.0257824393236514, 0.0000000000000000, + // -0.7372432681003268, 0.0000000000000000, + // -0.3970498940841519, 0.0000000000000000, 0.6054703847278113 + VCMP_U64(6, v8, 0xbfb3b16484d96110, 0x3fe85dcc8bb06629, 0xbfc87ec5fcb06230, + 0xbfec40fbe46ea001, 0x3fca59411dac8758, 0xbfc43e9f5e4e7ddd, + 0x3fe0e17f24429b70, 0x3ff6bf73568fcea3, 0x3fe4cb329a1542de, + 0x3ff0699ad8db4c8b, 0xbfead5c4a4b40f1a, 0xbfe7977f31b5fc6d, + 0x3fe2ff5484485472, 0xbfd96943f57e3046, 0xbfe04ecd797a151a, + 0x3fe360036da34794); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1300 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3029); + // -0.2844, 0.4070, -0.1837, -0.2321, -0.5283, -0.6104, -0.7183, + // -0.1191, 0.7998, 0.1169, 0.1169, -0.9214, -0.4360, -0.6250, + // -0.5386, 0.6543 + VLOAD_16(v4, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, + 0x3a66, 0x2f7c, 0x2f7c, 0xbb5f, 0xb6fa, 0xb900, 0xb84f, 0x393c); + // 0.9268, -0.3337, -0.3225, -0.8306, -0.1857, -0.6831, 0.0557, + // 0.5586, 0.2352, 0.6294, 0.6294, -0.8877, -0.2426, 0.5488, + // 0.4001, 0.1772 + VLOAD_16(v2, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, + 0x3387, 0x3909, 0x3909, 0xbb1a, 0xb3c3, 0x3864, 0x3667, 0x31ac); + asm volatile("vfnmacc.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.8896, 0.2808, 0.3464, 0.8608, 0.2544, 0.7627, 0.0377, + // -0.5430, -0.3394, -0.6445, -0.6445, 1.0078, 0.2993, -0.4675, + // -0.3301, -0.2622 + VCMP_U16(7, v2, 0xbb1e, 0x347e, 0x358b, 0x3ae3, 0x3412, 0x3a1a, 0x28d3, + 0xb858, 0xb56d, 0xb928, 0xb928, 0x3c08, 0x34ca, 0xb77b, 0xb548, + 0xb432); + + VSET(16, e32, m4); + double dscalar_32; + // -0.26917368 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe89d122); + // -0.27745819, -0.86308837, -0.16746511, -0.68674469, + // -0.49064314, -0.74352056, -0.17169137, 0.26071417, 
+ // 0.71857828, 0.07920383, -0.43244356, -0.58339220, 0.80679923, + // 0.23900302, 0.73513943, -0.80685192 + VLOAD_32(v8, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, + 0xbf3e575d, 0xbe2fcfdd, 0x3e857c54, 0x3f37f4bf, 0x3da2359e, + 0xbedd693e, 0xbf155931, 0x3f4e8a65, 0x3e74bd35, 0x3f3c3219, + 0xbf4e8dd9); + // 0.13509545, -0.29169917, 0.80494332, -0.63637137, + // 0.63772237, -0.87242430, -0.44194883, -0.41286576, + // -0.57735479, 0.61664599, 0.94073379, -0.89744234, + // -0.70681161, 0.23247144, 0.06774496, -0.38581881 + VLOAD_32(v4, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, + 0xbf5f5733, 0xbee2471e, 0xbed36324, 0xbf13cd86, 0x3f1ddc83, + 0x3f70d3ee, 0xbf65bec8, 0xbf34f19b, 0x3e6e0cfe, 0x3d8abdde, + 0xbec58a0b); + asm volatile("vfnmacc.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.20977989, 0.05937849, -0.85002053, 0.45151776, + // -0.76979059, 0.67228812, 0.39573404, 0.48304313, + // 0.77077717, -0.59532642, -1.05713618, 0.74040854, 0.92398071, + // -0.16813812, 0.13013524, 0.16863550 + VCMP_U32(8, v4, 0xbe56d08a, 0x3d7336de, 0xbf599af2, 0x3ee72d57, 0xbf4510ff, + 0x3f2c1b13, 0x3eca9da7, 0x3ef7516f, 0x3f4551a7, 0xbf186750, + 0xbf87503d, 0x3f3d8b6a, 0x3f6c8a00, 0xbe2c2c66, 0x3e05422c, + 0x3e2caec9); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1021836258281641 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fba28b4c31e60e0); + // 0.3274079371230154, 0.9254873656544997, 0.9683609308176633, + // -0.6778040243955326, 0.9669615854915627, + // 0.8026267269428324, -0.7388821308618641, 0.7432413708598076, + // 0.9355143976513562, -0.4219868517017851, 0.8950700270161456, + // 0.6727820676214205, -0.8833440526985297, 0.0357808590148252, + // -0.3802125831332157, 0.9831607630398518 + VLOAD_64(v16, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, + 0xbfe5b09210bc082e, 0x3feef1596c459614, 0x3fe9af1e3ee3adfa, + 0xbfe7a4ec2374d0e2, 0x3fe7c8a2209c110c, 0x3fedefbbe3db30dc, + 0xbfdb01d523d9acc0, 0x3feca469e5b540fa, 0x3fe5876e42389dca, + 
0xbfec445abf2e99e4, 0x3fa251de66953a60, 0xbfd8556728856e10, + 0x3fef760d8f7eee22); + // 0.1671854121593166, 0.6264287337062140, 0.1587305627009998, + // -0.3348358495277817, 0.4721131630506652, + // 0.2878076790245236, 0.5083797506594245, 0.9444607965181537, + // -0.2805814092841707, -0.7218856627753110, + // -0.3443302881655670, 0.3680926220616383, + // -0.2344410843781140, 0.3553553454507421, + // 0.0951222110617760, -0.8329780449088213 + VLOAD_64(v8, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, + 0xbfd56df357d00344, 0x3fde371a20d41408, 0x3fd26b70e63cabf0, + 0x3fe044a59c60fcd4, 0x3fee3905d92cc95e, 0xbfd1f50bba2f6e40, + 0xbfe719aff62247a4, 0xbfd60981e7ac601c, 0x3fd78ed45b69d4fc, + 0xbfce022a5b1f1348, 0x3fd6be2458cadcb0, 0x3fb859ede1a22f80, + 0xbfeaa7c192a56bc8); + asm volatile("vfnmacc.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.2006411422994659, -0.7209983883869466, -0.2576811937222847, + // 0.4040963223414386, -0.5709208038927434, -0.3698229881701340, + // -0.4328780954683192, -1.0204078946581041, 0.1849871561177042, + // 0.7650058093340112, 0.2528687874349445, -0.4368399331233641, + // 0.3247043825365946, -0.3590115633601233, -0.0562707107317317, + // 0.7325151133694248 + VCMP_U64(9, v8, 0xbfc9ae9be43442c9, 0xbfe7126b3652e68a, 0xbfd07dd942f53687, + 0x3fd9dcb6d238fb8a, 0xbfe244fbb4aa695e, 0xbfd7ab2e09dffb6d, + 0xbfdbb44653cc3c92, 0xbff053973a823036, 0x3fc7ada8bcda50a5, + 0x3fe87aed768addeb, 0x3fd02f00910d95b4, 0xbfdbf52f7a9681dc, + 0x3fd4c7f4e3f733f9, 0xbfd6fa0ba2e11fba, 0xbfaccf83bca18481, + 0x3fe770c388f7eacc); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.1300 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3029); + // -0.2844, 0.4070, -0.1837, -0.2321, -0.5283, -0.6104, + // -0.7183, -0.1191, 0.7998, 0.1169, 0.2551, -0.9214, + // -0.4360, -0.6250, -0.5386, 0.6543 + VLOAD_16(v4, 0xb48d, 0x3683, 0xb1e1, 0xb36d, 0xb83a, 0xb8e2, 0xb9bf, 0xaf9f, + 0x3a66, 
0x2f7c, 0x3415, 0xbb5f, 0xb6fa, 0xb900, 0xb84f, 0x393c); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.9268, -0.3337, -0.3225, -0.8306, -0.1857, -0.6831, 0.0557, + // 0.5586, 0.2352, 0.6294, -0.0325, -0.8877, -0.2426, 0.5488, + // 0.4001, 0.1772 + VLOAD_16(v2, 0x3b6a, 0xb557, 0xb529, 0xbaa5, 0xb1f1, 0xb977, 0x2b21, 0x3878, + 0x3387, 0x3909, 0xa828, 0xbb1a, 0xb3c3, 0x3864, 0x3667, 0x31ac); + asm volatile("vfnmacc.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.2808, 0.0000, 0.8608, 0.0000, 0.7627, 0.0000, + // -0.5430, 0.0000, -0.6445, 0.0000, 1.0078, 0.0000, + // -0.4675, 0.0000, -0.2622 + VCMP_U16(10, v2, 0x3b6a, 0x347e, 0xb529, 0x3ae3, 0xb1f1, 0x3a1a, 0x2b21, + 0xb858, 0x3387, 0xb928, 0xa828, 0x3c08, 0xb3c3, 0xb77b, 0x3667, + 0xb432); + + VSET(16, e32, m4); + double dscalar_32; + // -0.26917368 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe89d122); + // -0.27745819, -0.86308837, -0.16746511, -0.68674469, + // -0.49064314, -0.74352056, -0.17169137, 0.26071417, + // 0.71857828, 0.07920383, -0.43244356, -0.58339220, + // 0.80679923, 0.23900302, 0.73513943, -0.80685192 + VLOAD_32(v8, 0xbe8e0f00, 0xbf5cf35c, 0xbe2b7bf9, 0xbf2fce80, 0xbefb3594, + 0xbf3e575d, 0xbe2fcfdd, 0x3e857c54, 0x3f37f4bf, 0x3da2359e, + 0xbedd693e, 0xbf155931, 0x3f4e8a65, 0x3e74bd35, 0x3f3c3219, + 0xbf4e8dd9); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.13509545, -0.29169917, 0.80494332, -0.63637137, + // 0.63772237, -0.87242430, -0.44194883, -0.41286576, + // -0.57735479, 0.61664599, 0.94073379, -0.89744234, + // -0.70681161, 0.23247144, 0.06774496, -0.38581881 + VLOAD_32(v4, 0x3e0a5676, 0xbe955998, 0x3f4e10c4, 0xbf22e93c, 0x3f2341c6, + 0xbf5f5733, 0xbee2471e, 0xbed36324, 0xbf13cd86, 0x3f1ddc83, + 0x3f70d3ee, 0xbf65bec8, 0xbf34f19b, 0x3e6e0cfe, 0x3d8abdde, + 0xbec58a0b); + asm volatile("vfnmacc.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.05937849, 0.00000000, 0.45151776, + // 0.00000000, 0.67228812, 0.00000000, 0.48304313, + // 0.00000000, -0.59532642, 0.00000000, 0.74040854, + // 
0.00000000, -0.16813812, 0.00000000, 0.16863550 + VCMP_U32(11, v4, 0x3e0a5676, 0x3d7336de, 0x3f4e10c4, 0x3ee72d57, 0x3f2341c6, + 0x3f2c1b13, 0xbee2471e, 0x3ef7516f, 0xbf13cd86, 0xbf186750, + 0x3f70d3ee, 0x3f3d8b6a, 0xbf34f19b, 0xbe2c2c66, 0x3d8abdde, + 0x3e2caec9); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1021836258281641 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fba28b4c31e60e0); + // 0.3274079371230154, 0.9254873656544997, + // 0.9683609308176633, -0.6778040243955326, + // 0.9669615854915627, 0.8026267269428324, + // -0.7388821308618641, 0.7432413708598076, + // 0.9355143976513562, -0.4219868517017851, + // 0.8950700270161456, 0.6727820676214205, + // -0.8833440526985297, 0.0357808590148252, + // -0.3802125831332157, 0.9831607630398518 + VLOAD_64(v16, 0x3fd4f4406b993a2c, 0x3fed9d97ae0b1cd6, 0x3feefcd01012c05e, + 0xbfe5b09210bc082e, 0x3feef1596c459614, 0x3fe9af1e3ee3adfa, + 0xbfe7a4ec2374d0e2, 0x3fe7c8a2209c110c, 0x3fedefbbe3db30dc, + 0xbfdb01d523d9acc0, 0x3feca469e5b540fa, 0x3fe5876e42389dca, + 0xbfec445abf2e99e4, 0x3fa251de66953a60, 0xbfd8556728856e10, + 0x3fef760d8f7eee22); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.1671854121593166, 0.6264287337062140, + // 0.1587305627009998, -0.3348358495277817, + // 0.4721131630506652, 0.2878076790245236, + // 0.5083797506594245, 0.9444607965181537, + // -0.2805814092841707, -0.7218856627753110, + // -0.3443302881655670, 0.3680926220616383, + // -0.2344410843781140, 0.3553553454507421, + // 0.0951222110617760, -0.8329780449088213 + VLOAD_64(v8, 0x3fc56654e2cbd888, 0x3fe40bb445915f4a, 0x3fc4514877d696a0, + 0xbfd56df357d00344, 0x3fde371a20d41408, 0x3fd26b70e63cabf0, + 0x3fe044a59c60fcd4, 0x3fee3905d92cc95e, 0xbfd1f50bba2f6e40, + 0xbfe719aff62247a4, 0xbfd60981e7ac601c, 0x3fd78ed45b69d4fc, + 0xbfce022a5b1f1348, 0x3fd6be2458cadcb0, 0x3fb859ede1a22f80, + 0xbfeaa7c192a56bc8); + asm volatile("vfnmacc.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // -0.2006411422994659, -0.7209983883869466, + // -0.2576811937222847, 
0.4040963223414386, + // -0.5709208038927434, -0.3698229881701340, + // -0.4328780954683192, -1.0204078946581041, + // 0.1849871561177042, 0.7650058093340112, 0.2528687874349445, + // -0.4368399331233641, 0.3247043825365946, + // -0.3590115633601233, -0.0562707107317317, 0.7325151133694248 + VCMP_U64(12, v8, 0x3fc56654e2cbd888, 0xbfe7126b3652e68a, 0x3fc4514877d696a0, + 0x3fd9dcb6d238fb8a, 0x3fde371a20d41408, 0xbfd7ab2e09dffb6d, + 0x3fe044a59c60fcd4, 0xbff053973a823036, 0xbfd1f50bba2f6e40, + 0x3fe87aed768addeb, 0xbfd60981e7ac601c, 0xbfdbf52f7a9681dc, + 0xbfce022a5b1f1348, 0xbfd6fa0ba2e11fba, 0x3fb859ede1a22f80, + 0x3fe770c388f7eacc); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c new file mode 100644 index 000000000..407bafc88 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmadd.c @@ -0,0 +1,458 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.7461, 0.0514, -0.3071, -0.4934, -0.5220, -0.7983, 0.4907, + // -0.9028, 0.1752, 0.0676, 0.1040, 0.4526, 0.3525, -0.2686, + // 0.3540, -0.0847 + VLOAD_16(v4, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, + 0x319b, 0x2c54, 0x2ea8, 0x373e, 0x35a4, 0xb44c, 0x35aa, 0xad6c); + // 0.1573, -0.7700, 0.0804, -0.9438, 0.0790, 0.7998, -0.2854, + // 0.1963, -0.0687, -0.2123, 0.3625, -0.0002, 0.7168, -0.4033, + // 0.2812, -0.3159 + VLOAD_16(v6, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, + 0xac65, 0xb2cb, 0x35cd, 0x897c, 0x39bc, 0xb674, 0x3480, 0xb50e); + // 0.0337, 0.2034, -0.1886, 0.8242, 0.3225, 0.0331, 0.0698, + // 0.6777, -0.2539, -0.5825, -0.4319, -0.6323, 0.0674, -0.2903, + // -0.8145, 0.1893 + VLOAD_16(v2, 0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, + 0xb410, 0xb8a9, 0xb6e9, 0xb90f, 0x2c50, 0xb4a5, 0xba84, 0x320f); + asm volatile("vfnmadd.vv v2, v4, v6"); + // -0.1322, 0.7598, -0.1383, 1.3506, 0.0894, -0.7734, 0.2512, + // 0.4155, 0.1132, 0.2517, -0.3176, 0.2864, -0.7407, 0.3254, + // 0.0071, 0.3320 + VCMP_U16(1, v2, 0xb03b, 0x3a14, 0xb06d, 0x3d67, 0x2db8, 0xba30, 0x3405, + 0x36a6, 0x2f3e, 0x3407, 0xb515, 0x3495, 0xb9ed, 0x3535, 0x1f3d, + 0x3550); + + VSET(16, e32, m4); + // -0.36820358, 0.10496315, -0.32905263, -0.92334682, + // 0.43153936, 0.92736709, -0.59600371, 0.75117606, 0.84123290, + // 0.33028743, -0.43412161, 0.95273590, 0.06816643, + // -0.88978988, 0.18573478, 0.61926919 + VLOAD_32(v8, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, + 0x3f6d67ee, 0xbf1893b3, 0x3f404d13, 0x3f575b0a, 0x3ea91b6f, + 0xbede4530, 0x3f73e680, 0x3d8b9ad8, 0xbf63c945, 0x3e3e3142, + 0x3f1e886d); + // 0.69083834, -0.31329882, -0.54809541, 0.25019145, + // -0.67489260, 
0.23259214, -0.14038530, 0.09741956, + // -0.23567833, 0.75417399, -0.90357685, -0.87489468, + // 0.54726779, -0.06705534, -0.15476358, -0.96940458 + VLOAD_32(v12, 0x3f30dac8, 0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, + 0x3e6e2ca2, 0xbe0fc12a, 0x3dc783e8, 0xbe7155a9, 0x3f41118c, + 0xbf6750d0, 0xbf5ff919, 0x3f0c19be, 0xbd895450, 0xbe1e7a58, + 0xbf782ae6); + // 0.03722767, 0.80796093, 0.53925264, -0.50804031, + // 0.63562357, -0.45508829, -0.22051410, 0.42499006, + // -0.59229839, -0.50074077, -0.80474108, -0.20762257, + // 0.15367362, 0.98349953, -0.15871963, -0.07445616 + VLOAD_32(v4, 0x3d187c0a, 0x3f4ed687, 0x3f0a0c76, 0xbf020eee, 0x3f22b83a, + 0xbee90155, 0xbe61ce73, 0x3ed9984c, 0xbf17a0de, 0xbf00308c, + 0xbf4e0383, 0xbe549b03, 0x3e1d5c9e, 0x3f7bc6a0, 0xbe228766, + 0xbd987c79); + asm volatile("vfnmadd.vv v4, v8, v12"); + // -0.67713100, 0.22849269, 0.72553790, -0.71928883, + // 0.40059602, 0.18944177, 0.00895807, -0.41666192, 0.73393923, + // -0.58878565, 0.55422139, 1.07270420, -0.55774319, + // 0.94216329, 0.18424334, 1.01551294 + VCMP_U32(2, v4, 0xbf2d5875, 0x3e69f9fd, 0x3f39bcda, 0xbf382350, 0x3ecd1aec, + 0x3e41fd06, 0x3c12c4e5, 0xbed554b6, 0x3f3be371, 0xbf16baa7, + 0x3f0de173, 0x3f894e5f, 0xbf0ec842, 0x3f71319d, 0x3e3caa49, + 0x3f81fc54); + + VSET(16, e64, m8); + // -0.1517393950396491, -0.0976116299317518, 0.4195080955516000, + // -0.8346165642452430, 0.0078216057137750, -0.5126918345148062, + // -0.9302856586058497, -0.8971839537614414, 0.1317157676127678, + // -0.3423297874984121, 0.7678405723111816, -0.6465198020108864, + // 0.4795090517472360, -0.9006147069685106, 0.9841759200408695, + // 0.8437352562659637 + VLOAD_64(v16, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, + 0xbfeab52dcc044330, 0x3f8004c625f16600, 0xbfe067f8b4c55ad2, + 0xbfedc4e66df4cc5a, 0xbfecb5bb1f7cd800, 0x3fc0dc0ff121d700, + 0xbfd5e8bb327025d8, 0x3fe892266453ca54, 0xbfe4b04a4bbb4d06, + 0x3fdeb046bbd8fb80, 0xbfecd1d5ef173e7a, 0x3fef7e5e7fc2c286, + 0x3feaffe114849fb0); 
+ // 0.3915682245289982, 0.0468282563045201, 0.4640582663413180, + // 0.9199907734666593, -0.6702920875531786, 0.6250479001245852, + // -0.3716310293668668, 0.2191474803863191, + // -0.3398132406457823, -0.1436002174993440, + // -0.7049093483038609, 0.0726450331160087, 0.3054536350672581, + // -0.9906780567812383, 0.2659677084286980, -0.6111168392293305 + VLOAD_64(v24, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, + 0x3fed70907d95274a, 0xbfe573086459defe, 0x3fe40064742efe82, + 0xbfd7c8cd8353cefc, 0x3fcc0d064ea14910, 0xbfd5bf8008d49208, + 0xbfc2617deeedd880, 0xbfe68e9e0cb3831e, 0x3fb298dd69733960, + 0x3fd38c8d6743b96c, 0xbfefb3a277d7b020, 0x3fd1059d6c5f9294, + 0xbfe38e44e6d0cbb0); + // 0.8932002267748917, 0.5237198185024288, -0.3716642114238491, + // 0.8806741908360942, 0.4285584084885536, -0.9185899240339090, + // -0.3906189235600976, -0.8681987020972610, + // -0.8703598457154336, -0.2254866845234647, 0.7002825787534324, + // 0.0892712008047818, 0.9241326299982451, 0.6615225744181676, + // 0.7351775340550828, -0.4044996673659886 + VLOAD_64(v8, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, + 0x3fec2e7ba402502e, 0x3fdb6d803f3895dc, 0xbfed6516b24524fa, + 0xbfd8ffe68378eb00, 0xbfebc848a4fdff0a, 0xbfebd9fce4232e3a, + 0xbfccdcbf67db1aa0, 0x3fe668b702b68b90, 0x3fb6da7a37ee6240, + 0x3fed927e97e0492a, 0x3fe52b3163d622de, 0x3fe786930930a7fe, + 0xbfd9e35292a51b70); + asm volatile("vfnmadd.vv v8, v16, v24"); + // -0.2560345624688988, 0.0042928888070631, -0.3081421208222118, + // -0.1849655060915788, 0.6669400726566582, -1.0960014534443465, + // 0.0082438467988533, -0.9980814245844917, 0.4544533558235209, + // 0.0664094087027049, 0.1672039722542752, -0.0149294340464271, + // -0.7485835961663959, 1.5864550162939111, -0.9895117344007368, + // 0.9524074697338700 + VCMP_U64(3, v8, 0xbfd062deca1cc612, 0x3f71956b9081d880, 0xbfd3b899badce50e, + 0xbfc7acf31fc694ed, 0x3fe55792b50e7883, 0xbff18938d1ee9749, + 0x3f80e22663278b8b, 0xbfeff04874aabc45, 
0x3fdd15c38734723f, + 0x3fb10034fe865a4b, 0x3fc566f0944bf4a6, 0xbf8e9352b7d14aa9, + 0xbfe7f46595fb6a6d, 0x3ff9621ea7b8eb80, 0xbfefaa1483484d07, + 0x3fee7a1f3adf237b); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.7461, 0.0514, -0.3071, -0.4934, -0.5220, -0.7983, 0.4907, + // -0.9028, 0.1752, 0.0676, 0.1040, 0.4526, 0.3525, -0.2686, + // 0.3540, -0.0847 + VLOAD_16(v4, 0xb9f8, 0x2a94, 0xb4ea, 0xb7e5, 0xb82d, 0xba63, 0x37da, 0xbb39, + 0x319b, 0x2c54, 0x2ea8, 0x373e, 0x35a4, 0xb44c, 0x35aa, 0xad6c); + // 0.1573, -0.7700, 0.0804, -0.9438, 0.0790, 0.7998, -0.2854, + // 0.1963, -0.0687, -0.2123, 0.3625, -0.0002, 0.7168, -0.4033, + // 0.2812, -0.3159 + VLOAD_16(v6, 0x3109, 0xba29, 0x2d25, 0xbb8d, 0x2d0e, 0x3a66, 0xb491, 0x3248, + 0xac65, 0xb2cb, 0x35cd, 0x897c, 0x39bc, 0xb674, 0x3480, 0xb50e); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.0337, 0.2034, -0.1886, 0.8242, 0.3225, 0.0331, 0.0698, + // 0.6777, -0.2539, -0.5825, -0.4319, -0.6323, 0.0674, -0.2903, + // -0.8145, 0.1893 + VLOAD_16(v2, 0x284f, 0x3282, 0xb209, 0x3a98, 0x3529, 0x283b, 0x2c77, 0x396c, + 0xb410, 0xb8a9, 0xb6e9, 0xb90f, 0x2c50, 0xb4a5, 0xba84, 0x320f); + asm volatile("vfnmadd.vv v2, v4, v6, v0.t"); + // 0.0337, 0.7598, -0.1886, 1.3506, 0.3225, -0.7734, 0.0698, + // 0.4155, -0.2539, 0.2517, -0.4319, 0.2864, 0.0674, 0.3254, + // -0.8145, 0.3320 + VCMP_U16(4, v2, 0x284f, 0x3a14, 0xb209, 0x3d67, 0x3529, 0xba30, 0x2c77, + 0x36a6, 0xb410, 0x3407, 0xb6e9, 0x3495, 0x2c50, 0x3535, 0xba84, + 0x3550); + + VSET(16, e32, m4); + // -0.36820358, 0.10496315, -0.32905263, -0.92334682, + // 0.43153936, 0.92736709, -0.59600371, 0.75117606, 0.84123290, + // 0.33028743, -0.43412161, 0.95273590, 0.06816643, + // -0.88978988, 0.18573478, 0.61926919 + VLOAD_32(v8, 0xbebc852e, 0x3dd6f6ec, 0xbea87996, 0xbf6c6075, 0x3edcf2ba, + 0x3f6d67ee, 0xbf1893b3, 0x3f404d13, 0x3f575b0a, 0x3ea91b6f, + 0xbede4530, 0x3f73e680, 0x3d8b9ad8, 0xbf63c945, 
0x3e3e3142, + 0x3f1e886d); + // 0.69083834, -0.31329882, -0.54809541, 0.25019145, + // -0.67489260, 0.23259214, -0.14038530, 0.09741956, + // -0.23567833, 0.75417399, -0.90357685, -0.87489468, + // 0.54726779, -0.06705534, -0.15476358, -0.96940458 + VLOAD_32(v12, 0x3f30dac8, 0xbea068b4, 0xbf0c4ffb, 0x3e801918, 0xbf2cc5c3, + 0x3e6e2ca2, 0xbe0fc12a, 0x3dc783e8, 0xbe7155a9, 0x3f41118c, + 0xbf6750d0, 0xbf5ff919, 0x3f0c19be, 0xbd895450, 0xbe1e7a58, + 0xbf782ae6); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.03722767, 0.80796093, 0.53925264, -0.50804031, + // 0.63562357, -0.45508829, -0.22051410, 0.42499006, + // -0.59229839, -0.50074077, -0.80474108, -0.20762257, + // 0.15367362, 0.98349953, -0.15871963, -0.07445616 + VLOAD_32(v4, 0x3d187c0a, 0x3f4ed687, 0x3f0a0c76, 0xbf020eee, 0x3f22b83a, + 0xbee90155, 0xbe61ce73, 0x3ed9984c, 0xbf17a0de, 0xbf00308c, + 0xbf4e0383, 0xbe549b03, 0x3e1d5c9e, 0x3f7bc6a0, 0xbe228766, + 0xbd987c79); + asm volatile("vfnmadd.vv v4, v8, v12, v0.t"); + // 0.03722767, 0.22849269, 0.53925264, -0.71928883, + // 0.63562357, 0.18944177, -0.22051410, -0.41666192, + // -0.59229839, -0.58878565, -0.80474108, 1.07270420, + // 0.15367362, 0.94216329, -0.15871963, 1.01551294 + VCMP_U32(5, v4, 0x3d187c0a, 0x3e69f9fd, 0x3f0a0c76, 0xbf382350, 0x3f22b83a, + 0x3e41fd06, 0xbe61ce73, 0xbed554b6, 0xbf17a0de, 0xbf16baa7, + 0xbf4e0383, 0x3f894e5f, 0x3e1d5c9e, 0x3f71319d, 0xbe228766, + 0x3f81fc54); + + VSET(16, e64, m8); + // -0.1517393950396491, -0.0976116299317518, 0.4195080955516000, + // -0.8346165642452430, 0.0078216057137750, -0.5126918345148062, + // -0.9302856586058497, -0.8971839537614414, 0.1317157676127678, + // -0.3423297874984121, 0.7678405723111816, -0.6465198020108864, + // 0.4795090517472360, -0.9006147069685106, 0.9841759200408695, + // 0.8437352562659637 + VLOAD_64(v16, 0xbfc36c324d9ae520, 0xbfb8fd1366442100, 0x3fdad9387bb34990, + 0xbfeab52dcc044330, 0x3f8004c625f16600, 0xbfe067f8b4c55ad2, + 0xbfedc4e66df4cc5a, 0xbfecb5bb1f7cd800, 0x3fc0dc0ff121d700, + 
0xbfd5e8bb327025d8, 0x3fe892266453ca54, 0xbfe4b04a4bbb4d06, + 0x3fdeb046bbd8fb80, 0xbfecd1d5ef173e7a, 0x3fef7e5e7fc2c286, + 0x3feaffe114849fb0); + // 0.3915682245289982, 0.0468282563045201, 0.4640582663413180, + // 0.9199907734666593, -0.6702920875531786, 0.6250479001245852, + // -0.3716310293668668, 0.2191474803863191, + // -0.3398132406457823, -0.1436002174993440, + // -0.7049093483038609, 0.0726450331160087, 0.3054536350672581, + // -0.9906780567812383, 0.2659677084286980, -0.6111168392293305 + VLOAD_64(v24, 0x3fd90f742ba04f2c, 0x3fa7f9df8ab696e0, 0x3fddb3217157f678, + 0x3fed70907d95274a, 0xbfe573086459defe, 0x3fe40064742efe82, + 0xbfd7c8cd8353cefc, 0x3fcc0d064ea14910, 0xbfd5bf8008d49208, + 0xbfc2617deeedd880, 0xbfe68e9e0cb3831e, 0x3fb298dd69733960, + 0x3fd38c8d6743b96c, 0xbfefb3a277d7b020, 0x3fd1059d6c5f9294, + 0xbfe38e44e6d0cbb0); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.8932002267748917, 0.5237198185024288, -0.3716642114238491, + // 0.8806741908360942, 0.4285584084885536, -0.9185899240339090, + // -0.3906189235600976, -0.8681987020972610, + // -0.8703598457154336, -0.2254866845234647, 0.7002825787534324, + // 0.0892712008047818, 0.9241326299982451, 0.6615225744181676, + // 0.7351775340550828, -0.4044996673659886 + VLOAD_64(v8, 0x3fec9518a458e4ea, 0x3fe0c25010978504, 0xbfd7c958b04a2e10, + 0x3fec2e7ba402502e, 0x3fdb6d803f3895dc, 0xbfed6516b24524fa, + 0xbfd8ffe68378eb00, 0xbfebc848a4fdff0a, 0xbfebd9fce4232e3a, + 0xbfccdcbf67db1aa0, 0x3fe668b702b68b90, 0x3fb6da7a37ee6240, + 0x3fed927e97e0492a, 0x3fe52b3163d622de, 0x3fe786930930a7fe, + 0xbfd9e35292a51b70); + asm volatile("vfnmadd.vv v8, v16, v24, v0.t"); + // 0.8932002267748917, 0.0042928888070631, -0.3716642114238491, + // -0.1849655060915788, 0.4285584084885536, + // -1.0960014534443465, -0.3906189235600976, + // -0.9980814245844917, -0.8703598457154336, 0.0664094087027049, + // 0.7002825787534324, -0.0149294340464271, + // 0.9241326299982451, 1.5864550162939111, 0.7351775340550828, + // 0.9524074697338700 + 
VCMP_U64(6, v8, 0x3fec9518a458e4ea, 0x3f71956b9081d880, 0xbfd7c958b04a2e10, + 0xbfc7acf31fc694ed, 0x3fdb6d803f3895dc, 0xbff18938d1ee9749, + 0xbfd8ffe68378eb00, 0xbfeff04874aabc45, 0xbfebd9fce4232e3a, + 0x3fb10034fe865a4b, 0x3fe668b702b68b90, 0xbf8e9352b7d14aa9, + 0x3fed927e97e0492a, 0x3ff9621ea7b8eb80, 0x3fe786930930a7fe, + 0x3fee7a1f3adf237b); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.2646 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x343c); + // 0.4216, -0.2148, 0.0047, 0.6802, -0.8965, -0.2986, -0.1786, + // -0.1904, 0.2805, 0.5322, -0.5298, 0.3208, 0.0567, + // -0.9897, -0.5400, -0.4187 + VLOAD_16(v4, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, + 0x347d, 0x3842, 0xb83d, 0x3522, 0x2b41, 0xbbeb, 0xb852, 0xb6b3); + // -0.7886, -0.5435, -0.8345, 0.7793, 0.5796, -0.8374, -0.8623, + // -0.3313, -0.2690, -0.9214, 0.2126, -0.6772, -0.6514, -0.5703, + // -0.2585, -0.3320 + VLOAD_16(v2, 0xba4f, 0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, + 0xb44e, 0xbb5f, 0x32ce, 0xb96b, 0xb936, 0xb890, 0xb423, 0xb550); + asm volatile("vfnmadd.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.2129, 0.3586, 0.2162, -0.8867, 0.7432, 0.5200, 0.4067, + // 0.2781, -0.2092, -0.2883, 0.4736, -0.1416, 0.1157, 1.1406, + // 0.6084, 0.5068 + VCMP_U16(7, v2, 0xb2d0, 0x35bd, 0x32eb, 0xbb17, 0x39f2, 0x3829, 0x3682, + 0x3473, 0xb2b3, 0xb49d, 0x3793, 0xb088, 0x2f68, 0x3c90, 0x38de, + 0x380d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.13809182 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe0d67f1); + // -0.16977388, -0.30800357, -0.37010264, -0.92290556, + // 0.55768263, 0.47349435, 0.77556002, 0.16363664, 0.80314618, + // -0.48171839, -0.60694915, 0.16937894, 0.86316317, + // 0.00897404, -0.96310323, -0.27890080 + VLOAD_32(v8, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, + 0x3ef26dda, 0x3f468b1a, 0x3e27905d, 0x3f4d9afd, 0xbef6a3cb, + 0xbf1b6105, 0x3e2d71ac, 0x3f5cf843, 
0x3c1307df, 0xbf768def, + 0xbe8ecc16); + // -0.33133313, -0.48972869, 0.95656961, -0.89211702, + // 0.72045243, -0.36672497, 0.69402671, 0.44954479, + // -0.77024877, -0.83221292, 0.37576449, -0.77536738, + // -0.55040795, -0.71568310, -0.75874990, 0.91956782 + VLOAD_32(v4, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, + 0xbebbc360, 0x3f31abbc, 0x3ee62abc, 0xbf452f06, 0xbf550be8, + 0x3ec06434, 0xbf467e7a, 0xbf0ce789, 0xbf373702, 0xbf423d6f, + 0x3f6b68cc); + asm volatile("vfnmadd.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // 0.12401948, 0.24037606, 0.50219709, 0.79971153, + // -0.45819405, -0.52413607, -0.67972064, -0.10155818, + // -0.90951121, 0.36679661, 0.65883917, -0.27645081, + // -0.93917000, -0.10780402, 0.85832608, 0.40588558 + VCMP_U32(8, v4, 0x3dfdfded, 0x3e762524, 0x3f008ffd, 0x3f4cb9e5, 0xbeea9869, + 0xbf062dc8, 0xbf2e022c, 0xbdcffdbd, 0xbf68d5ba, 0x3ebbccc3, + 0x3f28a9af, 0xbe8d8af6, 0xbf706d72, 0xbddcc85b, 0x3f5bbb42, + 0x3ecfd03d); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8978909040536565 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fecbb85b489299a); + // 0.4119623576675431, -0.1190899643735133, 0.9903323592718865, + // 0.5311038519754858, 0.1686986553141236, + // -0.8788301781199843, 0.1880579223718752, + // 0.7610824660598337, -0.0872931389118274, + // -0.6855627317033812, -0.0181686933036735, + // -0.9796673648941667, 0.2148282430178909, + // -0.1529278220414154, -0.7708574130314993, + // -0.4104905538508556 + VLOAD_64(v16, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, + 0x3fe0fecd81607572, 0x3fc597eae3ba06f8, 0xbfec1f6077386c08, + 0x3fc81248312ba2f8, 0x3fe85ac99da9270a, 0xbfb658d7d8ca9eb0, + 0xbfe5f0214100b7de, 0xbf929ad05e338a40, 0xbfef596f5fa5b9ea, + 0x3fcb7f7deb026e00, 0xbfc393238d287f60, 0xbfe8aadd2a5b2eba, + 0xbfda457a2c06ce78); + // 0.5050016609492949, 0.8257750946258060, + // -0.2631016891694440, 0.8041841986447893, + // -0.0322547653971421, -0.3994438840519345, + // -0.6154540433263920, -0.9209485498858390, 
+ // 0.3334000822950238, -0.6004917796663505, 0.4588428764280068, + // 0.8937156106780619, 0.8421999503441004, 0.3083609158934253, + // -0.2219824502919918, 0.5118870280625194 + VLOAD_64(v8, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, + 0x3fe9bbe080247574, 0xbfa083b2550ab080, 0xbfd9907d14a5c710, + 0xbfe3b1ccad88e3e8, 0xbfed786917e1dd9e, 0x3fd5566d4c7c36a8, + 0xbfe3373a8965e1cc, 0x3fdd5dae8310b1e8, 0x3fec99517af92ea6, + 0x3feaf34d4f6d76aa, 0x3fd3bc2f6c481e9c, 0xbfcc69ebc2252060, + 0x3fe06160e798ce12); + asm volatile("vfnmadd.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.8653987555659035, -0.6223659818850454, -0.7540957457254903, + // -1.2531735291223209, -0.1397373948516451, 1.2374872082900796, + // 0.3645526649939371, 0.0658288599840662, + // -0.2120637623916150, 1.2247388386247897, -0.3938221518308497, + // 0.1772082472555760, -0.9710319178262998, -0.1239466395049457, + // 0.9701734360082217, -0.0491281525495393 + VCMP_U64(9, v8, 0xbfebb158bb24f2ec, 0xbfe3ea6c104adab7, 0xbfe8218d66be32e5, + 0xbff40cffafbcb13e, 0xbfc1e2ea3a754147, 0x3ff3ccbf630d300f, + 0x3fd754d4b3746402, 0x3fb0da2900c3b814, 0xbfcb24e7c611c0f4, + 0x3ff39887c0a08d49, 0xbfd93461d3a37236, 0x3fc6aec28545a7b7, + 0xbfef12b1874df4ac, 0xbfbfbaf78b1f72ad, 0x3fef0ba929634dea, + 0xbfa927534106be44); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.2646 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x343c); + // 0.4216, -0.2148, 0.0047, 0.6802, -0.8965, -0.2986, + // -0.1786, -0.1904, 0.2805, 0.5322, -0.5298, 0.3208, + // 0.0567, -0.9897, -0.5400, -0.4187 + VLOAD_16(v16, 0x36bf, 0xb2e0, 0x1cc3, 0x3971, 0xbb2c, 0xb4c7, 0xb1b7, 0xb218, + 0x347d, 0x3842, 0xb83d, 0x3522, 0x2b41, 0xbbeb, 0xb852, 0xb6b3); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.7886, -0.5435, -0.8345, 0.7793, 0.5796, -0.8374, + // -0.8623, -0.3313, -0.2690, -0.9214, 0.2126, -0.6772, + // -0.6514, -0.5703, -0.2585, -0.3320 + VLOAD_16(v8, 0xba4f, 
0xb859, 0xbaad, 0x3a3c, 0x38a3, 0xbab3, 0xbae6, 0xb54d, + 0xb44e, 0xbb5f, 0x32ce, 0xb96b, 0xb936, 0xb890, 0xb423, 0xb550); + asm volatile("vfnmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_16)); + // -0.7886, 0.3586, -0.8345, -0.8867, 0.5796, 0.5200, + // -0.8623, 0.2781, -0.2690, -0.2883, 0.2126, -0.1416, + // -0.6514, 1.1406, -0.2585, 0.5068 + VCMP_U16(10, v8, 0xba4f, 0x35bd, 0xbaad, 0xbb17, 0x38a3, 0x3829, 0xbae6, + 0x3473, 0xb44e, 0xb49d, 0x32ce, 0xb088, 0xb936, 0x3c90, 0xb423, + 0x380d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.13809182 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe0d67f1); + // -0.16977388, -0.30800357, -0.37010264, -0.92290556, + // 0.55768263, 0.47349435, 0.77556002, 0.16363664, + // 0.80314618, -0.48171839, -0.60694915, 0.16937894, + // 0.86316317, 0.00897404, -0.96310323, -0.27890080 + VLOAD_32(v8, 0xbe2dd934, 0xbe9db2a5, 0xbebd7e18, 0xbf6c438a, 0x3f0ec44a, + 0x3ef26dda, 0x3f468b1a, 0x3e27905d, 0x3f4d9afd, 0xbef6a3cb, + 0xbf1b6105, 0x3e2d71ac, 0x3f5cf843, 0x3c1307df, 0xbf768def, + 0xbe8ecc16); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.33133313, -0.48972869, 0.95656961, -0.89211702, + // 0.72045243, -0.36672497, 0.69402671, 0.44954479, + // -0.77024877, -0.83221292, 0.37576449, -0.77536738, + // -0.55040795, -0.71568310, -0.75874990, 0.91956782 + VLOAD_32(v4, 0xbea9a47f, 0xbefabdb8, 0x3f74e1bf, 0xbf6461c8, 0x3f386f92, + 0xbebbc360, 0x3f31abbc, 0x3ee62abc, 0xbf452f06, 0xbf550be8, + 0x3ec06434, 0xbf467e7a, 0xbf0ce789, 0xbf373702, 0xbf423d6f, + 0x3f6b68cc); + asm volatile("vfnmadd.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -0.33133313, 0.24037606, 0.95656961, 0.79971153, + // 0.72045243, -0.52413607, 0.69402671, -0.10155818, + // -0.77024877, 0.36679661, 0.37576449, -0.27645081, + // -0.55040795, -0.10780402, -0.75874990, 0.40588558 + VCMP_U32(11, v4, 0xbea9a47f, 0x3e762524, 0x3f74e1bf, 0x3f4cb9e5, 0x3f386f92, + 0xbf062dc8, 0x3f31abbc, 0xbdcffdbd, 0xbf452f06, 0x3ebbccc3, + 0x3ec06434, 0xbe8d8af6, 0xbf0ce789, 0xbddcc85b, 0xbf423d6f, + 
0x3ecfd03d); + + VSET(16, e64, m8); + double dscalar_64; + // 0.8978909040536565 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fecbb85b489299a); + // 0.4119623576675431, -0.1190899643735133, + // 0.9903323592718865, 0.5311038519754858, + // 0.1686986553141236, -0.8788301781199843, + // 0.1880579223718752, 0.7610824660598337, + // -0.0872931389118274, -0.6855627317033812, + // -0.0181686933036735, -0.9796673648941667, + // 0.2148282430178909, -0.1529278220414154, + // -0.7708574130314993, -0.4104905538508556 + VLOAD_64(v16, 0x3fda5d975d575ea8, 0xbfbe7cae0e441b80, 0x3fefb0cd7ce7c8e8, + 0x3fe0fecd81607572, 0x3fc597eae3ba06f8, 0xbfec1f6077386c08, + 0x3fc81248312ba2f8, 0x3fe85ac99da9270a, 0xbfb658d7d8ca9eb0, + 0xbfe5f0214100b7de, 0xbf929ad05e338a40, 0xbfef596f5fa5b9ea, + 0x3fcb7f7deb026e00, 0xbfc393238d287f60, 0xbfe8aadd2a5b2eba, + 0xbfda457a2c06ce78); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.5050016609492949, 0.8257750946258060, + // -0.2631016891694440, 0.8041841986447893, + // -0.0322547653971421, -0.3994438840519345, + // -0.6154540433263920, -0.9209485498858390, + // 0.3334000822950238, -0.6004917796663505, + // 0.4588428764280068, 0.8937156106780619, + // 0.8421999503441004, 0.3083609158934253, + // -0.2219824502919918, 0.5118870280625194 + VLOAD_64(v8, 0x3fe028f93e467e2c, 0x3fea6cbfe4289cd0, 0xbfd0d6a877a053e0, + 0x3fe9bbe080247574, 0xbfa083b2550ab080, 0xbfd9907d14a5c710, + 0xbfe3b1ccad88e3e8, 0xbfed786917e1dd9e, 0x3fd5566d4c7c36a8, + 0xbfe3373a8965e1cc, 0x3fdd5dae8310b1e8, 0x3fec99517af92ea6, + 0x3feaf34d4f6d76aa, 0x3fd3bc2f6c481e9c, 0xbfcc69ebc2252060, + 0x3fe06160e798ce12); + asm volatile("vfnmadd.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // 0.5050016609492949, -0.6223659818850454, + // -0.2631016891694440, -1.2531735291223209, + // -0.0322547653971421, 1.2374872082900796, + // -0.6154540433263920, 0.0658288599840662, + // 0.3334000822950238, 1.2247388386247897, 0.4588428764280068, + // 0.1772082472555760, 0.8421999503441004, + // -0.1239466395049457, 
-0.2219824502919918, + // -0.0491281525495393 + VCMP_U64(12, v8, 0x3fe028f93e467e2c, 0xbfe3ea6c104adab7, 0xbfd0d6a877a053e0, + 0xbff40cffafbcb13e, 0xbfa083b2550ab080, 0x3ff3ccbf630d300f, + 0xbfe3b1ccad88e3e8, 0x3fb0da2900c3b814, 0x3fd5566d4c7c36a8, + 0x3ff39887c0a08d49, 0x3fdd5dae8310b1e8, 0x3fc6aec28545a7b7, + 0x3feaf34d4f6d76aa, 0xbfbfbaf78b1f72ad, 0xbfcc69ebc2252060, + 0xbfa927534106be44); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c new file mode 100644 index 000000000..27e606e82 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsac.c @@ -0,0 +1,455 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.3474, -0.9888, 0.2810, 0.4199, 0.1704, -0.3772, 0.2998, + // 0.7871, -0.2527, -0.8618, 0.2646, 0.5488, -0.3184, -0.3508, + // -0.3589, -0.3914 + VLOAD_16(v4, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, + 0xb40b, 0xbae5, 0x343c, 0x3864, 0xb518, 0xb59d, 0xb5be, 0xb643); + // 0.0417, 0.3862, -0.9619, -0.5659, 0.1731, 0.4827, 0.7334, + // -0.7271, -0.9814, 0.8003, -0.4836, 0.5234, -0.8540, + // -0.2036, -0.8823, 0.2603 + VLOAD_16(v6, 0x2958, 0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, + 0xbbda, 0x3a67, 0xb7bd, 0x3830, 0xbad5, 0xb284, 0xbb0f, 0x342a); + // -0.2739, 0.2146, 0.5264, -0.8853, 0.8877, -0.6748, -0.7563, + // -0.9634, 0.7451, -0.5166, 0.0698, 0.4790, -0.0681, -0.9746, + // 0.2129, 0.9072 + VLOAD_16(v2, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, + 0x39f6, 0xb822, 
0x2c77, 0x37aa, 0xac5b, 0xbbcc, 0x32d0, 0x3b42); + asm volatile("vfnmsac.vv v2, v4, v6"); + // -0.2883, 0.5967, 0.7969, -0.6475, 0.8584, -0.4927, -0.9761, + // -0.3911, 0.4971, 0.1733, 0.1978, 0.1917, -0.3401, -1.0459, + // -0.1038, 1.0088 + VCMP_U16(1, v2, 0xb49d, 0x38c6, 0x3a60, 0xb92e, 0x3ade, 0xb7e2, 0xbbcf, + 0xb642, 0x37f4, 0x318a, 0x3254, 0x3223, 0xb570, 0xbc2f, 0xaea4, + 0x3c09); + + VSET(16, e32, m4); + // 0.11577118, -0.10074481, 0.13861528, 0.44782066, + // 0.42196107, -0.67597556, 0.34948668, -0.87903690, + // -0.34136006, -0.19722189, 0.76997000, -0.68663412, + // 0.45603558, 0.60629857, -0.86984915, -0.08019307 + VLOAD_32(v8, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, + 0xbf2d0cbc, 0x3eb2efeb, 0xbf610890, 0xbeaec6bf, 0xbe49f489, + 0x3f451cc1, 0xbf2fc741, 0x3ee97d7f, 0x3f1b3662, 0xbf5eae6f, + 0xbda43c43); + // -0.38970658, 0.40460527, 0.69067985, -0.98108912, + // 0.47494572, -0.34277225, -0.54462087, -0.90492284, 0.60100728, + // -0.02819708, -0.46859986, 0.87238866, 0.46812481, + // 0.49922746, 0.97036403, 0.04279163 + VLOAD_32(v12, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, + 0xbeaf7fd8, 0xbf0b6c46, 0xbf67a906, 0x3f19db9d, 0xbce6fd92, + 0xbeefec52, 0x3f5f54dd, 0x3eefae0e, 0x3eff9abe, 0x3f7869c7, + 0x3d2f4647); + // 0.79804420, -0.70010293, -0.51047552, 0.38566175, + // 0.15318950, 0.15531392, -0.20705318, -0.82493448, + // 0.12047531, 0.57526720, 0.23939800, -0.19725421, + // 0.15403098, 0.03931713, -0.45930895, -0.15395784 + VLOAD_32(v4, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, + 0x3e1f0a9d, 0xbe5405c0, 0xbf532ee8, 0x3df6bbc3, 0x3f1344b6, + 0x3e7524c0, 0xbe49fd02, 0x3e1dba4c, 0x3d210b00, 0xbeeb2a8b, + 0xbe1da720); + asm volatile("vfnmsac.vv v4, v8, v12"); + // 0.84316099, -0.65934104, -0.60621428, 0.82501376, + // -0.04721911, -0.07639174, -0.01671545, -1.62039506, + // 0.32563519, 0.56970614, 0.60020584, 0.40175763, + // -0.05945060, -0.26336378, 0.38476136, -0.15052626 + VCMP_U32(2, v4, 
0x3f57d966, 0xbf28ca93, 0xbf1b30dc, 0x3f53341a, 0xbd4168d3, + 0xbd9c7345, 0xbc88eed4, 0xbfcf691b, 0x3ea6b9a8, 0x3f11d843, + 0x3f19a717, 0x3ecdb32c, 0xbd738277, 0xbe86d79e, 0x3ec4ff71, + 0xbe1a238e); + + VSET(16, e64, m8); + // -0.1779684802061718, 0.1122733699429854, -0.0166033088608786, + // -0.0418350503858864, 0.0809510021720363, -0.9993917101510512, + // -0.2139048161619248, 0.7196716914796224, 0.6489783595942558, + // 0.5950689618839839, -0.7376256302221853, -0.5442228345597713, + // -0.8234113806545975, -0.6424001059348645, -0.3817524674245201, + // -0.8801262923106541 + VLOAD_64(v16, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, + 0xbfa56b675a77c100, 0x3fb4b93472e84630, 0xbfeffb0452dfc0ba, + 0xbfcb613ba6efa978, 0x3fe7078ced586224, 0x3fe4c46e43c89c1c, + 0x3fe30ace10450114, 0xbfe79aa110cfdc92, 0xbfe16a46018575da, + 0xbfea5962d2e21a3e, 0xbfe48e8aaabdfd5e, 0xbfd86ea1e6b05c10, + 0xbfec29fe9d3a5e2c); + // 0.6809772463364707, -0.3512739833826983, -0.3746023351803702, + // -0.7912172181005324, 0.8292434726428350, 0.4103374079106952, + // -0.0850673796598582, -0.5834949864830523, + // -0.9215678788036654, 0.4412210589054084, 0.3537359089001260, + // -0.4889461402031243, 0.2341577339668230, 0.0593866008892341, + // -0.4825773777931026, 0.8989772522533539 + VLOAD_64(v24, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, + 0xbfe951a6c578c3ac, 0x3fea89299b6d84b0, 0x3fda42f7d4d35178, + 0xbfb5c6f9cd987320, 0xbfe2abfdad8a63b6, 0xbfed7d7beb902fcc, + 0x3fdc3cf7409388b4, 0x3fd6a39bf009666c, 0xbfdf4ae4c06b61e4, + 0x3fcdf8e170bf19c0, 0x3fae67eba9479c60, 0xbfdee28c39da4ac0, + 0x3fecc46bf148d5ca); + // 0.2213384305747967, -0.6962211546566610, -0.0896076892809434, + // -0.1334142611967066, 0.1988436916560323, + // -0.3391391007320459, -0.6137202819751713, 0.6759552396290200, + // -0.7798663937316326, -0.1025181838739857, 0.5296250728149803, + // 0.8832422045338422, 0.8373555508937671, -0.8622529212135799, + // 0.4241832213372883, 0.7769982087360683 + 
VLOAD_64(v8, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, + 0xbfc113b7f0547630, 0x3fc973b5c856db48, 0xbfd5b4747c9c185c, + 0xbfe3a398b77f3552, 0x3fe5a16ce1f8870a, 0xbfe8f4aa5e0a7552, + 0xbfba3ea1b6fbece0, 0x3fe0f2b047dc3902, 0x3fec438527dd6ef6, + 0x3feacb9dde46cf34, 0xbfeb9793702fc4f0, 0x3fdb25d161c9f510, + 0x3fe8dd2b58f24dc8); + asm volatile("vfnmsac.vv v8, v16, v24"); + // 0.3425309161602823, -0.6567824407689892, -0.0958273275519495, + // -0.1665148733821233, 0.1317156015009752, 0.0709487030987733, + // -0.6319166041826899, 1.0958800635211576, + // -0.1817887834908719, -0.3650751413581792, 0.7905497455496533, + // 0.6171465501654385, 1.0301636939103409, -0.8241029625112244, + // 0.2399581166415165, 1.5682117246334322 + VCMP_U64(3, v8, 0x3fd5ec06cab1bfc9, 0xbfe5045c9bf61361, 0xbfb88823c5e61162, + 0xbfc5505bffbca57e, 0x3fc0dc0e8c68ea87, 0x3fb229b1b780ba72, + 0xbfe438a92b9872ef, 0x3ff188b9889296a1, 0xbfc744dad7efbbd3, + 0xbfd75d64202dd23c, 0x3fe94c2efadfe675, 0x3fe3bfaa1f3997d0, + 0x3ff07b8cecedf189, 0xbfea5f0d2d10b7b0, 0x3fceb6f293b149df, + 0x3ff917652d6811d0); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.3474, -0.9888, 0.2810, 0.4199, 0.1704, -0.3772, 0.2998, + // 0.7871, -0.2527, -0.8618, 0.2646, 0.5488, -0.3184, -0.3508, + // -0.3589, -0.3914 + VLOAD_16(v4, 0x358f, 0xbbe9, 0x347f, 0x36b8, 0x3174, 0xb609, 0x34cc, 0x3a4c, + 0xb40b, 0xbae5, 0x343c, 0x3864, 0xb518, 0xb59d, 0xb5be, 0xb643); + // 0.0417, 0.3862, -0.9619, -0.5659, 0.1731, 0.4827, 0.7334, + // -0.7271, -0.9814, 0.8003, -0.4836, 0.5234, -0.8540, + // -0.2036, -0.8823, 0.2603 + VLOAD_16(v6, 0x2958, 0x362e, 0xbbb2, 0xb887, 0x318a, 0x37b9, 0x39de, 0xb9d1, + 0xbbda, 0x3a67, 0xb7bd, 0x3830, 0xbad5, 0xb284, 0xbb0f, 0x342a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2739, 0.2146, 0.5264, -0.8853, 0.8877, -0.6748, -0.7563, + // -0.9634, 0.7451, -0.5166, 0.0698, 0.4790, -0.0681, -0.9746, + // 
0.2129, 0.9072 + VLOAD_16(v2, 0xb462, 0x32de, 0x3836, 0xbb15, 0x3b1a, 0xb966, 0xba0d, 0xbbb5, + 0x39f6, 0xb822, 0x2c77, 0x37aa, 0xac5b, 0xbbcc, 0x32d0, 0x3b42); + asm volatile("vfnmsac.vv v2, v4, v6, v0.t"); + // -0.2739, 0.5967, 0.5264, -0.6475, 0.8877, -0.4927, -0.7563, + // -0.3911, 0.7451, 0.1733, 0.0698, 0.1917, -0.0681, -1.0459, + // 0.2129, 1.0088 + VCMP_U16(4, v2, 0xb462, 0x38c6, 0x3836, 0xb92e, 0x3b1a, 0xb7e2, 0xba0d, + 0xb642, 0x39f6, 0x318a, 0x2c77, 0x3223, 0xac5b, 0xbc2f, 0x32d0, + 0x3c09); + + VSET(16, e32, m4); + // 0.11577118, -0.10074481, 0.13861528, 0.44782066, + // 0.42196107, -0.67597556, 0.34948668, -0.87903690, + // -0.34136006, -0.19722189, 0.76997000, -0.68663412, + // 0.45603558, 0.60629857, -0.86984915, -0.08019307 + VLOAD_32(v8, 0x3ded1971, 0xbdce534c, 0x3e0df12a, 0x3ee548c0, 0x3ed80b48, + 0xbf2d0cbc, 0x3eb2efeb, 0xbf610890, 0xbeaec6bf, 0xbe49f489, + 0x3f451cc1, 0xbf2fc741, 0x3ee97d7f, 0x3f1b3662, 0xbf5eae6f, + 0xbda43c43); + // -0.38970658, 0.40460527, 0.69067985, -0.98108912, + // 0.47494572, -0.34277225, -0.54462087, -0.90492284, 0.60100728, + // -0.02819708, -0.46859986, 0.87238866, 0.46812481, + // 0.49922746, 0.97036403, 0.04279163 + VLOAD_32(v12, 0xbec7879f, 0x3ecf286c, 0x3f30d065, 0xbf7b28a8, 0x3ef32c16, + 0xbeaf7fd8, 0xbf0b6c46, 0xbf67a906, 0x3f19db9d, 0xbce6fd92, + 0xbeefec52, 0x3f5f54dd, 0x3eefae0e, 0x3eff9abe, 0x3f7869c7, + 0x3d2f4647); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.79804420, -0.70010293, -0.51047552, 0.38566175, + // 0.15318950, 0.15531392, -0.20705318, -0.82493448, + // 0.12047531, 0.57526720, 0.23939800, -0.19725421, + // 0.15403098, 0.03931713, -0.45930895, -0.15395784 + VLOAD_32(v4, 0x3f4c4ca0, 0xbf3339f2, 0xbf02ae86, 0x3ec57575, 0x3e1cddb5, + 0x3e1f0a9d, 0xbe5405c0, 0xbf532ee8, 0x3df6bbc3, 0x3f1344b6, + 0x3e7524c0, 0xbe49fd02, 0x3e1dba4c, 0x3d210b00, 0xbeeb2a8b, + 0xbe1da720); + asm volatile("vfnmsac.vv v4, v8, v12, v0.t"); + // 0.79804420, -0.65934104, -0.51047552, 0.82501376, + // 0.15318950, -0.07639174, 
-0.20705318, -1.62039506, + // 0.12047531, 0.56970614, 0.23939800, 0.40175763, + // 0.15403098, -0.26336378, -0.45930895, -0.15052626 + VCMP_U32(5, v4, 0x3f4c4ca0, 0xbf28ca93, 0xbf02ae86, 0x3f53341a, 0x3e1cddb5, + 0xbd9c7345, 0xbe5405c0, 0xbfcf691b, 0x3df6bbc3, 0x3f11d843, + 0x3e7524c0, 0x3ecdb32c, 0x3e1dba4c, 0xbe86d79e, 0xbeeb2a8b, + 0xbe1a238e); + + VSET(16, e64, m8); + // -0.1779684802061718, 0.1122733699429854, -0.0166033088608786, + // -0.0418350503858864, 0.0809510021720363, -0.9993917101510512, + // -0.2139048161619248, 0.7196716914796224, 0.6489783595942558, + // 0.5950689618839839, -0.7376256302221853, -0.5442228345597713, + // -0.8234113806545975, -0.6424001059348645, -0.3817524674245201, + // -0.8801262923106541 + VLOAD_64(v16, 0xbfc6c7abd11a2788, 0x3fbcbdf2941de8b0, 0xbf91007532405e80, + 0xbfa56b675a77c100, 0x3fb4b93472e84630, 0xbfeffb0452dfc0ba, + 0xbfcb613ba6efa978, 0x3fe7078ced586224, 0x3fe4c46e43c89c1c, + 0x3fe30ace10450114, 0xbfe79aa110cfdc92, 0xbfe16a46018575da, + 0xbfea5962d2e21a3e, 0xbfe48e8aaabdfd5e, 0xbfd86ea1e6b05c10, + 0xbfec29fe9d3a5e2c); + // 0.6809772463364707, -0.3512739833826983, -0.3746023351803702, + // -0.7912172181005324, 0.8292434726428350, 0.4103374079106952, + // -0.0850673796598582, -0.5834949864830523, + // -0.9215678788036654, 0.4412210589054084, 0.3537359089001260, + // -0.4889461402031243, 0.2341577339668230, 0.0593866008892341, + // -0.4825773777931026, 0.8989772522533539 + VLOAD_64(v24, 0x3fe5ca90cb4aba98, 0xbfd67b45dfa41e18, 0xbfd7f97c12a6b704, + 0xbfe951a6c578c3ac, 0x3fea89299b6d84b0, 0x3fda42f7d4d35178, + 0xbfb5c6f9cd987320, 0xbfe2abfdad8a63b6, 0xbfed7d7beb902fcc, + 0x3fdc3cf7409388b4, 0x3fd6a39bf009666c, 0xbfdf4ae4c06b61e4, + 0x3fcdf8e170bf19c0, 0x3fae67eba9479c60, 0xbfdee28c39da4ac0, + 0x3fecc46bf148d5ca); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.2213384305747967, -0.6962211546566610, -0.0896076892809434, + // -0.1334142611967066, 0.1988436916560323, + // -0.3391391007320459, -0.6137202819751713, 0.6759552396290200, + // 
-0.7798663937316326, -0.1025181838739857, 0.5296250728149803, + // 0.8832422045338422, 0.8373555508937671, -0.8622529212135799, + // 0.4241832213372883, 0.7769982087360683 + VLOAD_64(v8, 0x3fcc54d154555708, 0xbfe6477196411436, 0xbfb6f0878eee8940, + 0xbfc113b7f0547630, 0x3fc973b5c856db48, 0xbfd5b4747c9c185c, + 0xbfe3a398b77f3552, 0x3fe5a16ce1f8870a, 0xbfe8f4aa5e0a7552, + 0xbfba3ea1b6fbece0, 0x3fe0f2b047dc3902, 0x3fec438527dd6ef6, + 0x3feacb9dde46cf34, 0xbfeb9793702fc4f0, 0x3fdb25d161c9f510, + 0x3fe8dd2b58f24dc8); + asm volatile("vfnmsac.vv v8, v16, v24, v0.t"); + // 0.2213384305747967, -0.6567824407689892, -0.0896076892809434, + // -0.1665148733821233, 0.1988436916560323, 0.0709487030987733, + // -0.6137202819751713, 1.0958800635211576, + // -0.7798663937316326, -0.3650751413581792, 0.5296250728149803, + // 0.6171465501654385, 0.8373555508937671, -0.8241029625112244, + // 0.4241832213372883, 1.5682117246334322 + VCMP_U64(6, v8, 0x3fcc54d154555708, 0xbfe5045c9bf61361, 0xbfb6f0878eee8940, + 0xbfc5505bffbca57e, 0x3fc973b5c856db48, 0x3fb229b1b780ba72, + 0xbfe3a398b77f3552, 0x3ff188b9889296a1, 0xbfe8f4aa5e0a7552, + 0xbfd75d64202dd23c, 0x3fe0f2b047dc3902, 0x3fe3bfaa1f3997d0, + 0x3feacb9dde46cf34, 0xbfea5f0d2d10b7b0, 0x3fdb25d161c9f510, + 0x3ff917652d6811d0); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.4771 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb7a2); + // -0.6172, 0.8584, -0.1088, -0.6719, 0.3579, 0.5889, 0.1724, + // -0.5239, -0.5732, -0.6167, 0.8271, -0.7334, 0.3489, -0.7607, + // -0.7788, -0.5264 + VLOAD_16(v4, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, + 0xb896, 0xb8ef, 0x3a9e, 0xb9de, 0x3595, 0xba16, 0xba3b, 0xb836); + // 0.0186, 0.9351, 0.6201, 0.9463, 0.2512, 0.5786, 0.9424, + // -0.5132, -0.7646, 0.0194, -0.2507, -0.2905, 0.3452, + // -0.7803, -0.7666, -0.1387 + VLOAD_16(v2, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, + 0xba1e, 
0x24f6, 0xb403, 0xb4a6, 0x3586, 0xba3e, 0xba22, 0xb070); + asm volatile("vfnmsac.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.2759, 1.3447, 0.5684, 0.6260, 0.4219, 0.8594, 1.0244, + // -0.7632, -1.0381, -0.2749, 0.1438, -0.6406, 0.5117, -1.1426, + // -1.1387, -0.3899 + VCMP_U16(7, v2, 0xb46a, 0x3d61, 0x388c, 0x3902, 0x36c0, 0x3ae0, 0x3c19, + 0xba1b, 0xbc27, 0xb466, 0x309a, 0xb920, 0x3818, 0xbc93, 0xbc8d, + 0xb63d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.73549986 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf3c49b8); + // 0.74252719, 0.73023552, 0.75118375, 0.04020444, + // -0.77184784, -0.41120139, -0.57577437, -0.15976480, + // -0.05041125, 0.42673740, 0.88473374, -0.49891368, + // -0.84324479, -0.26009968, -0.01877740, -0.13754985 + VLOAD_32(v8, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, + 0xbed288fd, 0xbf1365f3, 0xbe239962, 0xbd4e7c07, 0x3eda7d53, + 0x3f627de9, 0xbeff719d, 0xbf57dee4, 0xbe852bc9, 0xbc99d30f, + 0xbe0cd9de); + // 0.89538908, 0.68592542, 0.67501348, 0.08327232, + // 0.28473541, -0.93230879, -0.77235961, -0.92498165, + // -0.55227244, 0.97729182, 0.28253901, 0.45306230, + // -0.50359881, 0.40307203, -0.65891176, -0.59297264 + VLOAD_32(v4, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, + 0xbf6eabca, 0xbf45b95c, 0xbf6ccb99, 0xbf0d61ba, 0x3f7a2fcc, + 0x3e90a8f4, 0x3ee7f7c8, 0xbf00ebda, 0x3ece5f75, 0xbf28ae71, + 0xbf17cd0e); + asm volatile("vfnmsac.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // 1.44151771, 1.22301352, 1.22750902, 0.11284268, + // -0.28295860, -1.23474741, -1.19584155, -1.04248869, + // -0.58934993, 1.29115713, 0.93326056, 0.08611137, + // -1.12380528, 0.21176875, -0.67272252, -0.69414055 + VCMP_U32(8, v4, 0x3fb883a7, 0x3f9c8bb5, 0x3f9d1f04, 0x3de71a10, 0xbe90dff2, + 0xbf9e0c34, 0xbf991156, 0xbf857045, 0xbf16dfa3, 0x3fa544a3, + 0x3f6eea2a, 0x3db05b27, 0xbf8fd8da, 0x3e58d9e8, 0xbf2c378b, + 0xbf31b332); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5178244899339752 + 
BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe09204aa2ab2a0); + // 0.8646249694399413, 0.0049558737185129, 0.9278624830778543, + // -0.8820434014846885, 0.9252937592855630, + // -0.0640564429957495, -0.9483662154664578, + // 0.6036726974274773, 0.6321354499926264, 0.8250130840364809, + // 0.1494192541908572, 0.0196333207724690, + // -0.2272946521816408, 0.9289723385226867, + // -0.5162193242581365, -0.3917544955022987 + VLOAD_64(v16, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, + 0xbfec39b31560f050, 0x3fed9c01a86a6166, 0xbfb06600c7c3cc10, + 0xbfee59041b01e946, 0x3fe35149679e0d42, 0x3fe43a741f8b89d2, + 0x3fea6681d6d6ae5e, 0x3fc3202b8d123b90, 0x3f941ac1da84be40, + 0xbfcd17fdbcd68200, 0x3fedba24329afe9c, 0xbfe084de63680fb2, + 0xbfd91281728f9314); + // 0.3888129269587612, -0.8225750100551035, 0.8430062678626642, + // -0.6316792665412014, -0.6696614660277347, + // 0.6130084754374303, -0.2295750183537659, 0.1572393304616742, + // -0.7542147373874082, 0.6149250820738357, + // 0.1236692515687874, 0.9290168852760794, + // -0.0433411597165929, -0.8422695068160440, + // 0.6519328829008422, -0.3347506024828231 + VLOAD_64(v8, 0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, + 0xbfe436b76feb621e, 0xbfe56ddde1ff608c, 0x3fe39dc3f345962e, + 0xbfcd62b6d5e76d58, 0x3fc4206b1afd2970, 0xbfe82286f1e7af48, + 0x3fe3ad775d9fe964, 0x3fbfa8c9bf023cb0, 0x3fedba819e762954, + 0xbfa630cfff1b61a0, 0xbfeaf3df2e462cb6, 0x3fe4dca25967e02c, + 0xbfd56c8dca7eb8ac); + asm volatile("vfnmsac.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.0589110568256553, -0.8251412828355696, 0.3625363508340025, + // -0.1749355920677641, -1.1488012349688719, 0.6461784703586890, + // 0.2615122334407671, -0.1553571761707763, -1.0815499543490239, + // 0.1877131026437891, 0.0462963024810918, 0.9188502709613655, + // 0.0743575776140855, -1.3233141341743264, 0.9192438911788732, + // -0.1318905306700034 + VCMP_U64(9, v8, 0xbfae29970ce0c2e6, 0xbfea678eb10b76d9, 0x3fd733cbaa9c5dc5, + 0xbfc6644a1b6b315b, 
0xbff2617d675cbb41, 0x3fe4ad7e78b23c6a, + 0x3fd0bc9dce8872e3, 0xbfc3e2be736d1b43, 0xbff14e07532eb5a9, + 0x3fc806fba27160c3, 0x3fa7b4262229fd93, 0x3fed6738aef664ca, + 0x3fb30919240ff186, 0xbff52c4b7109d007, 0x3fed6a722a352743, + 0xbfc0e1c9f5f09ba4); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.4771 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb7a2); + // -0.6172, 0.8584, -0.1088, -0.6719, 0.3579, 0.5889, 0.1724, + // -0.5239, -0.5732, -0.6167, 0.8271, -0.7334, 0.3489, + // -0.7607, -0.7788, -0.5264 + VLOAD_16(v4, 0xb8f0, 0x3ade, 0xaef7, 0xb960, 0x35ba, 0x38b6, 0x3184, 0xb831, + 0xb896, 0xb8ef, 0x3a9e, 0xb9de, 0x3595, 0xba16, 0xba3b, 0xb836); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.0186, 0.9351, 0.6201, 0.9463, 0.2512, 0.5786, 0.9424, + // -0.5132, -0.7646, 0.0194, -0.2507, -0.2905, 0.3452, + // -0.7803, -0.7666, -0.1387 + VLOAD_16(v2, 0x24c1, 0x3b7b, 0x38f6, 0x3b92, 0x3405, 0x38a1, 0x3b8a, 0xb81b, + 0xba1e, 0x24f6, 0xb403, 0xb4a6, 0x3586, 0xba3e, 0xba22, 0xb070); + asm volatile("vfnmsac.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 0.0186, 1.3447, 0.6201, 0.6260, 0.2512, 0.8594, 0.9424, + // -0.7632, -0.7646, -0.2749, -0.2507, -0.6406, 0.3452, + // -1.1426, -0.7666, -0.3899 + VCMP_U16(10, v2, 0x24c1, 0x3d61, 0x38f6, 0x3902, 0x3405, 0x3ae0, 0x3b8a, + 0xba1b, 0xba1e, 0xb466, 0xb403, 0xb920, 0x3586, 0xbc93, 0xba22, + 0xb63d); + + VSET(16, e32, m4); + double dscalar_32; + // -0.73549986 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf3c49b8); + // 0.74252719, 0.73023552, 0.75118375, 0.04020444, + // -0.77184784, -0.41120139, -0.57577437, -0.15976480, + // -0.05041125, 0.42673740, 0.88473374, -0.49891368, + // -0.84324479, -0.26009968, -0.01877740, -0.13754985 + VLOAD_32(v8, 0x3f3e1643, 0x3f3af0b7, 0x3f404d94, 0x3d24ad6a, 0xbf4597d2, + 0xbed288fd, 0xbf1365f3, 0xbe239962, 0xbd4e7c07, 0x3eda7d53, + 0x3f627de9, 0xbeff719d, 0xbf57dee4, 0xbe852bc9, 0xbc99d30f, + 0xbe0cd9de); + VLOAD_8(v0, 
0xAA, 0xAA); + // 0.89538908, 0.68592542, 0.67501348, 0.08327232, + // 0.28473541, -0.93230879, -0.77235961, -0.92498165, + // -0.55227244, 0.97729182, 0.28253901, 0.45306230, + // -0.50359881, 0.40307203, -0.65891176, -0.59297264 + VLOAD_32(v4, 0x3f653838, 0x3f2f98cf, 0x3f2ccdaf, 0x3daa8aad, 0x3e91c8d7, + 0xbf6eabca, 0xbf45b95c, 0xbf6ccb99, 0xbf0d61ba, 0x3f7a2fcc, + 0x3e90a8f4, 0x3ee7f7c8, 0xbf00ebda, 0x3ece5f75, 0xbf28ae71, + 0xbf17cd0e); + asm volatile("vfnmsac.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 0.89538908, 1.22301352, 0.67501348, 0.11284268, + // 0.28473541, -1.23474741, -0.77235961, -1.04248869, + // -0.55227244, 1.29115713, 0.28253901, 0.08611137, + // -0.50359881, 0.21176875, -0.65891176, -0.69414055 + VCMP_U32(11, v4, 0x3f653838, 0x3f9c8bb5, 0x3f2ccdaf, 0x3de71a10, 0x3e91c8d7, + 0xbf9e0c34, 0xbf45b95c, 0xbf857045, 0xbf0d61ba, 0x3fa544a3, + 0x3e90a8f4, 0x3db05b27, 0xbf00ebda, 0x3e58d9e8, 0xbf28ae71, + 0xbf31b332); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5178244899339752 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe09204aa2ab2a0); + // 0.8646249694399413, 0.0049558737185129, + // 0.9278624830778543, -0.8820434014846885, + // 0.9252937592855630, -0.0640564429957495, + // -0.9483662154664578, 0.6036726974274773, + // 0.6321354499926264, 0.8250130840364809, + // 0.1494192541908572, 0.0196333207724690, + // -0.2272946521816408, 0.9289723385226867, + // -0.5162193242581365, -0.3917544955022987 + VLOAD_64(v16, 0x3febab01fbe195e4, 0x3f744c9c38b4b600, 0x3fedb10ca98026d0, + 0xbfec39b31560f050, 0x3fed9c01a86a6166, 0xbfb06600c7c3cc10, + 0xbfee59041b01e946, 0x3fe35149679e0d42, 0x3fe43a741f8b89d2, + 0x3fea6681d6d6ae5e, 0x3fc3202b8d123b90, 0x3f941ac1da84be40, + 0xbfcd17fdbcd68200, 0x3fedba24329afe9c, 0xbfe084de63680fb2, + 0xbfd91281728f9314); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.3888129269587612, -0.8225750100551035, + // 0.8430062678626642, -0.6316792665412014, + // -0.6696614660277347, 0.6130084754374303, + // -0.2295750183537659, 0.1572393304616742, 
+ // -0.7542147373874082, 0.6149250820738357, + // 0.1236692515687874, 0.9290168852760794, + // -0.0433411597165929, -0.8422695068160440, + // 0.6519328829008422, -0.3347506024828231 + VLOAD_64(v8, 0x3fd8e24f9d6331d4, 0xbfea5288d3d6317c, 0x3feaf9e847d9618e, + 0xbfe436b76feb621e, 0xbfe56ddde1ff608c, 0x3fe39dc3f345962e, + 0xbfcd62b6d5e76d58, 0x3fc4206b1afd2970, 0xbfe82286f1e7af48, + 0x3fe3ad775d9fe964, 0x3fbfa8c9bf023cb0, 0x3fedba819e762954, + 0xbfa630cfff1b61a0, 0xbfeaf3df2e462cb6, 0x3fe4dca25967e02c, + 0xbfd56c8dca7eb8ac); + asm volatile("vfnmsac.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // 0.3888129269587612, -0.8251412828355696, 0.8430062678626642, + // -0.1749355920677641, -0.6696614660277347, + // 0.6461784703586890, -0.2295750183537659, + // -0.1553571761707763, -0.7542147373874082, + // 0.1877131026437891, 0.1236692515687874, 0.9188502709613655, + // -0.0433411597165929, -1.3233141341743264, + // 0.6519328829008422, -0.1318905306700034 + VCMP_U64(12, v8, 0x3fd8e24f9d6331d4, 0xbfea678eb10b76d9, 0x3feaf9e847d9618e, + 0xbfc6644a1b6b315b, 0xbfe56ddde1ff608c, 0x3fe4ad7e78b23c6a, + 0xbfcd62b6d5e76d58, 0xbfc3e2be736d1b43, 0xbfe82286f1e7af48, + 0x3fc806fba27160c3, 0x3fbfa8c9bf023cb0, 0x3fed6738aef664ca, + 0xbfa630cfff1b61a0, 0xbff52c4b7109d007, 0x3fe4dca25967e02c, + 0xbfc0e1c9f5f09ba4); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c new file mode 100644 index 000000000..dcbb1f51d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfnmsub.c @@ -0,0 +1,454 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.0091, -0.3794, -0.0005, -0.0464, 0.4834, 0.2932, -0.3042, + // -0.3096, -0.9844, -0.1815, -0.8760, 0.0853, -0.3723, -0.8877, + // 0.1584, 0.1943 + VLOAD_16(v4, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, + 0xbbe0, 0xb1cf, 0xbb02, 0x2d75, 0xb5f5, 0xbb1a, 0x3112, 0x3238); + // -0.3301, 0.7769, 0.6572, -0.8193, 0.4529, 0.2349, 0.5264, + // -0.2456, 0.0873, 0.5381, 0.4670, 0.8564, -0.1790, 0.6641, + // 0.0182, 0.0447 + VLOAD_16(v6, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, + 0x2d97, 0x384e, 0x3779, 0x3ada, 0xb1ba, 0x3950, 0x24a7, 0x29b9); + // 0.5835, 0.4404, -0.3459, 0.0516, -0.4866, -0.2191, 0.0685, + // -0.5430, -0.1429, -0.7539, -0.6416, -0.6758, -0.1147, 0.3438, + // 0.3440, 0.1991 + VLOAD_16(v2, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, + 0xb093, 0xba08, 0xb922, 0xb968, 0xaf57, 0x3580, 0x3581, 0x325f); + asm volatile("vfnmsub.vv v2, v4, v6"); + // -0.3247, 0.9438, 0.6572, -0.8169, 0.6880, 0.2991, 0.5474, + // -0.4136, -0.0534, 0.4014, -0.0950, 0.9141, -0.2217, 0.9692, + // -0.0363, 0.0060 + VCMP_U16(1, v2, 0xb532, 0x3b8d, 0x3942, 0xba89, 0x3981, 0x34c9, 0x3861, + 0xb69e, 0xaad5, 0x366b, 0xae14, 0x3b50, 0xb318, 0x3bc1, 0xa8a7, + 0x1e29); + + VSET(16, e32, m4); + // 0.76259303, -0.43966120, -0.19390504, -0.57240725, + // -0.57148474, -0.93710214, 0.24273214, 0.44242114, + // -0.93160200, -0.56412256, -0.75430351, -0.02741535, + // -0.60542876, -0.93627954, 0.02798123, 0.23119579 + VLOAD_32(v8, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, + 0xbf6fe5ed, 0x3e788ec6, 0x3ee28506, 0xbf6e7d78, 0xbf106a56, + 0xbf411a09, 0xbce09629, 0xbf1afd61, 0xbf6fb004, 0x3ce538e6, + 0x3e6cbe97); + // 0.48736989, 0.19715627, -0.47227743, 0.13752034, + // -0.16710435, 
0.84761631, 0.37147006, 0.25389814, + // -0.44707820, 0.38169226, -0.82191414, -0.81056035, + // 0.29047397, -0.46743703, -0.91869444, -0.08079135 + VLOAD_32(v12, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, + 0x3f58fd62, 0x3ebe3153, 0x3e81fef0, 0xbee4e76f, 0x3ec36d2b, + 0xbf5268f7, 0xbf4f80e2, 0x3e94b901, 0xbeef53e8, 0xbf6b2f8f, + 0xbda575f0); + // -0.48655373, -0.87417608, 0.17854533, 0.67417324, + // 0.46947387, 0.29113689, -0.11920074, 0.63394654, + // -0.82611400, -0.84088647, -0.13328743, 0.29885510, + // 0.91797447, -0.15480036, 0.76857966, 0.16230854 + VLOAD_32(v4, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, + 0x3e950fe5, 0xbdf41f84, 0x3f224a52, 0xbf537c35, 0xbf574456, + 0xbe087c80, 0x3e990389, 0x3f6b0060, 0xbe1e83fc, 0x3f44c1a3, + 0x3e263436); + asm volatile("vfnmsub.vv v4, v8, v12"); + // 0.85841238, -0.18718503, -0.43765658, 0.52342200, + // 0.10119282, 1.12044132, 0.40040392, -0.02657321, + // -1.21668768, -0.09267077, -0.92245328, -0.80236715, + // 0.84624207, -0.61237341, -0.94020027, -0.11831641 + VCMP_U32(2, v4, 0x3f5bc0ea, 0xbe3fad70, 0xbee01486, 0x3f05fefc, 0x3dcf3e2c, + 0x3f8f6a9f, 0x3ecd01be, 0xbcd9b00d, 0xbf9bbc6c, 0xbdbdca2c, + 0xbf6c25e6, 0xbf4d67ef, 0x3f58a353, 0xbf1cc481, 0xbf70b0f7, + 0xbdf24fdf); + + VSET(16, e64, m8); + // -0.1307639483617093, 0.9224167823566942, 0.8635785104096312, + // -0.1786758246437388, 0.0810514505300033, 0.4196384170211611, + // 0.9100790646565715, -0.5457616411379209, -0.5513001815564993, + // -0.4320693373833464, 0.2818536966914695, 0.5493933224246561, + // 0.0505621823765807, 0.7247332126666939, -0.8702311369694951, + // -0.0660417836134264 + VLOAD_64(v16, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, + 0xbfc6ded973b720d0, 0x3fb4bfc9b151d990, 0x3fdadb5b175011f8, + 0x3fed1f5e216f2d02, 0xbfe176e11e032836, 0xbfe1a44047420c82, + 0xbfdba706266a9d80, 0x3fd209e41662faec, 0x3fe194a14e0e8cc0, + 0x3fa9e3494f719000, 0x3fe73103b4d74f92, 0xbfebd8eef827d60a, + 0xbfb0e81d44ca0760); 
+ // 0.6650460871127466, -0.8389896062690501, 0.3260860096573337, + // 0.4421797679090849, -0.1921872051427089, -0.1798768047606598, + // -0.5065656464186716, 0.8248933299429206, + // -0.3169052211432897, -0.0970247500649024, 0.8584276150948376, + // -0.2642287948226270, 0.2403355182026823, + // -0.0814065713760876, -0.7437013715700231, -0.6226210619792329 + VLOAD_64(v24, 0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, + 0x3fdc4cac5e87d53c, 0xbfc8999720661708, 0xbfc7063400e0c4f8, + 0xbfe035c92894a640, 0x3fea6586b2596362, 0xbfd4482cd62f7e30, + 0xbfb8d69d306e6ba0, 0x3feb783d309a196c, 0xbfd0e91fe41b2de8, + 0x3fcec350735fb5b8, 0xbfb4d70fa1bd62a0, 0xbfe7cc66d19c4666, + 0xbfe3ec8301600d10); + // -0.2122847293404504, 0.9074328134093839, -0.4150374170703475, + // -0.4511563805942409, -0.9126942371441604, -0.8237861842027401, + // -0.0636244117792013, 0.7124530373845765, -0.4126670585839094, + // -0.2810978842877421, -0.3240264495739638, 0.5540367578795606, + // 0.7398533272929233, 0.4690189457399407, -0.2427822500985419, + // 0.2399358773396087 + VLOAD_64(v8, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 0xbfda8ff9193bbdb8, + 0xbfdcdfbf03022cf4, 0xbfed34ca8b7904d0, 0xbfea5c74d801844e, + 0xbfb049b07fd1a3e0, 0x3fe6cc6a4ff0157a, 0xbfda692318304834, + 0xbfd1fd81faff66b8, 0xbfd4bcd96efd6300, 0x3fe1baab4b7bfc96, + 0x3fe7ace0e291ea9e, 0x3fde04680a4a1390, 0xbfcf137d201c7be0, + 0x3fceb6380527c498); + asm volatile("vfnmsub.vv v8, v16, v24"); + // 0.6372868977272925, -1.6760208622190165, 0.6845034040552052, + // 0.3615690295631244, -0.1182120133317999, 0.1658155255420807, + // -0.4486624012573315, 1.2137228688596235, + // -0.5444086454629855, -0.2184785266689677, 0.9497556677330713, + // -0.5686128899794636, 0.2029269193361774, + // -0.4213201787237407, -0.9549780451092895, -0.6067752686868729 + VCMP_U64(3, v8, 0x3fe464a77dfd0e7c, 0xbffad0fb406a4f74, 0x3fe5e773aecd5e74, + 0x3fd723f26d4e15bc, 0xbfbe43247b412024, 0x3fc5397171afa72c, + 0xbfdcb6e281161599, 0x3ff36b68abc28cd2, 
0xbfe16bcbadfd8ab4, + 0xbfcbf71ab775f310, 0x3fee6465ff835579, 0xbfe23213a8d1778a, + 0x3fc9f982610371db, 0xbfdaf6e8e930da95, 0xbfee8f2e1e048ea3, + 0xbfe36ab3f7e103f3); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.0091, -0.3794, -0.0005, -0.0464, 0.4834, 0.2932, -0.3042, + // -0.3096, -0.9844, -0.1815, -0.8760, 0.0853, -0.3723, -0.8877, + // 0.1584, 0.1943 + VLOAD_16(v4, 0xa0ac, 0xb612, 0x8f83, 0xa9f0, 0x37bc, 0x34b1, 0xb4de, 0xb4f4, + 0xbbe0, 0xb1cf, 0xbb02, 0x2d75, 0xb5f5, 0xbb1a, 0x3112, 0x3238); + // -0.3301, 0.7769, 0.6572, -0.8193, 0.4529, 0.2349, 0.5264, + // -0.2456, 0.0873, 0.5381, 0.4670, 0.8564, -0.1790, 0.6641, + // 0.0182, 0.0447 + VLOAD_16(v6, 0xb548, 0x3a37, 0x3942, 0xba8e, 0x373f, 0x3384, 0x3836, 0xb3dc, + 0x2d97, 0x384e, 0x3779, 0x3ada, 0xb1ba, 0x3950, 0x24a7, 0x29b9); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.5835, 0.4404, -0.3459, 0.0516, -0.4866, -0.2191, 0.0685, + // -0.5430, -0.1429, -0.7539, -0.6416, -0.6758, -0.1147, 0.3438, + // 0.3440, 0.1991 + VLOAD_16(v2, 0x38ab, 0x370c, 0xb589, 0x2a9b, 0xb7c9, 0xb303, 0x2c62, 0xb858, + 0xb093, 0xba08, 0xb922, 0xb968, 0xaf57, 0x3580, 0x3581, 0x325f); + asm volatile("vfnmsub.vv v2, v4, v6, v0.t"); + // 0.5835, 0.9438, -0.3459, -0.8169, -0.4866, 0.2991, 0.0685, + // -0.4136, -0.1429, 0.4014, -0.6416, 0.9141, -0.1147, 0.9692, + // 0.3440, 0.0060 + VCMP_U16(4, v2, 0x38ab, 0x3b8d, 0xb589, 0xba89, 0xb7c9, 0x34c9, 0x2c62, + 0xb69e, 0xb093, 0x366b, 0xb922, 0x3b50, 0xaf57, 0x3bc1, 0x3581, + 0x1e29); + + VSET(16, e32, m4); + // 0.76259303, -0.43966120, -0.19390504, -0.57240725, + // -0.57148474, -0.93710214, 0.24273214, 0.44242114, + // -0.93160200, -0.56412256, -0.75430351, -0.02741535, + // -0.60542876, -0.93627954, 0.02798123, 0.23119579 + VLOAD_32(v8, 0x3f43394c, 0xbee11b46, 0xbe468f0b, 0xbf128948, 0xbf124cd3, + 0xbf6fe5ed, 0x3e788ec6, 0x3ee28506, 0xbf6e7d78, 0xbf106a56, + 0xbf411a09, 0xbce09629, 0xbf1afd61, 
0xbf6fb004, 0x3ce538e6, + 0x3e6cbe97); + // 0.48736989, 0.19715627, -0.47227743, 0.13752034, + // -0.16710435, 0.84761631, 0.37147006, 0.25389814, + // -0.44707820, 0.38169226, -0.82191414, -0.81056035, + // 0.29047397, -0.46743703, -0.91869444, -0.08079135 + VLOAD_32(v12, 0x3ef9888c, 0x3e49e355, 0xbef1ce59, 0x3e0cd222, 0xbe2b1d67, + 0x3f58fd62, 0x3ebe3153, 0x3e81fef0, 0xbee4e76f, 0x3ec36d2b, + 0xbf5268f7, 0xbf4f80e2, 0x3e94b901, 0xbeef53e8, 0xbf6b2f8f, + 0xbda575f0); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.48655373, -0.87417608, 0.17854533, 0.67417324, + // 0.46947387, 0.29113689, -0.11920074, 0.63394654, + // -0.82611400, -0.84088647, -0.13328743, 0.29885510, + // 0.91797447, -0.15480036, 0.76857966, 0.16230854 + VLOAD_32(v4, 0xbef91d92, 0xbf5fca01, 0x3e36d496, 0x3f2c969e, 0x3ef05ee1, + 0x3e950fe5, 0xbdf41f84, 0x3f224a52, 0xbf537c35, 0xbf574456, + 0xbe087c80, 0x3e990389, 0x3f6b0060, 0xbe1e83fc, 0x3f44c1a3, + 0x3e263436); + asm volatile("vfnmsub.vv v4, v8, v12, v0.t"); + // -0.48655373, -0.18718503, 0.17854533, 0.52342200, + // 0.46947387, 1.12044132, -0.11920074, -0.02657321, + // -0.82611400, -0.09267077, -0.13328743, -0.80236715, + // 0.91797447, -0.61237341, 0.76857966, -0.11831641 + VCMP_U32(5, v4, 0xbef91d92, 0xbe3fad70, 0x3e36d496, 0x3f05fefc, 0x3ef05ee1, + 0x3f8f6a9f, 0xbdf41f84, 0xbcd9b00d, 0xbf537c35, 0xbdbdca2c, + 0xbe087c80, 0xbf4d67ef, 0x3f6b0060, 0xbf1cc481, 0x3f44c1a3, + 0xbdf24fdf); + + VSET(16, e64, m8); + // -0.1307639483617093, 0.9224167823566942, 0.8635785104096312, + // -0.1786758246437388, 0.0810514505300033, 0.4196384170211611, + // 0.9100790646565715, -0.5457616411379209, -0.5513001815564993, + // -0.4320693373833464, 0.2818536966914695, 0.5493933224246561, + // 0.0505621823765807, 0.7247332126666939, -0.8702311369694951, + // -0.0660417836134264 + VLOAD_64(v16, 0xbfc0bcdf80daccc8, 0x3fed847033301d18, 0x3feba26f66779bbe, + 0xbfc6ded973b720d0, 0x3fb4bfc9b151d990, 0x3fdadb5b175011f8, + 0x3fed1f5e216f2d02, 0xbfe176e11e032836, 0xbfe1a44047420c82, + 
0xbfdba706266a9d80, 0x3fd209e41662faec, 0x3fe194a14e0e8cc0, + 0x3fa9e3494f719000, 0x3fe73103b4d74f92, 0xbfebd8eef827d60a, + 0xbfb0e81d44ca0760); + // 0.6650460871127466, -0.8389896062690501, 0.3260860096573337, + // 0.4421797679090849, -0.1921872051427089, -0.1798768047606598, + // -0.5065656464186716, 0.8248933299429206, + // -0.3169052211432897, -0.0970247500649024, 0.8584276150948376, + // -0.2642287948226270, 0.2403355182026823, + // -0.0814065713760876, -0.7437013715700231, -0.6226210619792329 + VLOAD_64(v24, 0x3fe5480ebb4f6ca8, 0xbfead900bb1380ea, 0x3fd4de97daca5430, + 0x3fdc4cac5e87d53c, 0xbfc8999720661708, 0xbfc7063400e0c4f8, + 0xbfe035c92894a640, 0x3fea6586b2596362, 0xbfd4482cd62f7e30, + 0xbfb8d69d306e6ba0, 0x3feb783d309a196c, 0xbfd0e91fe41b2de8, + 0x3fcec350735fb5b8, 0xbfb4d70fa1bd62a0, 0xbfe7cc66d19c4666, + 0xbfe3ec8301600d10); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.2122847293404504, 0.9074328134093839, -0.4150374170703475, + // -0.4511563805942409, -0.9126942371441604, -0.8237861842027401, + // -0.0636244117792013, 0.7124530373845765, -0.4126670585839094, + // -0.2810978842877421, -0.3240264495739638, 0.5540367578795606, + // 0.7398533272929233, 0.4690189457399407, -0.2427822500985419, + // 0.2399358773396087 + VLOAD_64(v8, 0xbfcb2c2560fa8d98, 0x3fed09b08a1d236c, 0xbfda8ff9193bbdb8, + 0xbfdcdfbf03022cf4, 0xbfed34ca8b7904d0, 0xbfea5c74d801844e, + 0xbfb049b07fd1a3e0, 0x3fe6cc6a4ff0157a, 0xbfda692318304834, + 0xbfd1fd81faff66b8, 0xbfd4bcd96efd6300, 0x3fe1baab4b7bfc96, + 0x3fe7ace0e291ea9e, 0x3fde04680a4a1390, 0xbfcf137d201c7be0, + 0x3fceb6380527c498); + asm volatile("vfnmsub.vv v8, v16, v24, v0.t"); + // -0.2122847293404504, -1.6760208622190165, -0.4150374170703475, + // 0.3615690295631244, -0.9126942371441604, 0.1658155255420807, + // -0.0636244117792013, 1.2137228688596235, -0.4126670585839094, + // -0.2184785266689677, -0.3240264495739638, -0.5686128899794636, + // 0.7398533272929233, -0.4213201787237407, -0.2427822500985419, + // -0.6067752686868729 + 
VCMP_U64(6, v8, 0xbfcb2c2560fa8d98, 0xbffad0fb406a4f74, 0xbfda8ff9193bbdb8, + 0x3fd723f26d4e15bc, 0xbfed34ca8b7904d0, 0x3fc5397171afa72c, + 0xbfb049b07fd1a3e0, 0x3ff36b68abc28cd2, 0xbfda692318304834, + 0xbfcbf71ab775f310, 0xbfd4bcd96efd6300, 0xbfe23213a8d1778a, + 0x3fe7ace0e291ea9e, 0xbfdaf6e8e930da95, 0xbfcf137d201c7be0, + 0xbfe36ab3f7e103f3); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.1346 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb04f); + // -0.1886, 0.9912, -0.0325, 0.5850, 0.2578, -0.2350, -0.8701, + // 0.9209, 0.5859, -0.4795, 0.8682, 0.9233, -0.8896, -0.5981, + // -0.3223, 0.6924 + VLOAD_16(v4, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, + 0x38b0, 0xb7ac, 0x3af2, 0x3b63, 0xbb1e, 0xb8c9, 0xb528, 0x398a); + // -0.0126, 0.9678, 0.8945, -0.9600, -0.9272, 0.4412, 0.5527, + // 0.1136, 0.7207, -0.5181, -0.2810, -0.4048, 0.1648, 0.7612, + // -0.8853, 0.1761 + VLOAD_16(v2, 0xa27a, 0x3bbe, 0x3b28, 0xbbae, 0xbb6b, 0x370f, 0x386c, 0x2f45, + 0x39c4, 0xb825, 0xb47f, 0xb67a, 0x3146, 0x3a17, 0xbb15, 0x31a3); + asm volatile("vfnmsub.vf v2, %[A], v4" ::[A] "f"(dscalar_16)); + // -0.1903, 1.1211, 0.0879, 0.4556, 0.1329, -0.1755, -0.7959, + // 0.9360, 0.6831, -0.5493, 0.8301, 0.8687, -0.8677, -0.4956, + // -0.4414, 0.7163 + VCMP_U16(7, v2, 0xb217, 0x3c7c, 0x2da0, 0x374b, 0x3041, 0xb19e, 0xba5e, + 0x3b7d, 0x3977, 0xb865, 0x3aa5, 0x3af3, 0xbaf1, 0xb7ee, 0xb710, + 0x39bb); + + VSET(16, e32, m4); + double dscalar_32; + // -0.16110219 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe24f7f9); + // -0.31537205, -0.17563045, -0.79069936, 0.22939304, + // -0.89379781, 0.26157290, 0.56702632, -0.11594663, + // 0.09605245, 0.45930776, -0.76518077, -0.26341528, 0.74385208, + // 0.89362013, -0.21185355, 0.23924881 + VLOAD_32(v8, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, + 0x3e85ece2, 0x3f1128a3, 0xbded756d, 0x3dc4b726, 0x3eeb2a63, + 0xbf43e2e3, 0xbe86de5e, 0x3f3e6d17, 
0x3f64c44a, 0xbe58f023, + 0x3e74fda4); + // 0.31856158, 0.48641542, 0.57264513, 0.30210373, + // -0.19719712, 0.85649359, 0.36901370, -0.78377151, + // 0.22567192, -0.75179213, -0.65690833, 0.11298654, + // -0.64884853, -0.48376039, -0.11539485, -0.42667609 + VLOAD_32(v4, 0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, + 0x3f5b432a, 0x3ebcef5d, 0xbf48a540, 0x3e67168a, 0xbf407573, + 0xbf282b25, 0x3de7657d, 0xbf261af0, 0xbef7af71, 0xbdec5422, + 0xbeda754a); + asm volatile("vfnmsub.vf v4, %[A], v8" ::[A] "f"(dscalar_32)); + // -0.26405108, -0.09726786, -0.69844496, 0.27806261, + // -0.92556667, 0.39955589, 0.62647521, -0.24221393, + // 0.13240869, 0.33819240, -0.87101012, -0.24521290, 0.63932115, + // 0.81568527, -0.23044391, 0.17051035 + VCMP_U32(8, v4, 0xbe8731b4, 0xbdc7345f, 0xbf32cd4a, 0x3e8e5e39, 0xbf6cf1f1, + 0x3ecc9297, 0x3f2060ae, 0xbe7806ee, 0x3e079625, 0x3ead278e, + 0xbf5efa85, 0xbe7b1917, 0x3f23aa8d, 0x3f50d0c0, 0xbe6bf97d, + 0x3e2e9a44); + + VSET(16, e64, m8); + double dscalar_64; + // 0.5849101968457469 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe2b79596d194ba); + // -0.7607808895269514, -0.0192591699518767, 0.6815284686654297, + // -0.5163928614577513, -0.9560613023939111, + // -0.8652684824342871, 0.4588682754059621, + // -0.5708244737077264, -0.7636024500128011, 0.2236424444447431, + // 0.8245435877598175, 0.8527344486412596, -0.3097355632002228, + // 0.0764086736442742, 0.2567358761671383, 0.1904958118727702 + VLOAD_64(v16, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, + 0xbfe0864a527b2a32, 0xbfee980ddf5818b2, 0xbfebb047874a12f4, + 0x3fdd5e190b029804, 0xbfe24431afca9858, 0xbfe86f6e67be6a22, + 0x3fcca050cc719f20, 0x3fea62a93bf1c1ec, 0x3feb4999c122c714, + 0xbfd3d2b51c969928, 0x3fb38f84d26ed230, 0x3fd06e5c4ff641c8, + 0x3fc8622ab1025ec0); + // 0.1182575129292827, -0.8756460666506833, + // -0.3686593299789440, 0.2802900907620893, + // -0.5167592439660142, 0.3872686605057347, + // -0.0640775227939985, -0.4352087179743556, + // 
-0.1509314378482451, -0.9803534868251271, + // 0.9211862470421908, 0.7804942879773937, 0.5029472314120484, + // 0.1158347026033590, 0.7422982722940397, 0.0792254120441500 + VLOAD_64(v8, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, + 0x3fd1f045d94dd3b0, 0xbfe0894aae97abc2, 0x3fd8c9027de8d2a8, + 0xbfb0676270cf1540, 0xbfdbda75aaa8928c, 0xbfc351b8aabf8c50, + 0xbfef5f0e468ddf22, 0x3fed7a5b94924728, 0x3fe8f9cf28327c9e, + 0x3fe01824cad0e968, 0x3fbda757d36c5e40, 0x3fe7c0e84e6c2aa0, + 0x3fb4481dd9bde340); + asm volatile("vfnmsub.vf v8, %[A], v16" ::[A] "f"(dscalar_64)); + // -0.8299509146929066, 0.4929151432599784, 0.8971610699324351, + // -0.6803373936193172, -0.6538035512838903, -1.0917858708828851, + // 0.4963478718767876, -0.3162664568083610, -0.6753211129907725, + // 0.7970611954020426, 0.2857323586707750, 0.3962153810234212, + // -0.6039145273284674, 0.0086557749429749, -0.1774419523986262, + // 0.1441560605188410 + VCMP_U64(9, v8, 0xbfea8ef5387c85b1, 0x3fdf8bebf5004e06, 0x3fecb58b21d3556c, + 0xbfe5c552ecfae837, 0xbfe4ebf56cd8bc27, 0xbff177f4761ad476, + 0x3fdfc429dd49999d, 0xbfd43db5aa3413c6, 0xbfe59c3b05d2ff7f, + 0x3fe981867ae532b2, 0x3fd249705ff9984b, 0x3fd95b97c1eabccb, + 0xbfe353448f0e8fe6, 0x3f81ba1e7269b44b, 0xbfc6b66afb3ec852, + 0x3fc273b4aeb96c59); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.1346 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb04f); + // -0.1886, 0.9912, -0.0325, 0.5850, 0.2578, -0.2350, + // -0.8701, 0.9209, 0.5859, -0.4795, 0.8682, 0.9233, + // -0.8896, -0.5981, -0.3223, 0.6924 + VLOAD_16(v4, 0xb209, 0x3bee, 0xa82a, 0x38ae, 0x3420, 0xb385, 0xbaf6, 0x3b5e, + 0x38b0, 0xb7ac, 0x3af2, 0x3b63, 0xbb1e, 0xb8c9, 0xb528, 0x398a); + VLOAD_8(v0, 0xAA, 0xAA); + // -0.0126, 0.9678, 0.8945, -0.9600, -0.9272, 0.4412, 0.5527, + // 0.1136, 0.7207, -0.5181, -0.2810, -0.4048, 0.1648, 0.7612, + // -0.8853, 0.1761 + VLOAD_16(v2, 0xa27a, 0x3bbe, 0x3b28, 
0xbbae, 0xbb6b, 0x370f, 0x386c, 0x2f45, + 0x39c4, 0xb825, 0xb47f, 0xb67a, 0x3146, 0x3a17, 0xbb15, 0x31a3); + asm volatile("vfnmsub.vf v2, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // -0.0126, 1.1211, 0.8945, 0.4556, -0.9272, -0.1755, 0.5527, + // 0.9360, 0.7207, -0.5493, -0.2810, 0.8687, 0.1648, -0.4956, + // -0.8853, 0.7163 + VCMP_U16(10, v2, 0xa27a, 0x3c7c, 0x3b28, 0x374b, 0xbb6b, 0xb19e, 0x386c, + 0x3b7d, 0x39c4, 0xb865, 0xb47f, 0x3af3, 0x3146, 0xb7ee, 0xbb15, + 0x39bb); + + VSET(16, e32, m4); + double dscalar_32; + // -0.16110219 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe24f7f9); + // -0.31537205, -0.17563045, -0.79069936, 0.22939304, + // -0.89379781, 0.26157290, 0.56702632, -0.11594663, + // 0.09605245, 0.45930776, -0.76518077, -0.26341528, + // 0.74385208, 0.89362013, -0.21185355, 0.23924881 + VLOAD_32(v8, 0xbea17872, 0xbe33d878, 0xbf4a6b46, 0x3e6ae602, 0xbf64cfef, + 0x3e85ece2, 0x3f1128a3, 0xbded756d, 0x3dc4b726, 0x3eeb2a63, + 0xbf43e2e3, 0xbe86de5e, 0x3f3e6d17, 0x3f64c44a, 0xbe58f023, + 0x3e74fda4); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.31856158, 0.48641542, 0.57264513, 0.30210373, + // -0.19719712, 0.85649359, 0.36901370, -0.78377151, + // 0.22567192, -0.75179213, -0.65690833, 0.11298654, + // -0.64884853, -0.48376039, -0.11539485, -0.42667609 + VLOAD_32(v4, 0x3ea31a81, 0x3ef90b71, 0x3f1298df, 0x3e9aad57, 0xbe49ee0b, + 0x3f5b432a, 0x3ebcef5d, 0xbf48a540, 0x3e67168a, 0xbf407573, + 0xbf282b25, 0x3de7657d, 0xbf261af0, 0xbef7af71, 0xbdec5422, + 0xbeda754a); + asm volatile("vfnmsub.vf v4, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 0.31856158, -0.09726786, 0.57264513, 0.27806261, + // -0.19719712, 0.39955589, 0.36901370, -0.24221393, + // 0.22567192, 0.33819240, -0.65690833, -0.24521290, + // -0.64884853, 0.81568527, -0.11539485, 0.17051035 + VCMP_U32(11, v4, 0x3ea31a81, 0xbdc7345f, 0x3f1298df, 0x3e8e5e39, 0xbe49ee0b, + 0x3ecc9297, 0x3ebcef5d, 0xbe7806ee, 0x3e67168a, 0x3ead278e, + 0xbf282b25, 0xbe7b1917, 0xbf261af0, 0x3f50d0c0, 0xbdec5422, + 0x3e2e9a44); + + VSET(16, 
e64, m8); + double dscalar_64; + // 0.5849101968457469 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fe2b79596d194ba); + // -0.7607808895269514, -0.0192591699518767, + // 0.6815284686654297, -0.5163928614577513, + // -0.9560613023939111, -0.8652684824342871, + // 0.4588682754059621, -0.5708244737077264, + // -0.7636024500128011, 0.2236424444447431, + // 0.8245435877598175, 0.8527344486412596, + // -0.3097355632002228, 0.0764086736442742, + // 0.2567358761671383, 0.1904958118727702 + VLOAD_64(v16, 0xbfe8585129fe14da, 0xbf93b8ad045d9c40, 0x3fe5cf14ca86c05c, + 0xbfe0864a527b2a32, 0xbfee980ddf5818b2, 0xbfebb047874a12f4, + 0x3fdd5e190b029804, 0xbfe24431afca9858, 0xbfe86f6e67be6a22, + 0x3fcca050cc719f20, 0x3fea62a93bf1c1ec, 0x3feb4999c122c714, + 0xbfd3d2b51c969928, 0x3fb38f84d26ed230, 0x3fd06e5c4ff641c8, + 0x3fc8622ab1025ec0); + VLOAD_8(v0, 0xAA, 0xAA); + // 0.1182575129292827, -0.8756460666506833, + // -0.3686593299789440, 0.2802900907620893, + // -0.5167592439660142, 0.3872686605057347, + // -0.0640775227939985, -0.4352087179743556, + // -0.1509314378482451, -0.9803534868251271, + // 0.9211862470421908, 0.7804942879773937, + // 0.5029472314120484, 0.1158347026033590, + // 0.7422982722940397, 0.0792254120441500 + VLOAD_64(v8, 0x3fbe461fd6899df0, 0xbfec054ae66457d4, 0xbfd7981d4d67fd54, + 0x3fd1f045d94dd3b0, 0xbfe0894aae97abc2, 0x3fd8c9027de8d2a8, + 0xbfb0676270cf1540, 0xbfdbda75aaa8928c, 0xbfc351b8aabf8c50, + 0xbfef5f0e468ddf22, 0x3fed7a5b94924728, 0x3fe8f9cf28327c9e, + 0x3fe01824cad0e968, 0x3fbda757d36c5e40, 0x3fe7c0e84e6c2aa0, + 0x3fb4481dd9bde340); + asm volatile("vfnmsub.vf v8, %[A], v16, v0.t" ::[A] "f"(dscalar_64)); + // 0.1182575129292827, 0.4929151432599784, + // -0.3686593299789440, -0.6803373936193172, + // -0.5167592439660142, -1.0917858708828851, + // -0.0640775227939985, -0.3162664568083610, + // -0.1509314378482451, 0.7970611954020426, + // 0.9211862470421908, 0.3962153810234212, 0.5029472314120484, + // 0.0086557749429749, 0.7422982722940397, 0.1441560605188410 + 
VCMP_U64(12, v8, 0x3fbe461fd6899df0, 0x3fdf8bebf5004e06, 0xbfd7981d4d67fd54, + 0xbfe5c552ecfae837, 0xbfe0894aae97abc2, 0xbff177f4761ad476, + 0xbfb0676270cf1540, 0xbfd43db5aa3413c6, 0xbfc351b8aabf8c50, + 0x3fe981867ae532b2, 0x3fed7a5b94924728, 0x3fd95b97c1eabccb, + 0x3fe01824cad0e968, 0x3f81ba1e7269b44b, 0x3fe7c0e84e6c2aa0, + 0x3fc273b4aeb96c59); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c new file mode 100644 index 000000000..357bcc2ef --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrdiv.c @@ -0,0 +1,179 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values (vector-scalar) +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -35.5312, -61.8125, -37.3125, 23.5938, 44.4688, 38.1250, + // -93.5000, -23.2031, -62.8125, 27.9844, -26.2344, -10.3594, + // -10.7109, -42.0938, 11.0625, 17.8281 + VLOAD_16(v2, 0xd071, 0xd3ba, 0xd0aa, 0x4de6, 0x518f, 0x50c4, 0xd5d8, 0xcdcd, + 0xd3da, 0x4eff, 0xce8f, 0xc92e, 0xc95b, 0xd143, 0x4988, 0x4c75); + // -17.4844 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xcc5f); + asm volatile("vfrdiv.vf v4, v2, %[A]" ::[A] "f"(dscalar_16)); + // 0.4922, 0.2830, 0.4685, -0.7412, -0.3931, -0.4585, 0.1870, + // 0.7534, 0.2783, -0.6250, 0.6665, 1.6875, 1.6328, 0.4153, + // -1.5801, -0.9810 + VCMP_U16(1, v4, 0x37df, 0x3486, 0x377f, 0xb9ed, 0xb64a, 0xb756, 0x31fb, + 0x3a07, 0x3474, 0xb8ff, 0x3954, 0x3ec0, 0x3e87, 0x36a5, 0xbe52, + 0xbbd8); + + VSET(16, e32, m4); + double dscalar_32; + // 981163.06250000, -831670.37500000, -85439.06250000, 
+ // 64225.75781250, -215361.43750000, -292944.75000000, + // 396490.21875000, 954345.93750000, 241910.40625000, + // -62372.83593750, 391838.50000000, 263890.03125000, + // 755217.06250000, -6653.31689453, 526939.25000000, + // -759232.75000000 + VLOAD_32(v4, 0x496f8ab1, 0xc94b0b66, 0xc7a6df88, 0x477ae1c2, 0xc852505c, + 0xc88f0a18, 0x48c19947, 0x4968fe9f, 0x486c3d9a, 0xc773a4d6, + 0x48bf53d0, 0x4880da41, 0x49386111, 0xc5cfea89, 0x4900a5b4, + 0xc9395c0c); + // -816463.43750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc94754f7); + asm volatile("vfrdiv.vf v8, v4, %[A]" ::[A] "f"(dscalar_32)); + // -0.83213836, 0.98171520, 9.55609035, + // -12.71239853, 3.79113102, 2.78709030, -2.05922723, + // -0.85552144, -3.37506533, 13.09004879, -2.08367324, + // -3.09395337, -1.08109772, 122.71524811, + // -1.54944515, 1.07537961 + VCMP_U32(2, v8, 0xbf550705, 0x3f7b51af, 0x4118e5bf, 0xc14b65fc, 0x4072a1e4, + 0x40325faf, 0xc003ca60, 0xbf5b0374, 0xc0580112, 0x415170d6, + 0xc0055ae7, 0xc0460354, 0xbf8a6168, 0x42f56e35, 0xbfc65437, + 0x3f89a60a); + + VSET(16, e64, m8); + double dscalar_64; + // -1436518.0384849868714809, 7616315.8933699131011963, + // -3920170.8619796745479107, -8788296.3276759665459394, + // -4048340.2138868225738406, 7863298.6869412772357464, + // 6686376.3073008488863707, 7004262.4451152756810188, + // 5533006.3396991230547428, 2002846.6050684414803982, + // -1239975.7277694121003151, 4133787.1656649876385927, + // 2465999.3703419454395771, -4337686.8389181373640895, + // -5741249.6292232554405928, 1762825.0474482532590628 + VLOAD_64(v8, 0xc135eb6609da26f0, 0x415d0dcef92cf900, 0xc14de8956e555998, + 0xc160c3290a7c524f, 0xc14ee2ea1b60a4b6, 0x415dff00abf6d88c, + 0x415981aa13aad12e, 0x415ab8199c7cc4c8, 0x41551b5395bda164, + 0x413e8f9e9ae5c3f0, 0xc132eba7ba4f18a0, 0x414f89cd953482a4, + 0x4142d067af675d68, 0xc1508c05b5b0d5b3, 0xc155e6b06845319e, + 0x413ae6090c259198); + // -181636.6228598635643721 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xc1062c24fb9df3c0); + asm 
volatile("vfrdiv.vf v16, v8, %[A]" ::[A] "f"(dscalar_64)); + // 0.1264422847425051, -0.0238483573164265, 0.0463338536137523, + // 0.0206680130126992, 0.0448669363895861, -0.0230992907800273, + // -0.0271651810355835, -0.0259322982659703, + // -0.0328278356662322, -0.0906892332144711, 0.1464840148013292, + // -0.0439395197625382, -0.0736563946626949, 0.0418740747326899, + // 0.0316371233773435, -0.1030372373723578 + VCMP_U64(3, v16, 0x3fc02f42c2e6795f, 0xbf986bb42af3122b, 0x3fa7b91223effbc4, + 0x3f9529fedfd9f42e, 0x3fa6f8cc90ee127a, 0xbf97a75729d81370, + 0xbf9bd130708d0e6e, 0xbf9a8dff13d98f11, 0xbfa0cecf612b7be2, + 0xbfb73768dac16680, 0x3fc2bffcfa7aafc4, 0xbfa67f3da0c39bb5, + 0xbfb2db253e37b0f2, 0x3fa57084cb0de853, 0x3fa032bdb47d8bce, + 0xbfba60a5fcc8d2be); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + double dscalar_16; + // -35.5312, -61.8125, -37.3125, 23.5938, 44.4688, 38.1250, + // -93.5000, -23.2031, -62.8125, 27.9844, -26.2344, -10.3594, + // -10.7109, -42.0938, 11.0625, 17.8281 + VLOAD_16(v2, 0xd071, 0xd3ba, 0xd0aa, 0x4de6, 0x518f, 0x50c4, 0xd5d8, 0xcdcd, + 0xd3da, 0x4eff, 0xce8f, 0xc92e, 0xc95b, 0xd143, 0x4988, 0x4c75); + // -17.4844 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xcc5f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfrdiv.vf v4, v2, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.2830, 0.0000, -0.7412, 0.0000, -0.4585, 0.0000, + // 0.7534, 0.0000, -0.6250, 0.0000, 1.6875, 0.0000, 0.4153, + // 0.0000, -0.9810 + VCMP_U16(4, v4, 0x0, 0x3486, 0x0, 0xb9ed, 0x0, 0xb756, 0x0, 0x3a07, 0x0, + 0xb8ff, 0x0, 0x3ec0, 0x0, 0x36a5, 0x0, 0xbbd8); + + VSET(16, e32, m4); + double dscalar_32; + // 981163.06250000, -831670.37500000, -85439.06250000, + // 64225.75781250, -215361.43750000, -292944.75000000, + // 396490.21875000, 954345.93750000, 241910.40625000, + // -62372.83593750, 391838.50000000, 263890.03125000, + // 755217.06250000, -6653.31689453, 526939.25000000, + // 
-759232.75000000 + VLOAD_32(v4, 0x496f8ab1, 0xc94b0b66, 0xc7a6df88, 0x477ae1c2, 0xc852505c, + 0xc88f0a18, 0x48c19947, 0x4968fe9f, 0x486c3d9a, 0xc773a4d6, + 0x48bf53d0, 0x4880da41, 0x49386111, 0xc5cfea89, 0x4900a5b4, + 0xc9395c0c); + // -816463.43750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc94754f7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfrdiv.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.98171520, 0.00000000, -12.71239853, + // 0.00000000, 2.78709030, 0.00000000, -0.85552144, + // 0.00000000, 13.09004879, 0.00000000, -3.09395337, + // 0.00000000, 122.71524811, 0.00000000, 1.07537961 + VCMP_U32(5, v8, 0x0, 0x3f7b51af, 0x0, 0xc14b65fc, 0x0, 0x40325faf, 0x0, + 0xbf5b0374, 0x0, 0x415170d6, 0x0, 0xc0460354, 0x0, 0x42f56e35, 0x0, + 0x3f89a60a); + + VSET(16, e64, m8); + double dscalar_64; + // -1436518.0384849868714809, 7616315.8933699131011963, + // -3920170.8619796745479107, -8788296.3276759665459394, + // -4048340.2138868225738406, 7863298.6869412772357464, + // 6686376.3073008488863707, 7004262.4451152756810188, + // 5533006.3396991230547428, 2002846.6050684414803982, + // -1239975.7277694121003151, 4133787.1656649876385927, + // 2465999.3703419454395771, -4337686.8389181373640895, + // -5741249.6292232554405928, 1762825.0474482532590628 + VLOAD_64(v8, 0xc135eb6609da26f0, 0x415d0dcef92cf900, 0xc14de8956e555998, + 0xc160c3290a7c524f, 0xc14ee2ea1b60a4b6, 0x415dff00abf6d88c, + 0x415981aa13aad12e, 0x415ab8199c7cc4c8, 0x41551b5395bda164, + 0x413e8f9e9ae5c3f0, 0xc132eba7ba4f18a0, 0x414f89cd953482a4, + 0x4142d067af675d68, 0xc1508c05b5b0d5b3, 0xc155e6b06845319e, + 0x413ae6090c259198); + // -181636.6228598635643721 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xc1062c24fb9df3c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vfrdiv.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.0238483573164265, 0.0000000000000000, + // 0.0206680130126992, 0.0000000000000000, + // -0.0230992907800273, 0.0000000000000000, + 
// -0.0259322982659703, 0.0000000000000000, + // -0.0906892332144711, 0.0000000000000000, + // -0.0439395197625382, 0.0000000000000000, + // 0.0418740747326899, 0.0000000000000000, -0.1030372373723578 + VCMP_U64(6, v16, 0x0, 0xbf986bb42af3122b, 0x0, 0x3f9529fedfd9f42e, 0x0, + 0xbf97a75729d81370, 0x0, 0xbf9a8dff13d98f11, 0x0, 0xbfb73768dac16680, + 0x0, 0xbfa67f3da0c39bb5, 0x0, 0x3fa57084cb0de853, 0x0, + 0xbfba60a5fcc8d2be); +}; + +int main(void) { + enable_vec(); + enable_fp(); + // Change RM to RTZ since there are issues with FDIV + RNE in fpnew + // Update: there are issues also with RTZ... + CHANGE_RM(RM_RTZ); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c new file mode 100644 index 000000000..985fbf8b9 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmax.c @@ -0,0 +1,348 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmax.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredmax.vs v4, v8, v12"); + VCMP_U32(2, v4, 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(3, v8, 0x4020000000000000); + + // Super long vector length + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + // 
0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U32(4, v8, 0x41000000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmax.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x4800); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredmax.vs v8, v8, v12, v0.t"); + VCMP_U32(6, v8, 0x41000000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmax.vs v8, v16, v24, v0.t"); + VCMP_U64(7, v8, 0x4020000000000000); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmax.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x4800, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmax.vs v4, v8, v12"); + VCMP_U32(9, v4, 0x41000000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 
0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(10, v8, 0x4020000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(11, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(12, v8, 0x4008000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 
0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(13, v8, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 
0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24"); + VCMP_U64(14, v8, 0x4020000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmax.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 
0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmax.vs v4, v8, v12, v0.t"); + VCMP_U32(16, v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmax.vs v8, v16, v24, v0.t"); + VCMP_U64(17, v8, 0x4000000000000000, 
0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c new file mode 100644 index 000000000..eb629bd06 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredmin.c @@ -0,0 +1,350 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmin.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x3c00); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredmin.vs v4, v8, v12"); + VCMP_U32(2, v4, 0x3F800000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 
0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(3, v8, 0x3FF0000000000000); + + // Super long vector length + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000, + // + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U32(4, v8, 0x3F800000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredmin.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x3c00); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredmin.vs v4, v8, v12, v0.t"); + VCMP_U32(6, v4, 0x3F800000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + 
VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredmin.vs v8, v16, v24, v0.t"); + VCMP_U64(7, v8, 0x3FF0000000000000); +} + +// Are we respecting the undisturbed tail policy? +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmin.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmin.vs v4, v8, v12"); + VCMP_U32(9, v4, 0x3F800000, 0x40000000, 
0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(10, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 
0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(11, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(12, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(13, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 
0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24"); + VCMP_U64(14, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 
0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredmin.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredmin.vs v4, v8, v12, v0.t"); + VCMP_U32(16, v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 
0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredmin.vs v8, v16, v24, v0.t"); + VCMP_U64(17, v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c new file mode 100644 index 000000000..bd4d61467 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredosum.c @@ -0,0 +1,348 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredosum.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x5490); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredosum.vs v4, v8, v12"); + VCMP_U32(2, v4, 0x42920000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(3, v8, 0x4052400000000000); + + // Super long vector length + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 
// 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + // 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + // 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U32(4, v8, 0x43110000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredosum.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x50A0); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredosum.vs v4, v8, v12, v0.t"); + VCMP_U32(6, v4, 0x42140000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredosum.vs v8, v16, v24, v0.t"); + VCMP_U64(7, v8, 0x4042800000000000); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredosum.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x5490, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredosum.vs v4, v8, v12"); + VCMP_U32(9, v4, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 
0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(10, v8, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(11, v8, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(12, v8, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 
0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(13, v8, 0x403d000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 
0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24"); + VCMP_U64(14, v8, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredosum.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 
0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredosum.vs v4, v8, v12, v0.t"); + VCMP_U32(16, v4, 0x40000000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredosum.vs v8, v16, v24, v0.t"); + VCMP_U64(17, v8, 0x4008000000000000, 
0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c new file mode 100644 index 000000000..e019d3787 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfredusum.c @@ -0,0 +1,352 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredsum.vs v2, v4, v6"); + VCMP_U16(1, v2, 0x5490); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredsum.vs v4, v8, v12"); + VCMP_F32(2, v4, 0x42920000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 
0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(3, v8, 0x4052400000000000); + + // Super long vector length + // VSET(64, e32, m8); + VSET(32, e32, m8); + VLOAD_32( + v16, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000, + + 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, + 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, + 0x40A00000, 0x40C00000, 0x40E00000, 0x41000000); + + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000, + // + // 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + // 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + // 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + // 0x41000000); + VLOAD_32(v24, 0x3F800000); + asm volatile("vfredsum.vs v8, v16, v24"); + // VCMP_F32(4, v8, 0x43908000); + VCMP_F32(4, v8, 0x43110000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00); + asm volatile("vfredsum.vs v2, v4, v6, v0.t"); + VCMP_U16(5, v2, 0x50A0); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000); + asm volatile("vfredsum.vs v4, v8, v12, v0.t"); + 
VCMP_F32(6, v4, 0x42140000); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000); + asm volatile("vfredsum.vs v8, v16, v24, v0.t"); + VCMP_F64(7, v8, 0x4042800000000000); +} + +// Are we respecting the undisturbed tail policy? +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredsum.vs v2, v4, v6"); + VCMP_U16(8, v2, 0x5490, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm 
volatile("vfredsum.vs v4, v8, v12"); + VCMP_F32(9, v4, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(10, v8, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 
0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(11, v8, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 
0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(12, v8, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(13, v8, 0x403d000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 
0x4020000000000000, 0x3ff0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e64, m8); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24"); + VCMP_F64(14, v8, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v4, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v6, 0x3c00, 0x4000, 0x4200, 
0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_16(v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfredsum.vs v2, v4, v6, v0.t"); + VCMP_U16(15, v2, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, + 0x4800); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v12, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfredsum.vs v4, v8, v12, v0.t"); + VCMP_F32(16, v4, 0x40000000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(3, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v24, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 
0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfredsum.vs v8, v16, v24, v0.t"); + VCMP_F64(17, v8, 0x4008000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c new file mode 100644 index 000000000..68d22b5ad --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfrsub.c @@ -0,0 +1,167 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values (vector-scalar) +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.0273, -0.8511, 0.7173, 0.9551, -0.7842, -0.6509, -0.5771, + // 0.6060, -0.5361, 0.6099, 0.2859, 0.6318, -0.9521, 0.3818, + // 0.2783, -0.7905 + VLOAD_16(v4, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, + 0xb84a, 0x38e1, 0x3493, 0x390e, 0xbb9e, 0x361c, 0x3474, 0xba53); + double dscalar_16; + // 0.3062 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34e6); + asm volatile("vfrsub.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.3335, 1.1572, -0.4111, -0.6489, 1.0898, 0.9570, 0.8833, + // -0.2998, 0.8423, -0.3037, 0.0203, -0.3257, 1.2578, + // -0.0757, 0.0278, 1.0967 + VCMP_U16(1, v2, 0x3556, 0x3ca1, 0xb694, 0xb931, 0x3c5c, 0x3ba8, 0x3b11, + 0xb4cc, 0x3abd, 0xb4dc, 0x2530, 0xb536, 0x3d08, 0xacd8, 0x2720, + 0x3c63); + + VSET(16, e32, m4); + // 0.61218858, 0.50298065, 0.82400811, -0.50508654, + // -0.08447543, -0.66344708, -0.94741052, 0.85856712, + // -0.16725175, -0.36700448, -0.86911696, 0.82600677, + // -0.95377433, 0.06016647, 0.67027277, 0.08167093 + VLOAD_32(v8, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, + 0xbf29d7ab, 0xbf72897f, 0x3f5bcb0e, 0xbe2b440b, 0xbebbe803, + 0xbf5e7e73, 0x3f53752e, 0xbf742a8e, 0x3d76711d, 0x3f2b96ff, + 0x3da74316); + double dscalar_32; + // -0.78482366 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf48ea34); + asm volatile("vfrsub.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -1.39701223, -1.28780437, -1.60883176, -0.27973711, + // -0.70034826, -0.12137657, 0.16258687, -1.64339077, + // -0.61757189, -0.41781917, 0.08429331, -1.61083043, + // 0.16895068, -0.84499013, -1.45509648, -0.86649460 + VCMP_U32(2, v4, 0xbfb2d14c, 0xbfa4d6c6, 0xbfcdee33, 0xbe8f39b4, 0xbf334a06, + 0xbdf89448, 0x3e267d2c, 0xbfd25aa1, 0xbf1e1931, 0xbed5ec65, + 0x3daca1f8, 0xbfce2fb1, 0x3e2d0168, 
0xbf585146, 0xbfba409a, + 0xbf5dd297); + + VSET(16, e64, m8); + // -0.0920900511004143, 0.2386858516984947, 0.7068975504949517, + // 0.5997172971219242, 0.7714780386644180, -0.1053493184316212, + // 0.8711121216121871, -0.7388672665065719, 0.0889924652556937, + // 0.3266446452514173, -0.5909707717470494, -0.2733520923877579, + // 0.2365505631181986, 0.9616545156279142, -0.9315790291358075, + // -0.8056559777055108 + VLOAD_64(v16, 0xbfb79336adc36440, 0x3fce8d420b880e70, 0x3fe69ee79c9ff24a, + 0x3fe330e2543f7e66, 0x3fe8aff2b634ab34, 0xbfbaf82c4551d810, + 0x3febe026872f2710, 0xbfe7a4ccf737616c, 0x3fb6c835cfdd1640, + 0x3fd4e7bef1312ccc, 0xbfe2e93b89317464, 0xbfd17e99c6464f50, + 0x3fce4749f238b5c0, 0x3feec5dfb0d5860a, 0xbfedcf7ed2f8e31e, + 0xbfe9c7ef0b824e6e); + double dscalar_64; + // -0.4500891854782252 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfdcce42defa6264); + asm volatile("vfrsub.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.3579991343778108, -0.6887750371767198, -1.1569867359731769, + // -1.0498064826001494, -1.2215672241426432, -0.3447398670466040, + // -1.3212013070904123, 0.2887780810283467, -0.5390816507339189, + // -0.7767338307296425, 0.1408815862688242, -0.1767370930904673, + // -0.6866397485964237, -1.4117437011061393, 0.4814898436575823, + // 0.3555667922272856 + VCMP_U64(3, v8, 0xbfd6e97533898954, 0xbfe60a71f25f34ce, 0xbff28304860e91be, + 0xbff0cc01e1de57cc, 0xbff38b8a12d8ee33, 0xbfd61037cda5ec60, + 0xbff523a3fb562c21, 0x3fd27b570f746074, 0xbfe140282978d3fa, + 0xbfe8db00e815c798, 0x3fc2086866d10cc8, 0xbfc69f5231682628, + 0xbfe5f8f3ec0b5ea2, 0xbff6968090295b9e, 0x3fded0bac6f763d8, + 0x3fd6c19b380a3a78); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.0273, -0.8511, 0.7173, 0.9551, -0.7842, -0.6509, + // -0.5771, 0.6060, -0.5361, 0.6099, 0.2859, 0.6318, + // -0.9521, 0.3818, 0.2783, -0.7905 + VLOAD_16(v4, 0xa700, 0xbacf, 0x39bd, 0x3ba4, 0xba46, 0xb935, 0xb89e, 0x38d9, + 
0xb84a, 0x38e1, 0x3493, 0x390e, 0xbb9e, 0x361c, 0x3474, 0xba53); + double dscalar_16; + // 0.3062 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x34e6); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfrsub.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 1.1572, 0.0000, -0.6489, 0.0000, 0.9570, 0.0000, + // -0.2998, 0.0000, -0.3037, 0.0000, -0.3257, 0.0000, + // -0.0757, 0.0000, 1.0967 + VCMP_U16(4, v2, 0x0, 0x3ca1, 0x0, 0xb931, 0x0, 0x3ba8, 0x0, 0xb4cc, 0x0, + 0xb4dc, 0x0, 0xb536, 0x0, 0xacd8, 0x0, 0x3c63); + + VSET(16, e32, m4); + // 0.61218858, 0.50298065, 0.82400811, -0.50508654, + // -0.08447543, -0.66344708, -0.94741052, 0.85856712, + // -0.16725175, -0.36700448, -0.86911696, 0.82600677, + // -0.95377433, 0.06016647, 0.67027277, 0.08167093 + VLOAD_32(v8, 0x3f1cb864, 0x3f00c357, 0x3f52f232, 0xbf014d5a, 0xbdad0174, + 0xbf29d7ab, 0xbf72897f, 0x3f5bcb0e, 0xbe2b440b, 0xbebbe803, + 0xbf5e7e73, 0x3f53752e, 0xbf742a8e, 0x3d76711d, 0x3f2b96ff, + 0x3da74316); + double dscalar_32; + // -0.78482366 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf48ea34); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfrsub.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -1.28780437, 0.00000000, -0.27973711, + // 0.00000000, -0.12137657, 0.00000000, -1.64339077, + // 0.00000000, -0.41781917, 0.00000000, -1.61083043, + // 0.00000000, -0.84499013, 0.00000000, -0.86649460 + VCMP_U32(5, v4, 0x0, 0xbfa4d6c6, 0x0, 0xbe8f39b4, 0x0, 0xbdf89448, 0x0, + 0xbfd25aa1, 0x0, 0xbed5ec65, 0x0, 0xbfce2fb1, 0x0, 0xbf585146, 0x0, + 0xbf5dd297); + + VSET(16, e64, m8); + // -0.0920900511004143, 0.2386858516984947, + // 0.7068975504949517, 0.5997172971219242, 0.7714780386644180, + // -0.1053493184316212, 0.8711121216121871, + // -0.7388672665065719, 0.0889924652556937, + // 0.3266446452514173, -0.5909707717470494, + // -0.2733520923877579, 0.2365505631181986, + // 0.9616545156279142, -0.9315790291358075, -0.8056559777055108 + VLOAD_64(v16, 0xbfb79336adc36440, 0x3fce8d420b880e70, 
0x3fe69ee79c9ff24a, + 0x3fe330e2543f7e66, 0x3fe8aff2b634ab34, 0xbfbaf82c4551d810, + 0x3febe026872f2710, 0xbfe7a4ccf737616c, 0x3fb6c835cfdd1640, + 0x3fd4e7bef1312ccc, 0xbfe2e93b89317464, 0xbfd17e99c6464f50, + 0x3fce4749f238b5c0, 0x3feec5dfb0d5860a, 0xbfedcf7ed2f8e31e, + 0xbfe9c7ef0b824e6e); + double dscalar_64; + // -0.4500891854782252 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfdcce42defa6264); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfrsub.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.6887750371767198, 0.0000000000000000, + // -1.0498064826001494, 0.0000000000000000, + // -0.3447398670466040, 0.0000000000000000, + // 0.2887780810283467, 0.0000000000000000, + // -0.7767338307296425, 0.0000000000000000, + // -0.1767370930904673, 0.0000000000000000, + // -1.4117437011061393, 0.0000000000000000, 0.3555667922272856 + VCMP_U64(6, v8, 0x0, 0xbfe60a71f25f34ce, 0x0, 0xbff0cc01e1de57cc, 0x0, + 0xbfd61037cda5ec60, 0x0, 0x3fd27b570f746074, 0x0, 0xbfe8db00e815c798, + 0x0, 0xbfc69f5231682628, 0x0, 0xbff6968090295b9e, 0x0, + 0x3fd6c19b380a3a78); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c new file mode 100644 index 000000000..f69ea8d24 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnj.c @@ -0,0 +1,408 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.3784, 0.9043, -0.4600, -0.6748, 0.4448, 0.8804, 0.1497, + // 0.7285, 0.9927, 0.9922, 0.8965, 0.8672, -0.1860, 0.9336, + // -0.2959, 0.9668 + VLOAD_16(v4, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + 0x3bf1, 0x3bf0, 0x3b2c, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); + // -0.7988, -0.5054, -0.9380, -0.7383, -0.7168, 0.2181, -0.1597, + // 0.1833, 0.0045, -0.2152, 0.1919, -0.6914, 0.1748, -0.8604, + // 0.6084, 0.1591 + VLOAD_16(v6, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, + 0x1c8f, 0xb2e3, 0x3224, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); + asm volatile("vfsgnj.vv v2, v4, v6"); + // -0.3784, -0.9043, -0.4600, -0.6748, -0.4448, 0.8804, -0.1497, + // 0.7285, 0.9927, -0.9922, 0.8965, -0.8672, 0.1860, -0.9336, + // 0.2959, 0.9668 + VCMP_U16(1, v2, 0xb60e, 0xbb3c, 0xb75c, 0xb966, 0xb71e, 0x3b0b, 0xb0ca, + 0x39d4, 0x3bf1, 0xbbf0, 0x3b2c, 0xbaf0, 0x31f4, 0xbb78, 0x34bc, + 0x3bbc); + + VSET(16, e32, m4); + // 0.30226409, 0.06318295, -0.82590002, -0.17829193, + // 0.45379546, 0.85831785, -0.43186289, -0.32250872, + // 0.35404092, -0.55081791, 0.09124859, -0.13254598, + // 0.95786512, 0.95395225, 0.19890578, 0.76956910 + VLOAD_32(v8, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, + 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, + 0x3f45027b); + // 0.06560040, 0.31805936, 0.14663234, -0.85004497, + // -0.49171701, 0.32139263, -0.09995110, -0.34368968, + // 0.33917251, 0.07372360, 0.70147520, 0.82915747, + // -0.14581841, -0.19974701, -0.58837658, 0.95794803 + VLOAD_32(v12, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, + 0x3ea48d93, 0xbdccb329, 0xbeaff818, 0x3eada805, 0x3d96fc66, + 0x3f3393e1, 
0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, + 0x3f753c15); + asm volatile("vfsgnj.vv v4, v8, v12"); + // 0.30226409, 0.06318295, 0.82590002, -0.17829193, + // -0.45379546, 0.85831785, -0.43186289, -0.32250872, + // 0.35404092, 0.55081791, 0.09124859, 0.13254598, + // -0.95786512, -0.95395225, -0.19890578, 0.76956910 + VCMP_U32(2, v4, 0x3e9ac25c, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, + 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0x3f0d0267, + 0x3dbae08b, 0x3e07ba22, 0xbf7536a6, 0xbf743637, 0xbe4badf5, + 0x3f45027b); + + VSET(16, e64, m8); + // -0.1900636538602862, -0.9484843154859770, 0.5869658512198073, + // 0.4707187701595239, 0.1954104859873083, 0.0486819373954939, + // -0.1899986048192088, -0.1837438621239862, 0.2694105234528963, + // -0.7960262036276018, 0.6381040017115214, 0.2199215324293253, + // 0.4219965521278597, -0.6541697303087526, 0.7254411745966671, + // 0.2439726910863504 + VLOAD_64(v16, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, + 0x3fde20419edcb428, 0x3fc90335f74e33c8, 0x3fa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, + 0x3fdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0x3fe736d06902107a, + 0x3fcf3a7f44aa9f48); + // 0.0713540199640168, 0.3499800646587572, -0.5478360240866667, + // -0.7324007835973676, 0.5646664961108800, + // -0.7430380608733607, -0.5676032662558192, + // -0.7382565525776155, -0.7933198466305424, + // -0.0650991402083496, -0.1766522935757786, + // -0.4663829943595241, -0.1565231028144627, + // -0.0629224333525875, -0.9086692399439535, -0.1206057821437510 + VLOAD_64(v24, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 0xbfe187df69e0bb9c, + 0xbfe76fd3c4a3b1e8, 0x3fe211bf78be2e36, 0xbfe7c6f7c1644c86, + 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, + 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, + 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, + 0xbfbee005420412c0); + asm 
volatile("vfsgnj.vv v8, v16, v24"); + // 0.1900636538602862, 0.9484843154859770, -0.5869658512198073, + // -0.4707187701595239, 0.1954104859873083, + // -0.0486819373954939, -0.1899986048192088, + // -0.1837438621239862, -0.2694105234528963, + // -0.7960262036276018, -0.6381040017115214, + // -0.2199215324293253, -0.4219965521278597, + // -0.6541697303087526, -0.7254411745966671, -0.2439726910863504 + VCMP_U64(3, v8, 0x3fc854017cbe7d20, 0x3fee59fbc778ffbc, 0xbfe2c86c9bdb73b4, + 0xbfde20419edcb428, 0x3fc90335f74e33c8, 0xbfa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0xbfd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0xbfe46b59155986dc, 0xbfcc266386bc2e10, + 0xbfdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0xbfe736d06902107a, + 0xbfcf3a7f44aa9f48); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.3784, 0.9043, -0.4600, -0.6748, 0.4448, 0.8804, 0.1497, + // 0.7285, 0.9927, 0.9922, 0.8965, 0.8672, -0.1860, 0.9336, + // -0.2959, 0.9668 + VLOAD_16(v4, 0x360e, 0x3b3c, 0xb75c, 0xb966, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + 0x3bf1, 0x3bf0, 0x3b2c, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); + // -0.7988, -0.5054, -0.9380, -0.7383, -0.7168, 0.2181, -0.1597, + // 0.1833, 0.0045, -0.2152, 0.1919, -0.6914, 0.1748, -0.8604, + // 0.6084, 0.1591 + VLOAD_16(v6, 0xba64, 0xb80b, 0xbb81, 0xb9e8, 0xb9bc, 0x32fb, 0xb11c, 0x31de, + 0x1c8f, 0xb2e3, 0x3224, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnj.vv v2, v4, v6, v0.t"); + // 0.0000, -0.9043, 0.0000, -0.6748, 0.0000, 0.8804, 0.0000, + // 0.7285, 0.0000, -0.9922, 0.0000, -0.8672, 0.0000, -0.9336, + // 0.0000, 0.9668 + VCMP_U16(4, v2, 0x0, 0xbb3c, 0x0, 0xb966, 0x0, 0x3b0b, 0x0, 0x39d4, 0x0, + 0xbbf0, 0x0, 0xbaf0, 0x0, 0xbb78, 0x0, 0x3bbc); + + VSET(16, e32, m4); + // 0.30226409, 0.06318295, -0.82590002, -0.17829193, + // 0.45379546, 0.85831785, -0.43186289, -0.32250872, + // 0.35404092, 
-0.55081791, 0.09124859, -0.13254598, + // 0.95786512, 0.95395225, 0.19890578, 0.76956910 + VLOAD_32(v8, 0x3e9ac25c, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + 0x3f5bbab8, 0xbedd1d22, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, + 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, + 0x3f45027b); + // 0.06560040, 0.31805936, 0.14663234, -0.85004497, + // -0.49171701, 0.32139263, -0.09995110, -0.34368968, + // 0.33917251, 0.07372360, 0.70147520, 0.82915747, + // -0.14581841, -0.19974701, -0.58837658, 0.95794803 + VLOAD_32(v12, 0x3d865981, 0x3ea2d8ad, 0x3e1626ca, 0xbf599c8c, 0xbefbc255, + 0x3ea48d93, 0xbdccb329, 0xbeaff818, 0x3eada805, 0x3d96fc66, + 0x3f3393e1, 0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, + 0x3f753c15); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnj.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.06318295, 0.00000000, -0.17829193, + // 0.00000000, 0.85831785, 0.00000000, -0.32250872, + // 0.00000000, 0.55081791, 0.00000000, 0.13254598, + // 0.00000000, -0.95395225, 0.00000000, 0.76956910 + VCMP_U32(5, v4, 0x0, 0x3d816610, 0x0, 0xbe369229, 0x0, 0x3f5bbab8, 0x0, + 0xbea51fdd, 0x0, 0x3f0d0267, 0x0, 0x3e07ba22, 0x0, 0xbf743637, 0x0, + 0x3f45027b); + + VSET(16, e64, m8); + // -0.1900636538602862, -0.9484843154859770, 0.5869658512198073, + // 0.4707187701595239, 0.1954104859873083, 0.0486819373954939, + // -0.1899986048192088, -0.1837438621239862, 0.2694105234528963, + // -0.7960262036276018, 0.6381040017115214, 0.2199215324293253, + // 0.4219965521278597, -0.6541697303087526, 0.7254411745966671, + // 0.2439726910863504 + VLOAD_64(v16, 0xbfc854017cbe7d20, 0xbfee59fbc778ffbc, 0x3fe2c86c9bdb73b4, + 0x3fde20419edcb428, 0x3fc90335f74e33c8, 0x3fa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, + 0x3fdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0x3fe736d06902107a, + 0x3fcf3a7f44aa9f48); + // 0.0713540199640168, 0.3499800646587572, -0.5478360240866667, + // 
-0.7324007835973676, 0.5646664961108800, + // -0.7430380608733607, -0.5676032662558192, + // -0.7382565525776155, -0.7933198466305424, + // -0.0650991402083496, -0.1766522935757786, + // -0.4663829943595241, -0.1565231028144627, + // -0.0629224333525875, -0.9086692399439535, -0.1206057821437510 + VLOAD_64(v24, 0x3fb24441ce2eff50, 0x3fd66612c8fd8664, 0xbfe187df69e0bb9c, + 0xbfe76fd3c4a3b1e8, 0x3fe211bf78be2e36, 0xbfe7c6f7c1644c86, + 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, + 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, + 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, + 0xbfbee005420412c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnj.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.9484843154859770, 0.0000000000000000, + // -0.4707187701595239, 0.0000000000000000, + // -0.0486819373954939, 0.0000000000000000, + // -0.1837438621239862, 0.0000000000000000, + // -0.7960262036276018, 0.0000000000000000, + // -0.2199215324293253, 0.0000000000000000, + // -0.6541697303087526, 0.0000000000000000, -0.2439726910863504 + VCMP_U64(6, v8, 0x0, 0x3fee59fbc778ffbc, 0x0, 0xbfde20419edcb428, 0x0, + 0xbfa8ecd6c20a0480, 0x0, 0xbfc784eb3b54e580, 0x0, 0xbfe9790bf1eadde4, + 0x0, 0xbfcc266386bc2e10, 0x0, 0xbfe4eef55bb6b208, 0x0, + 0xbfcf3a7f44aa9f48); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.9023 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3b38); + // 0.5586, 0.0221, 0.7397, 0.9844, -0.1426, 0.6958, 0.0319, + // 0.3943, -0.5425, 0.9814, 0.7852, -0.7271, -0.1810, -0.7485, + // -0.3499, -0.2178 + VLOAD_16(v4, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, + 0xb857, 0x3bda, 0x3a48, 0xb9d1, 0xb1cb, 0xb9fd, 0xb599, 0xb2f8); + asm volatile("vfsgnj.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.5586, 0.0221, 0.7397, 0.9844, 0.1426, 0.6958, 0.0319, + // 0.3943, 0.5425, 0.9814, 0.7852, 0.7271, 0.1810, 0.7485, + // 0.3499, 
0.2178 + VCMP_U16(7, v2, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0x3090, 0x3991, 0x2816, + 0x364f, 0x3857, 0x3bda, 0x3a48, 0x39d1, 0x31cb, 0x39fd, 0x3599, + 0x32f8); + + VSET(16, e32, m4); + double dscalar_32; + // 0.64529878 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f25324d); + // 0.27794743, 0.64720273, 0.88201439, -0.27750894, + // -0.02381280, -0.27677080, -0.58998328, 0.15329099, + // 0.52908343, -0.63265759, 0.48432603, 0.70191479, + // -0.55785930, 0.34719029, -0.06872076, -0.69960916 + VLOAD_32(v8, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, + 0xbe8db4e7, 0xbf170925, 0x3e1cf850, 0x3f077203, 0xbf21f5d9, + 0x3ef7f995, 0x3f33b0b0, 0xbf0ecfde, 0x3eb1c2ed, 0xbd8cbd78, + 0xbf331996); + asm volatile("vfsgnj.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 0.27794743, 0.64720273, 0.88201439, 0.27750894, + // 0.02381280, 0.27677080, 0.58998328, 0.15329099, + // 0.52908343, 0.63265759, 0.48432603, 0.70191479, + // 0.55785930, 0.34719029, 0.06872076, 0.69960916 + VCMP_U32(8, v4, 0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0x3e8e15a7, 0x3cc31310, + 0x3e8db4e7, 0x3f170925, 0x3e1cf850, 0x3f077203, 0x3f21f5d9, + 0x3ef7f995, 0x3f33b0b0, 0x3f0ecfde, 0x3eb1c2ed, 0x3d8cbd78, + 0x3f331996); + + VSET(16, e64, m8); + double dscalar_64; + // 0.4863995754678485 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fdf212baf5b0d68); + // 0.4577518787562838, -0.0989909265811582, + // -0.5406373582107198, -0.6896639688670565, + // 0.9053190721589099, -0.7617679756965072, 0.4649312111760273, + // 0.6917063611214438, 0.2205644023843889, 0.1217272698758698, + // -0.3345487709580650, 0.1693366988903542, + // 0.4095982059989967, 0.7157757577569959, + // -0.5339346851091937, 0.4946553559543683 + VLOAD_64(v16, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, + 0xbfe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0xbfe860673bd8363e, + 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, + 0x3fbf2984b325f230, 0xbfd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, + 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0xbfe115fe3157cf38, + 
0x3fdfa86ef0276044); + asm volatile("vfsgnj.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.4577518787562838, 0.0989909265811582, 0.5406373582107198, + // 0.6896639688670565, 0.9053190721589099, 0.7617679756965072, + // 0.4649312111760273, 0.6917063611214438, 0.2205644023843889, + // 0.1217272698758698, 0.3345487709580650, 0.1693366988903542, + // 0.4095982059989967, 0.7157757577569959, 0.5339346851091937, + // 0.4946553559543683 + VCMP_U64(9, v8, 0x3fdd4bce893c3600, 0x3fb9577828444dc0, 0x3fe14ce6b790591e, + 0x3fe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0x3fe860673bd8363e, + 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, + 0x3fbf2984b325f230, 0x3fd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, + 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0x3fe115fe3157cf38, + 0x3fdfa86ef0276044); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 0.9023 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x3b38); + // 0.5586, 0.0221, 0.7397, 0.9844, -0.1426, 0.6958, 0.0319, + // 0.3943, -0.5425, 0.9814, 0.7852, -0.7271, -0.1810, + // -0.7485, -0.3499, -0.2178 + VLOAD_16(v4, 0x3878, 0x25a7, 0x39eb, 0x3be0, 0xb090, 0x3991, 0x2816, 0x364f, + 0xb857, 0x3bda, 0x3a48, 0xb9d1, 0xb1cb, 0xb9fd, 0xb599, 0xb2f8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnj.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.0221, 0.0000, 0.9844, 0.0000, 0.6958, 0.0000, + // 0.3943, 0.0000, 0.9814, 0.0000, 0.7271, 0.0000, 0.7485, + // 0.0000, 0.2178 + VCMP_U16(10, v2, 0x0, 0x25a7, 0x0, 0x3be0, 0x0, 0x3991, 0x0, 0x364f, 0x0, + 0x3bda, 0x0, 0x39d1, 0x0, 0x39fd, 0x0, 0x32f8); + + VSET(16, e32, m4); + double dscalar_32; + // 0.64529878 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f25324d); + // 0.27794743, 0.64720273, 0.88201439, -0.27750894, + // -0.02381280, -0.27677080, -0.58998328, 0.15329099, + // 0.52908343, -0.63265759, 0.48432603, 0.70191479, + // -0.55785930, 0.34719029, -0.06872076, -0.69960916 + VLOAD_32(v8, 
0x3e8e4f20, 0x3f25af14, 0x3f61cbb2, 0xbe8e15a7, 0xbcc31310, + 0xbe8db4e7, 0xbf170925, 0x3e1cf850, 0x3f077203, 0xbf21f5d9, + 0x3ef7f995, 0x3f33b0b0, 0xbf0ecfde, 0x3eb1c2ed, 0xbd8cbd78, + 0xbf331996); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnj.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.64720273, 0.00000000, 0.27750894, + // 0.00000000, 0.27677080, 0.00000000, 0.15329099, + // 0.00000000, 0.63265759, 0.00000000, 0.70191479, + // 0.00000000, 0.34719029, 0.00000000, 0.69960916 + VCMP_U32(11, v4, 0x0, 0x3f25af14, 0x0, 0x3e8e15a7, 0x0, 0x3e8db4e7, 0x0, + 0x3e1cf850, 0x0, 0x3f21f5d9, 0x0, 0x3f33b0b0, 0x0, 0x3eb1c2ed, 0x0, + 0x3f331996); + + VSET(16, e64, m8); + double dscalar_64; + // 0.4863995754678485 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fdf212baf5b0d68); + // 0.4577518787562838, -0.0989909265811582, + // -0.5406373582107198, -0.6896639688670565, + // 0.9053190721589099, -0.7617679756965072, + // 0.4649312111760273, 0.6917063611214438, + // 0.2205644023843889, 0.1217272698758698, + // -0.3345487709580650, 0.1693366988903542, + // 0.4095982059989967, 0.7157757577569959, + // -0.5339346851091937, 0.4946553559543683 + VLOAD_64(v16, 0x3fdd4bce893c3600, 0xbfb9577828444dc0, 0xbfe14ce6b790591e, + 0xbfe611ba2bf06f2a, 0x3fecf85fb3ebc33c, 0xbfe860673bd8363e, + 0x3fddc16ed6b90158, 0x3fe6227560ee74e0, 0x3fcc3b744f738cd0, + 0x3fbf2984b325f230, 0xbfd5693f3f8ba3fc, 0x3fc5acd32fdf92e8, + 0x3fda36db64d10584, 0x3fe6e7a28fdabfd2, 0xbfe115fe3157cf38, + 0x3fdfa86ef0276044); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnj.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, 0.0989909265811582, 0.0000000000000000, + // 0.6896639688670565, 0.0000000000000000, 0.7617679756965072, + // 0.0000000000000000, 0.6917063611214438, 0.0000000000000000, + // 0.1217272698758698, 0.0000000000000000, 0.1693366988903542, + // 0.0000000000000000, 0.7157757577569959, 0.0000000000000000, + // 0.4946553559543683 + VCMP_U64(12, v8, 
0x0, 0x3fb9577828444dc0, 0x0, 0x3fe611ba2bf06f2a, 0x0, + 0x3fe860673bd8363e, 0x0, 0x3fe6227560ee74e0, 0x0, 0x3fbf2984b325f230, + 0x0, 0x3fc5acd32fdf92e8, 0x0, 0x3fe6e7a28fdabfd2, 0x0, + 0x3fdfa86ef0276044); +}; + +// The sign injection should work with NaNs and special values, and should not +// raise any exceptions +void TEST_CASE5(void) { + CLEAR_FFLAGS; + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, 0x0000, 0x3b3c, 0xb75c, 0x7fff, 0x371e, 0x3b0b, 0x30ca, 0x39d4, + 0x3bf1, 0x3bf0, 0x0000, 0x3af0, 0xb1f4, 0x3b78, 0xb4bc, 0x3bbc); + VLOAD_16(v6, 0x8000, 0xffff, 0xffff, 0xb9e8, 0xb9bc, 0x7fff, 0xb11c, 0x31de, + 0x1c8f, 0xb2e3, 0x7fff, 0xb988, 0x3198, 0xbae2, 0x38de, 0x3117); + asm volatile("vfsgnj.vv v2, v4, v6"); + VCMP_U16(13, v2, 0x8000, 0xbb3c, 0xb75c, 0xffff, 0xb71e, 0x3b0b, 0xb0ca, + 0x39d4, 0x3bf1, 0xbbf0, 0x0000, 0xbaf0, 0x31f4, 0xbb78, 0x34bc, + 0x3bbc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x00000000, 0x3d816610, 0xbf536e2f, 0xbe369229, 0x3ee857e1, + 0x7fffffff, 0x80000000, 0xbea51fdd, 0x3eb544da, 0xbf0d0267, + 0x3dbae08b, 0xbe07ba22, 0x3f7536a6, 0x3f743637, 0x3e4badf5, + 0x3f45027b); + VLOAD_32(v12, 0x80000000, 0x7fffffff, 0x3e1626ca, 0xffffffff, 0xbefbc255, + 0x7fffffff, 0xffffffff, 0xbeaff818, 0x3eada805, 0x3d96fc66, + 0x3f3393e1, 0x3f5443aa, 0xbe15516c, 0xbe4c8a7b, 0xbf169fd9, + 0x3f753c15); + asm volatile("vfsgnj.vv v4, v8, v12"); + VCMP_U32(14, v4, 0x80000000, 0x3d816610, 0x3f536e2f, 0xbe369229, 0xbee857e1, + 0x7fffffff, 0x80000000, 0xbea51fdd, 0x3eb544da, 0x3f0d0267, + 0x3dbae08b, 0x3e07ba22, 0xbf7536a6, 0xbf743637, 0xbe4badf5, + 0x3f45027b); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x0000000000000000, 0xbfee59fbc778ffbc, 0x7fffffffffffffff, + 0x3fde20419edcb428, 0x8000000000000000, 0x3fa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0x3fd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0x3fe46b59155986dc, 0x3fcc266386bc2e10, + 0x3fdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0x3fe736d06902107a, + 0x3fcf3a7f44aa9f48); + VLOAD_64(v24, 0x8000000000000000, 
0x7fffffffffffffff, 0xbfe187df69e0bb9c, + 0xbfe76fd3c4a3b1e8, 0x0000000000000001, 0xbfe7c6f7c1644c86, + 0xbfe229ce53357d20, 0xbfe79fcc34ac1d30, 0xbfe962e04d917824, + 0xbfb0aa5656314cf0, 0xbfc69c8ad7d5ef20, 0xbfddd9380f0bd244, + 0xbfc408f2f3d40a40, 0xbfb01baf416f2160, 0xbfed13d1838e183a, + 0xbfbee005420412c0); + asm volatile("vfsgnj.vv v8, v16, v24"); + VCMP_U64(15, v8, 0x8000000000000000, 0x3fee59fbc778ffbc, 0xffffffffffffffff, + 0xbfde20419edcb428, 0x0000000000000000, 0xbfa8ecd6c20a0480, + 0xbfc851dfd0fdf7f8, 0xbfc784eb3b54e580, 0xbfd13e05a2db6b68, + 0xbfe9790bf1eadde4, 0xbfe46b59155986dc, 0xbfcc266386bc2e10, + 0xbfdb01fdd39a7d9c, 0xbfe4eef55bb6b208, 0xbfe736d06902107a, + 0xbfcf3a7f44aa9f48); + CHECK_FFLAGS(0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c new file mode 100644 index 000000000..f07d8e90d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjn.c @@ -0,0 +1,350 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.5278, -0.6548, 0.2776, 0.8730, 0.2180, 0.6172, -0.8408, + // 0.9922, 0.4250, 0.7393, 0.2549, 0.4998, 0.4609, -0.6348, + // 0.1127, -0.1804 + VLOAD_16(v4, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, + 0x36cd, 0x39ea, 0x3414, 0x37ff, 0x3760, 0xb914, 0x2f36, 0xb1c6); + // -0.6348, -0.4368, -0.1896, 0.9419, -0.6108, -0.3594, -0.5166, + // -0.1266, -0.9233, 0.2368, 0.1243, 0.3745, 0.0945, -0.3088, + // 0.0190, -0.6289 + VLOAD_16(v6, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, + 0xbb63, 0x3394, 0x2ff5, 0x35fe, 0x2e0c, 0xb4f1, 0x24da, 0xb908); + asm volatile("vfsgnjn.vv v2, v4, v6"); + // 0.5278, 0.6548, 0.2776, -0.8730, 0.2180, 0.6172, 0.8408, + // 0.9922, 0.4250, -0.7393, -0.2549, -0.4998, -0.4609, 0.6348, + // -0.1127, 0.1804 + VCMP_U16(1, v2, 0x3839, 0x393d, 0x3471, 0xbafc, 0x32fa, 0x38f0, 0x3aba, + 0x3bf0, 0x36cd, 0xb9ea, 0xb414, 0xb7ff, 0xb760, 0x3914, 0xaf36, + 0x31c6); + + VSET(16, e32, m4); + // -0.64186704, 0.87601262, -0.93132722, 0.53574133, + // 0.17954259, -0.80486834, -0.95272040, -0.45182621, 0.20335940, + // 0.96179944, 0.80393785, 0.06180594, 0.86447370, + // -0.24008171, -0.42264909, -0.01868468 + VLOAD_32(v8, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0x3f092658, 0x3e37da03, + 0xbf4e0bda, 0xbf73e57c, 0xbee755c4, 0x3e503d72, 0x3f76387d, + 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, + 0xbc99109c); + // 0.32984266, -0.78281105, 0.73037797, 0.99060333, + // 0.44768164, 0.66998041, 0.39474848, -0.39895460, + // -0.06065369, 0.53388232, -0.60164928, -0.09839682, + // -0.38704434, 0.47123700, 0.40912241, -0.54495376 + VLOAD_32(v12, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, + 0x3f2b83d6, 0x3eca1c79, 0xbecc43c7, 0xbd787002, 0x3f08ac83, + 0xbf1a05b0, 
0xbdc98446, 0xbec62aad, 0x3ef145fa, 0x3ed1787e, + 0xbf0b8217); + asm volatile("vfsgnjn.vv v4, v8, v12"); + // -0.64186704, 0.87601262, -0.93132722, -0.53574133, + // -0.17954259, -0.80486834, -0.95272040, 0.45182621, + // 0.20335940, -0.96179944, 0.80393785, 0.06180594, 0.86447370, + // -0.24008171, -0.42264909, 0.01868468 + VCMP_U32(2, v4, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0xbf092658, 0xbe37da03, + 0xbf4e0bda, 0xbf73e57c, 0x3ee755c4, 0x3e503d72, 0xbf76387d, + 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, + 0x3c99109c); + + VSET(16, e64, m8); + // 0.3054868811191440, -0.2848737407493320, 0.8796894022735833, + // -0.2053728688878902, -0.3336030943630310, 0.2807217618714037, + // 0.4723331455917303, -0.8582398814993568, 0.8015611350975347, + // 0.0545934239457773, 0.8461592442963186, 0.5731810427237676, + // 0.9004228762726765, -0.5815114412549589, -0.4481603571708770, + // -0.3109452697316515 + VLOAD_64(v16, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, + 0xbfca49a87dadf9c0, 0xbfd559c0cb088d3c, 0x3fd1f75868a0d7ec, + 0x3fde3ab4cd4887cc, 0xbfeb76b37be53474, 0x3fe9a663899fa232, + 0x3fabf3ab54d8f940, 0x3feb13bc8d2ebe92, 0x3fe2577fc525f1c0, + 0x3fecd043a2c52a30, 0xbfe29bbde1ce1372, 0xbfdcaea8c75a67f8, + 0xbfd3e686fd15f950); + // -0.8601583185162320, -0.2023208019417544, 0.7046992650654684, + // 0.0669209072111863, -0.9495814052980500, 0.4501419112888980, + // 0.1528430256162707, -0.2750771515266404, -0.5539880061109905, + // -0.7302340801247744, 0.1579280396497211, -0.6128023516491234, + // 0.2706272563647967, -0.7982929669593624, -0.2521632643799878, + // 0.9025785865542095 + VLOAD_64(v24, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, + 0x3fb121ba83e404a0, 0xbfee62f88b14a294, 0x3fdccf2004e2dd30, + 0x3fc3905c3a38c700, 0xbfd19add326bc2ac, 0xbfe1ba450e13ef3a, + 0xbfe75e13dc91f006, 0x3fc436fc6ab55e68, 0xbfe39c13ad67d608, + 0x3fd151f4fbdf8d78, 0xbfe98b9db136f3e0, 0xbfd023716370f004, + 0x3fece1ec7cea3f5e); + asm volatile("vfsgnjn.vv 
v8, v16, v24"); + // 0.3054868811191440, 0.2848737407493320, -0.8796894022735833, + // -0.2053728688878902, 0.3336030943630310, + // -0.2807217618714037, -0.4723331455917303, 0.8582398814993568, + // 0.8015611350975347, 0.0545934239457773, -0.8461592442963186, + // 0.5731810427237676, -0.9004228762726765, 0.5815114412549589, + // 0.4481603571708770, -0.3109452697316515 + VCMP_U64(3, v8, 0x3fd38d18d8f0e180, 0x3fd23b5f12007bec, 0xbfec266a63ace3f8, + 0xbfca49a87dadf9c0, 0x3fd559c0cb088d3c, 0xbfd1f75868a0d7ec, + 0xbfde3ab4cd4887cc, 0x3feb76b37be53474, 0x3fe9a663899fa232, + 0x3fabf3ab54d8f940, 0xbfeb13bc8d2ebe92, 0x3fe2577fc525f1c0, + 0xbfecd043a2c52a30, 0x3fe29bbde1ce1372, 0x3fdcaea8c75a67f8, + 0xbfd3e686fd15f950); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.5278, -0.6548, 0.2776, 0.8730, 0.2180, 0.6172, -0.8408, + // 0.9922, 0.4250, 0.7393, 0.2549, 0.4998, 0.4609, -0.6348, + // 0.1127, -0.1804 + VLOAD_16(v4, 0x3839, 0xb93d, 0x3471, 0x3afc, 0x32fa, 0x38f0, 0xbaba, 0x3bf0, + 0x36cd, 0x39ea, 0x3414, 0x37ff, 0x3760, 0xb914, 0x2f36, 0xb1c6); + // -0.6348, -0.4368, -0.1896, 0.9419, -0.6108, -0.3594, -0.5166, + // -0.1266, -0.9233, 0.2368, 0.1243, 0.3745, 0.0945, -0.3088, + // 0.0190, -0.6289 + VLOAD_16(v6, 0xb914, 0xb6fd, 0xb211, 0x3b89, 0xb8e3, 0xb5c0, 0xb822, 0xb00d, + 0xbb63, 0x3394, 0x2ff5, 0x35fe, 0x2e0c, 0xb4f1, 0x24da, 0xb908); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjn.vv v2, v4, v6, v0.t"); + // 0.0000, 0.6548, 0.0000, -0.8730, 0.0000, 0.6172, 0.0000, + // 0.9922, 0.0000, -0.7393, 0.0000, -0.4998, 0.0000, 0.6348, + // 0.0000, 0.1804 + VCMP_U16(4, v2, 0x0, 0x393d, 0x0, 0xbafc, 0x0, 0x38f0, 0x0, 0x3bf0, 0x0, + 0xb9ea, 0x0, 0xb7ff, 0x0, 0x3914, 0x0, 0x31c6); + + VSET(16, e32, m4); + // -0.64186704, 0.87601262, -0.93132722, 0.53574133, + // 0.17954259, -0.80486834, -0.95272040, -0.45182621, 0.20335940, + // 0.96179944, 0.80393785, 
0.06180594, 0.86447370, + // -0.24008171, -0.42264909, -0.01868468 + VLOAD_32(v8, 0xbf245166, 0x3f60425d, 0xbf6e6b76, 0x3f092658, 0x3e37da03, + 0xbf4e0bda, 0xbf73e57c, 0xbee755c4, 0x3e503d72, 0x3f76387d, + 0x3f4dcedf, 0x3d7d283b, 0x3f5d4e26, 0xbe75d7fb, 0xbed86576, + 0xbc99109c); + // 0.32984266, -0.78281105, 0.73037797, 0.99060333, + // 0.44768164, 0.66998041, 0.39474848, -0.39895460, + // -0.06065369, 0.53388232, -0.60164928, -0.09839682, + // -0.38704434, 0.47123700, 0.40912241, -0.54495376 + VLOAD_32(v12, 0x3ea8e123, 0xbf48664e, 0x3f3afa0d, 0x3f7d982e, 0x3ee53687, + 0x3f2b83d6, 0x3eca1c79, 0xbecc43c7, 0xbd787002, 0x3f08ac83, + 0xbf1a05b0, 0xbdc98446, 0xbec62aad, 0x3ef145fa, 0x3ed1787e, + 0xbf0b8217); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjn.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.87601262, 0.00000000, -0.53574133, + // 0.00000000, -0.80486834, 0.00000000, 0.45182621, + // 0.00000000, -0.96179944, 0.00000000, 0.06180594, + // 0.00000000, -0.24008171, 0.00000000, 0.01868468 + VCMP_U32(5, v4, 0x0, 0x3f60425d, 0x0, 0xbf092658, 0x0, 0xbf4e0bda, 0x0, + 0x3ee755c4, 0x0, 0xbf76387d, 0x0, 0x3d7d283b, 0x0, 0xbe75d7fb, 0x0, + 0x3c99109c); + + VSET(16, e64, m8); + // 0.3054868811191440, -0.2848737407493320, 0.8796894022735833, + // -0.2053728688878902, -0.3336030943630310, 0.2807217618714037, + // 0.4723331455917303, -0.8582398814993568, 0.8015611350975347, + // 0.0545934239457773, 0.8461592442963186, 0.5731810427237676, + // 0.9004228762726765, -0.5815114412549589, -0.4481603571708770, + // -0.3109452697316515 + VLOAD_64(v16, 0x3fd38d18d8f0e180, 0xbfd23b5f12007bec, 0x3fec266a63ace3f8, + 0xbfca49a87dadf9c0, 0xbfd559c0cb088d3c, 0x3fd1f75868a0d7ec, + 0x3fde3ab4cd4887cc, 0xbfeb76b37be53474, 0x3fe9a663899fa232, + 0x3fabf3ab54d8f940, 0x3feb13bc8d2ebe92, 0x3fe2577fc525f1c0, + 0x3fecd043a2c52a30, 0xbfe29bbde1ce1372, 0xbfdcaea8c75a67f8, + 0xbfd3e686fd15f950); + // -0.8601583185162320, -0.2023208019417544, 0.7046992650654684, + // 0.0669209072111863, 
-0.9495814052980500, 0.4501419112888980, + // 0.1528430256162707, -0.2750771515266404, -0.5539880061109905, + // -0.7302340801247744, 0.1579280396497211, -0.6128023516491234, + // 0.2706272563647967, -0.7982929669593624, -0.2521632643799878, + // 0.9025785865542095 + VLOAD_64(v24, 0xbfeb866abced1b2e, 0xbfc9e5a5e5d1f648, 0x3fe68ce5791f15e4, + 0x3fb121ba83e404a0, 0xbfee62f88b14a294, 0x3fdccf2004e2dd30, + 0x3fc3905c3a38c700, 0xbfd19add326bc2ac, 0xbfe1ba450e13ef3a, + 0xbfe75e13dc91f006, 0x3fc436fc6ab55e68, 0xbfe39c13ad67d608, + 0x3fd151f4fbdf8d78, 0xbfe98b9db136f3e0, 0xbfd023716370f004, + 0x3fece1ec7cea3f5e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjn.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.2848737407493320, 0.0000000000000000, + // -0.2053728688878902, 0.0000000000000000, + // -0.2807217618714037, 0.0000000000000000, 0.8582398814993568, + // 0.0000000000000000, 0.0545934239457773, 0.0000000000000000, + // 0.5731810427237676, 0.0000000000000000, 0.5815114412549589, + // 0.0000000000000000, -0.3109452697316515 + VCMP_U64(6, v8, 0x0, 0x3fd23b5f12007bec, 0x0, 0xbfca49a87dadf9c0, 0x0, + 0xbfd1f75868a0d7ec, 0x0, 0x3feb76b37be53474, 0x0, 0x3fabf3ab54d8f940, + 0x0, 0x3fe2577fc525f1c0, 0x0, 0x3fe29bbde1ce1372, 0x0, + 0xbfd3e686fd15f950); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.6143 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb8ea); + // -0.9351, 0.6538, -0.6743, -0.4695, -0.1439, 0.6250, -0.1511, + // -0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, -0.0123, + // -0.9995, -0.3872 + VLOAD_16(v4, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, + 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0xa24d, 0xbbff, 0xb632); + asm volatile("vfsgnjn.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 0.9351, 0.6538, 0.6743, 0.4695, 0.1439, 0.6250, 0.1511, + // 0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, 0.0123, + // 0.9995, 0.3872 + VCMP_U16(7, v2, 0x3b7b, 
0x393b, 0x3965, 0x3783, 0x309b, 0x3900, 0x30d6, + 0x39fb, 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0x224d, 0x3bff, + 0x3632); + + VSET(16, e32, m4); + double dscalar_32; + // 0.56259364 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f100623); + // -0.00813205, -0.38676089, 0.94379848, 0.39548567, + // 0.90217608, 0.57424510, 0.05995686, -0.00974263, + // -0.45620662, -0.36967716, -0.56535333, -0.93745488, + // -0.55570704, 0.04399948, -0.57520008, -0.05702910 + VLOAD_32(v8, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, + 0x3f1301ba, 0x3d759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, + 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0x3d3438cd, 0xbf134050, + 0xbd699758); + asm volatile("vfsgnjn.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // -0.00813205, -0.38676089, -0.94379848, -0.39548567, + // -0.90217608, -0.57424510, -0.05995686, -0.00974263, + // -0.45620662, -0.36967716, -0.56535333, -0.93745488, + // -0.55570704, -0.04399948, -0.57520008, -0.05702910 + VCMP_U32(8, v4, 0xbc053c4a, 0xbec60586, 0xbf719cc7, 0xbeca7d19, 0xbf66f503, + 0xbf1301ba, 0xbd759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, + 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0xbd3438cd, 0xbf134050, + 0xbd699758); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1909501680714165 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fc8710e1b8426e8); + // -0.2692390874696449, -0.3268380231167121, 0.8386824891028197, + // -0.5650452268361481, -0.6389787807266418, 0.5318945600667211, + // -0.7817543128402196, -0.5679136293897145, + // -0.0001555883762874, 0.5283267089670276, 0.5439688283816015, + // -0.2866314604291811, -0.0576946087921848, 0.7960283598249005, + // -0.8999056473475127, 0.2142070697411482 + VLOAD_64(v16, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, + 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0x3fe10547bd8d051e, + 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, + 0x3fe0e80d6a13bbf4, 0x3fe1683150fe2844, 0xbfd2582b7b231344, + 0xbfad8a25d3d5fd40, 0x3fe9791077845df2, 0xbfeccc06ed9afc1e, + 
0x3fcb6b23238e1bc8); + asm volatile("vfsgnjn.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.2692390874696449, -0.3268380231167121, -0.8386824891028197, + // -0.5650452268361481, -0.6389787807266418, -0.5318945600667211, + // -0.7817543128402196, -0.5679136293897145, -0.0001555883762874, + // -0.5283267089670276, -0.5439688283816015, -0.2866314604291811, + // -0.0576946087921848, -0.7960283598249005, -0.8999056473475127, + // -0.2142070697411482 + VCMP_U64(9, v8, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0xbfead67ca8cd9566, + 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0xbfe10547bd8d051e, + 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, + 0xbfe0e80d6a13bbf4, 0xbfe1683150fe2844, 0xbfd2582b7b231344, + 0xbfad8a25d3d5fd40, 0xbfe9791077845df2, 0xbfeccc06ed9afc1e, + 0xbfcb6b23238e1bc8); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.6143 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb8ea); + // -0.9351, 0.6538, -0.6743, -0.4695, -0.1439, 0.6250, + // -0.1511, -0.7476, 0.8496, 0.6279, 0.5234, 0.2610, 0.6299, + // -0.0123, -0.9995, -0.3872 + VLOAD_16(v4, 0xbb7b, 0x393b, 0xb965, 0xb783, 0xb09b, 0x3900, 0xb0d6, 0xb9fb, + 0x3acc, 0x3906, 0x3830, 0x342d, 0x390a, 0xa24d, 0xbbff, 0xb632); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjn.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.6538, 0.0000, 0.4695, 0.0000, 0.6250, 0.0000, + // 0.7476, 0.0000, 0.6279, 0.0000, 0.2610, 0.0000, 0.0123, + // 0.0000, 0.3872 + VCMP_U16(10, v2, 0x0, 0x393b, 0x0, 0x3783, 0x0, 0x3900, 0x0, 0x39fb, 0x0, + 0x3906, 0x0, 0x342d, 0x0, 0x224d, 0x0, 0x3632); + + VSET(16, e32, m4); + double dscalar_32; + // 0.56259364 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f100623); + // -0.00813205, -0.38676089, 0.94379848, 0.39548567, + // 0.90217608, 0.57424510, 0.05995686, -0.00974263, + // -0.45620662, -0.36967716, -0.56535333, -0.93745488, + // -0.55570704, 0.04399948, -0.57520008, 
-0.05702910 + VLOAD_32(v8, 0xbc053c4a, 0xbec60586, 0x3f719cc7, 0x3eca7d19, 0x3f66f503, + 0x3f1301ba, 0x3d759554, 0xbc1f9f8d, 0xbee993ea, 0xbebd4653, + 0xbf10baff, 0xbf6ffd0b, 0xbf0e42d1, 0x3d3438cd, 0xbf134050, + 0xbd699758); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjn.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.38676089, 0.00000000, -0.39548567, + // 0.00000000, -0.57424510, 0.00000000, -0.00974263, + // 0.00000000, -0.36967716, 0.00000000, -0.93745488, + // 0.00000000, -0.04399948, 0.00000000, -0.05702910 + VCMP_U32(11, v4, 0x0, 0xbec60586, 0x0, 0xbeca7d19, 0x0, 0xbf1301ba, 0x0, + 0xbc1f9f8d, 0x0, 0xbebd4653, 0x0, 0xbf6ffd0b, 0x0, 0xbd3438cd, 0x0, + 0xbd699758); + + VSET(16, e64, m8); + double dscalar_64; + // 0.1909501680714165 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fc8710e1b8426e8); + // -0.2692390874696449, -0.3268380231167121, + // 0.8386824891028197, -0.5650452268361481, + // -0.6389787807266418, 0.5318945600667211, + // -0.7817543128402196, -0.5679136293897145, + // -0.0001555883762874, 0.5283267089670276, + // 0.5439688283816015, -0.2866314604291811, + // -0.0576946087921848, 0.7960283598249005, + // -0.8999056473475127, 0.2142070697411482 + VLOAD_64(v16, 0xbfd13b3694df2b24, 0xbfd4eaea07180958, 0x3fead67ca8cd9566, + 0xbfe214d9ba40b584, 0xbfe47283a0c1e25c, 0x3fe10547bd8d051e, + 0xbfe904219ee4fb76, 0xbfe22c593425cec0, 0xbf2464adf9bfe000, + 0x3fe0e80d6a13bbf4, 0x3fe1683150fe2844, 0xbfd2582b7b231344, + 0xbfad8a25d3d5fd40, 0x3fe9791077845df2, 0xbfeccc06ed9afc1e, + 0x3fcb6b23238e1bc8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjn.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.3268380231167121, 0.0000000000000000, + // -0.5650452268361481, 0.0000000000000000, + // -0.5318945600667211, 0.0000000000000000, + // -0.5679136293897145, 0.0000000000000000, + // -0.5283267089670276, 0.0000000000000000, + // -0.2866314604291811, 0.0000000000000000, + // -0.7960283598249005, 
0.0000000000000000, + // -0.2142070697411482 + VCMP_U64(12, v8, 0x0, 0xbfd4eaea07180958, 0x0, 0xbfe214d9ba40b584, 0x0, + 0xbfe10547bd8d051e, 0x0, 0xbfe22c593425cec0, 0x0, 0xbfe0e80d6a13bbf4, + 0x0, 0xbfd2582b7b231344, 0x0, 0xbfe9791077845df2, 0x0, + 0xbfcb6b23238e1bc8); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c new file mode 100644 index 000000000..2d6ea2cdf --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsgnjx.c @@ -0,0 +1,348 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.0371, 0.8374, 0.7183, 0.8086, -0.8940, 0.4626, 0.5449, + // 0.6831, 0.4661, 0.2981, 0.5615, -0.6167, -0.7075, -0.7603, + // 0.8438, -0.2742 + VLOAD_16(v4, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, + 0x3775, 0x34c5, 0x387e, 0xb8ef, 0xb9a9, 0xba15, 0x3ac0, 0xb463); + // 0.3516, 0.7925, -0.5034, -0.1672, 0.0703, -0.9731, 0.0361, + // -0.4077, 0.8965, 0.8242, -0.7822, 0.0265, -0.5361, 0.1226, + // -0.9917, 0.5415 + VLOAD_16(v6, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, + 0x3b2c, 0x3a98, 0xba42, 0x26cb, 0xb84a, 0x2fd8, 0xbbef, 0x3855); + asm volatile("vfsgnjx.vv v2, v4, v6"); + // -0.0371, 0.8374, -0.7183, -0.8086, -0.8940, -0.4626, 0.5449, + // -0.6831, 0.4661, 0.2981, -0.5615, -0.6167, 0.7075, -0.7603, + // -0.8438, -0.2742 + VCMP_U16(1, v2, 0xa8c1, 0x3ab3, 0xb9bf, 0xba78, 0xbb27, 0xb767, 0x385c, + 0xb977, 0x3775, 0x34c5, 0xb87e, 0xb8ef, 0x39a9, 0xba15, 0xbac0, + 0xb463); + + VSET(16, e32, m4); + // 
-0.00918692, -0.23372029, 0.42919466, 0.95128548, + // 0.05014091, 0.08194520, 0.65458435, 0.38167605, + // -0.52784044, 0.46330592, 0.66792834, 0.94584799, + // -0.11679628, 0.12139154, 0.61421394, -0.71422517 + VLOAD_32(v8, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, + 0x3da7d2e2, 0x3f2792d7, 0x3ec36b0b, 0xbf07208d, 0x3eed366f, + 0x3f2afd5a, 0x3f722318, 0xbdef32e4, 0x3df89c21, 0x3f1d3d20, + 0xbf36d776); + // -0.96525091, -0.82903022, -0.98528612, 0.36915505, + // 0.23285799, 0.19133335, 0.78484982, -0.40654737, + // -0.40144378, -0.94419461, 0.60990387, -0.37662670, + // 0.75369638, -0.82297397, 0.24545205, -0.75572032 + VLOAD_32(v12, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, + 0x3e43ece4, 0x3f48ebeb, 0xbed026fa, 0xbecd8a0a, 0xbf71b6bd, + 0x3f1c22a9, 0xbec0d537, 0x3f40f23f, 0xbf52ae6c, 0x3e7b57c8, + 0xbf4176e3); + asm volatile("vfsgnjx.vv v4, v8, v12"); + // 0.00918692, 0.23372029, -0.42919466, 0.95128548, + // 0.05014091, 0.08194520, 0.65458435, -0.38167605, + // 0.52784044, -0.46330592, 0.66792834, -0.94584799, + // -0.11679628, -0.12139154, 0.61421394, 0.71422517 + VCMP_U32(2, v4, 0x3c1684ba, 0x3e6f545f, 0xbedbbf67, 0x3f738772, 0x3d4d608d, + 0x3da7d2e2, 0x3f2792d7, 0xbec36b0b, 0x3f07208d, 0xbeed366f, + 0x3f2afd5a, 0xbf722318, 0xbdef32e4, 0xbdf89c21, 0x3f1d3d20, + 0x3f36d776); + + VSET(16, e64, m8); + // -0.4085246287477386, 0.8681744372264055, -0.9782992825101422, + // 0.9959576051606904, -0.7910104167136705, 0.0799315061445605, + // 0.2562329212571202, -0.0401280831920132, -0.6164331117742006, + // 0.0314794700215042, -0.2391312835511448, 0.2944948324466776, + // -0.3469257666022745, 0.3129356083924371, 0.1418123916338592, + // -0.2697778839142546 + VLOAD_64(v16, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, + 0x3fefdee27bcbc3c2, 0xbfe94ff513d293d6, 0x3fb4766424cf97d0, + 0x3fd0661ec43d4dd0, 0xbfa48bab09ebf660, 0xbfe3b9d1eee7bc0a, + 0x3fa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, + 0xbfd63408216c936c, 
0x3fd4072312f3290c, 0x3fc226e8901e1378, + 0xbfd1440a752621b8); + // -0.9042358342806300, 0.2953863994960662, -0.4373909703642964, + // 0.1464626280814265, -0.5161207396769107, -0.3525096032632213, + // -0.0692332757289065, -0.9900711773455610, 0.6225050177521096, + // -0.1361158534833962, 0.1558021548512183, 0.9766583762298613, + // 0.2768845956890595, 0.6672273199701737, -0.4444943981200347, + // 0.5095574851608440 + VLOAD_64(v24, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, + 0x3fc2bf4992d91480, 0xbfe0840fa43663a0, 0xbfd68f847062a774, + 0xbfb1b9459f0cf460, 0xbfefaea9bfed2a32, 0x3fe3eb8fa49aeb32, + 0xbfc16c3e8996d300, 0x3fc3f15333ddbc58, 0x3fef40c91128b1ea, + 0x3fd1b87a2ad00b5c, 0x3fe559ed1bc8a0c2, 0xbfdc7298a1cb9174, + 0x3fe04e4b7fc654a0); + asm volatile("vfsgnjx.vv v8, v16, v24"); + // 0.4085246287477386, 0.8681744372264055, 0.9782992825101422, + // 0.9959576051606904, 0.7910104167136705, -0.0799315061445605, + // -0.2562329212571202, 0.0401280831920132, + // -0.6164331117742006, -0.0314794700215042, + // -0.2391312835511448, 0.2944948324466776, + // -0.3469257666022745, 0.3129356083924371, + // -0.1418123916338592, -0.2697778839142546 + VCMP_U64(3, v8, 0x3fda25447c0540c8, 0x3febc815c1e38a2c, 0x3fef4e3a4c029a38, + 0x3fefdee27bcbc3c2, 0x3fe94ff513d293d6, 0xbfb4766424cf97d0, + 0xbfd0661ec43d4dd0, 0x3fa48bab09ebf660, 0xbfe3b9d1eee7bc0a, + 0xbfa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, + 0xbfd63408216c936c, 0x3fd4072312f3290c, 0xbfc226e8901e1378, + 0xbfd1440a752621b8); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.0371, 0.8374, 0.7183, 0.8086, -0.8940, 0.4626, 0.5449, + // 0.6831, 0.4661, 0.2981, 0.5615, -0.6167, -0.7075, -0.7603, + // 0.8438, -0.2742 + VLOAD_16(v4, 0xa8c1, 0x3ab3, 0x39bf, 0x3a78, 0xbb27, 0x3767, 0x385c, 0x3977, + 0x3775, 0x34c5, 0x387e, 0xb8ef, 0xb9a9, 0xba15, 0x3ac0, 0xb463); + // 0.3516, 0.7925, -0.5034, 
-0.1672, 0.0703, -0.9731, 0.0361, + // -0.4077, 0.8965, 0.8242, -0.7822, 0.0265, -0.5361, 0.1226, + // -0.9917, 0.5415 + VLOAD_16(v6, 0x35a0, 0x3a57, 0xb807, 0xb15a, 0x2c7f, 0xbbc9, 0x289f, 0xb686, + 0x3b2c, 0x3a98, 0xba42, 0x26cb, 0xb84a, 0x2fd8, 0xbbef, 0x3855); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjx.vv v2, v4, v6, v0.t"); + // 0.0000, 0.8374, 0.0000, -0.8086, 0.0000, -0.4626, 0.0000, + // -0.6831, 0.0000, 0.2981, 0.0000, -0.6167, 0.0000, + // -0.7603, 0.0000, -0.2742 + VCMP_U16(4, v2, 0x0, 0x3ab3, 0x0, 0xba78, 0x0, 0xb767, 0x0, 0xb977, 0x0, + 0x34c5, 0x0, 0xb8ef, 0x0, 0xba15, 0x0, 0xb463); + + VSET(16, e32, m4); + // -0.00918692, -0.23372029, 0.42919466, 0.95128548, + // 0.05014091, 0.08194520, 0.65458435, 0.38167605, + // -0.52784044, 0.46330592, 0.66792834, 0.94584799, + // -0.11679628, 0.12139154, 0.61421394, -0.71422517 + VLOAD_32(v8, 0xbc1684ba, 0xbe6f545f, 0x3edbbf67, 0x3f738772, 0x3d4d608d, + 0x3da7d2e2, 0x3f2792d7, 0x3ec36b0b, 0xbf07208d, 0x3eed366f, + 0x3f2afd5a, 0x3f722318, 0xbdef32e4, 0x3df89c21, 0x3f1d3d20, + 0xbf36d776); + // -0.96525091, -0.82903022, -0.98528612, 0.36915505, + // 0.23285799, 0.19133335, 0.78484982, -0.40654737, + // -0.40144378, -0.94419461, 0.60990387, -0.37662670, + // 0.75369638, -0.82297397, 0.24545205, -0.75572032 + VLOAD_32(v12, 0xbf771aaf, 0xbf543b53, 0xbf7c3bb6, 0x3ebd01e4, 0x3e6e7253, + 0x3e43ece4, 0x3f48ebeb, 0xbed026fa, 0xbecd8a0a, 0xbf71b6bd, + 0x3f1c22a9, 0xbec0d537, 0x3f40f23f, 0xbf52ae6c, 0x3e7b57c8, + 0xbf4176e3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjx.vv v4, v8, v12, v0.t"); + // 0.00000000, 0.23372029, 0.00000000, 0.95128548, + // 0.00000000, 0.08194520, 0.00000000, -0.38167605, + // 0.00000000, -0.46330592, 0.00000000, -0.94584799, + // 0.00000000, -0.12139154, 0.00000000, 0.71422517 + VCMP_U32(5, v4, 0x0, 0x3e6f545f, 0x0, 0x3f738772, 0x0, 0x3da7d2e2, 0x0, + 0xbec36b0b, 0x0, 0xbeed366f, 0x0, 0xbf722318, 0x0, 0xbdf89c21, 0x0, + 0x3f36d776); + + VSET(16, e64, m8); 
+ // -0.4085246287477386, 0.8681744372264055, -0.9782992825101422, + // 0.9959576051606904, -0.7910104167136705, 0.0799315061445605, + // 0.2562329212571202, -0.0401280831920132, -0.6164331117742006, + // 0.0314794700215042, -0.2391312835511448, 0.2944948324466776, + // -0.3469257666022745, 0.3129356083924371, 0.1418123916338592, + // -0.2697778839142546 + VLOAD_64(v16, 0xbfda25447c0540c8, 0x3febc815c1e38a2c, 0xbfef4e3a4c029a38, + 0x3fefdee27bcbc3c2, 0xbfe94ff513d293d6, 0x3fb4766424cf97d0, + 0x3fd0661ec43d4dd0, 0xbfa48bab09ebf660, 0xbfe3b9d1eee7bc0a, + 0x3fa01e13bc79bd60, 0xbfce9bda9926bde0, 0x3fd2d900da8cc448, + 0xbfd63408216c936c, 0x3fd4072312f3290c, 0x3fc226e8901e1378, + 0xbfd1440a752621b8); + // -0.9042358342806300, 0.2953863994960662, -0.4373909703642964, + // 0.1464626280814265, -0.5161207396769107, -0.3525096032632213, + // -0.0692332757289065, -0.9900711773455610, 0.6225050177521096, + // -0.1361158534833962, 0.1558021548512183, 0.9766583762298613, + // 0.2768845956890595, 0.6672273199701737, -0.4444943981200347, + // 0.5095574851608440 + VLOAD_64(v24, 0xbfecef7ffd03691e, 0x3fd2e79c5b6133f0, 0xbfdbfe36b251f164, + 0x3fc2bf4992d91480, 0xbfe0840fa43663a0, 0xbfd68f847062a774, + 0xbfb1b9459f0cf460, 0xbfefaea9bfed2a32, 0x3fe3eb8fa49aeb32, + 0xbfc16c3e8996d300, 0x3fc3f15333ddbc58, 0x3fef40c91128b1ea, + 0x3fd1b87a2ad00b5c, 0x3fe559ed1bc8a0c2, 0xbfdc7298a1cb9174, + 0x3fe04e4b7fc654a0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjx.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, 0.8681744372264055, 0.0000000000000000, + // 0.9959576051606904, 0.0000000000000000, -0.0799315061445605, + // 0.0000000000000000, 0.0401280831920132, 0.0000000000000000, + // -0.0314794700215042, 0.0000000000000000, 0.2944948324466776, + // 0.0000000000000000, 0.3129356083924371, 0.0000000000000000, + // -0.2697778839142546 + VCMP_U64(6, v8, 0x0, 0x3febc815c1e38a2c, 0x0, 0x3fefdee27bcbc3c2, 0x0, + 0xbfb4766424cf97d0, 0x0, 0x3fa48bab09ebf660, 0x0, 0xbfa01e13bc79bd60, + 
0x0, 0x3fd2d900da8cc448, 0x0, 0x3fd4072312f3290c, 0x0, + 0xbfd1440a752621b8); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.9766 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbbd0); + // 0.8535, -0.3750, -0.8066, -0.9097, -0.2216, -0.5645, -0.3149, + // -0.4512, 0.5981, 0.6587, 0.9546, -0.3040, -0.6157, 0.5723, + // 0.8438, -0.1544 + VLOAD_16(v4, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 0xb738, + 0x38c9, 0x3945, 0x3ba3, 0xb4dd, 0xb8ed, 0x3894, 0x3ac0, 0xb0f1); + asm volatile("vfsgnjx.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // -0.8535, 0.3750, 0.8066, 0.9097, 0.2216, 0.5645, 0.3149, + // 0.4512, -0.5981, -0.6587, -0.9546, 0.3040, 0.6157, -0.5723, + // -0.8438, 0.1544 + VCMP_U16(7, v2, 0xbad4, 0x3600, 0x3a74, 0x3b47, 0x3317, 0x3884, 0x350a, + 0x3738, 0xb8c9, 0xb945, 0xbba3, 0x34dd, 0x38ed, 0xb894, 0xbac0, + 0x30f1); + + VSET(16, e32, m4); + double dscalar_32; + // -0.71056527 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf35e79b); + // -0.13350210, -0.18642496, 0.30152589, -0.62076813, + // 0.00040700, -0.59566921, -0.88075870, 0.08096603, 0.94059193, + // -0.29601631, -0.54263371, -0.86016685, -0.57158113, + // 0.85538357, -0.76839548, 0.28374606 + VLOAD_32(v8, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, + 0xbf187dc7, 0xbf617967, 0x3da5d185, 0x3f70caa2, 0xbe978f73, + 0xbf0aea0b, 0xbf5c33e5, 0xbf125324, 0x3f5afa6b, 0xbf44b591, + 0x3e91472a); + asm volatile("vfsgnjx.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 0.13350210, 0.18642496, -0.30152589, 0.62076813, + // -0.00040700, 0.59566921, 0.88075870, -0.08096603, + // -0.94059193, 0.29601631, 0.54263371, 0.86016685, + // 0.57158113, -0.85538357, 0.76839548, -0.28374606 + VCMP_U32(8, v4, 0x3e08b4c6, 0x3e3ee62f, 0xbe9a619a, 0x3f1eeaa9, 0xb9d561f4, + 0x3f187dc7, 0x3f617967, 0xbda5d185, 0xbf70caa2, 0x3e978f73, + 0x3f0aea0b, 0x3f5c33e5, 0x3f125324, 0xbf5afa6b, 0x3f44b591, + 0xbe91472a); + + VSET(16, e64, m8); + 
double dscalar_64; + // -0.1599292306617626 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfc4788f9faef060); + // -0.3770377828689853, 0.5963307040587882, + // -0.4228346580189990, -0.8395360297727528, 0.2884308755790033, + // -0.9332093226534830, -0.3077793113682024, + // -0.3241690978469995, 0.9848431705043186, 0.5835571766262024, + // 0.6934128987139432, -0.8499240402166686, 0.9392758702585176, + // 0.8754505566292561, -0.4187493105472220, -0.7967172481248119 + VLOAD_64(v16, 0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, + 0xbfeadd7aa9f60146, 0x3fd275a6c6712e84, 0xbfeddcd9cc23cf06, + 0xbfd3b2a7ff2d8ea0, 0xbfd4bf2fbe681ba4, 0x3fef83d5d32028f6, + 0x3fe2ac80199e9490, 0x3fe630703f533af4, 0xbfeb3293e69a12ae, + 0x3fee0e8c4515d52c, 0x3fec03b0e2bf9ad6, 0xbfdaccc9e88176a4, + 0xbfe97eb52b9b5dac); + asm volatile("vfsgnjx.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // 0.3770377828689853, -0.5963307040587882, 0.4228346580189990, + // 0.8395360297727528, -0.2884308755790033, 0.9332093226534830, + // 0.3077793113682024, 0.3241690978469995, -0.9848431705043186, + // -0.5835571766262024, -0.6934128987139432, 0.8499240402166686, + // -0.9392758702585176, -0.8754505566292561, 0.4187493105472220, + // 0.7967172481248119 + VCMP_U64(9, v8, 0x3fd8216314b1d540, 0xbfe3152420f10f90, 0x3fdb0fb918f3a4fc, + 0x3feadd7aa9f60146, 0xbfd275a6c6712e84, 0x3feddcd9cc23cf06, + 0x3fd3b2a7ff2d8ea0, 0x3fd4bf2fbe681ba4, 0xbfef83d5d32028f6, + 0xbfe2ac80199e9490, 0xbfe630703f533af4, 0x3feb3293e69a12ae, + 0xbfee0e8c4515d52c, 0xbfec03b0e2bf9ad6, 0x3fdaccc9e88176a4, + 0x3fe97eb52b9b5dac); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.9766 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbbd0); + // 0.8535, -0.3750, -0.8066, -0.9097, -0.2216, -0.5645, + // -0.3149, -0.4512, 0.5981, 0.6587, 0.9546, -0.3040, + // -0.6157, 0.5723, 0.8438, -0.1544 + VLOAD_16(v4, 0x3ad4, 0xb600, 0xba74, 0xbb47, 0xb317, 0xb884, 0xb50a, 
0xb738, + 0x38c9, 0x3945, 0x3ba3, 0xb4dd, 0xb8ed, 0x3894, 0x3ac0, 0xb0f1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsgnjx.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 0.3750, 0.0000, 0.9097, 0.0000, 0.5645, 0.0000, + // 0.4512, 0.0000, -0.6587, 0.0000, 0.3040, 0.0000, + // -0.5723, 0.0000, 0.1544 + VCMP_U16(10, v2, 0x0, 0x3600, 0x0, 0x3b47, 0x0, 0x3884, 0x0, 0x3738, 0x0, + 0xb945, 0x0, 0x34dd, 0x0, 0xb894, 0x0, 0x30f1); + + VSET(16, e32, m4); + double dscalar_32; + // -0.71056527 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf35e79b); + // -0.13350210, -0.18642496, 0.30152589, -0.62076813, + // 0.00040700, -0.59566921, -0.88075870, 0.08096603, + // 0.94059193, -0.29601631, -0.54263371, -0.86016685, + // -0.57158113, 0.85538357, -0.76839548, 0.28374606 + VLOAD_32(v8, 0xbe08b4c6, 0xbe3ee62f, 0x3e9a619a, 0xbf1eeaa9, 0x39d561f4, + 0xbf187dc7, 0xbf617967, 0x3da5d185, 0x3f70caa2, 0xbe978f73, + 0xbf0aea0b, 0xbf5c33e5, 0xbf125324, 0x3f5afa6b, 0xbf44b591, + 0x3e91472a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsgnjx.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, 0.18642496, 0.00000000, 0.62076813, + // 0.00000000, 0.59566921, 0.00000000, -0.08096603, + // 0.00000000, 0.29601631, 0.00000000, 0.86016685, + // 0.00000000, -0.85538357, 0.00000000, -0.28374606 + VCMP_U32(11, v4, 0x0, 0x3e3ee62f, 0x0, 0x3f1eeaa9, 0x0, 0x3f187dc7, 0x0, + 0xbda5d185, 0x0, 0x3e978f73, 0x0, 0x3f5c33e5, 0x0, 0xbf5afa6b, 0x0, + 0xbe91472a); + + VSET(16, e64, m8); + double dscalar_64; + // -0.1599292306617626 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfc4788f9faef060); + // -0.3770377828689853, 0.5963307040587882, + // -0.4228346580189990, -0.8395360297727528, + // 0.2884308755790033, -0.9332093226534830, + // -0.3077793113682024, -0.3241690978469995, + // 0.9848431705043186, 0.5835571766262024, 0.6934128987139432, + // -0.8499240402166686, 0.9392758702585176, + // 0.8754505566292561, -0.4187493105472220, -0.7967172481248119 + VLOAD_64(v16, 
0xbfd8216314b1d540, 0x3fe3152420f10f90, 0xbfdb0fb918f3a4fc, + 0xbfeadd7aa9f60146, 0x3fd275a6c6712e84, 0xbfeddcd9cc23cf06, + 0xbfd3b2a7ff2d8ea0, 0xbfd4bf2fbe681ba4, 0x3fef83d5d32028f6, + 0x3fe2ac80199e9490, 0x3fe630703f533af4, 0xbfeb3293e69a12ae, + 0x3fee0e8c4515d52c, 0x3fec03b0e2bf9ad6, 0xbfdaccc9e88176a4, + 0xbfe97eb52b9b5dac); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsgnjx.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.5963307040587882, 0.0000000000000000, + // 0.8395360297727528, 0.0000000000000000, 0.9332093226534830, + // 0.0000000000000000, 0.3241690978469995, 0.0000000000000000, + // -0.5835571766262024, 0.0000000000000000, + // 0.8499240402166686, 0.0000000000000000, + // -0.8754505566292561, 0.0000000000000000, 0.7967172481248119 + VCMP_U64(12, v8, 0x0, 0xbfe3152420f10f90, 0x0, 0x3feadd7aa9f60146, 0x0, + 0x3feddcd9cc23cf06, 0x0, 0x3fd4bf2fbe681ba4, 0x0, 0xbfe2ac80199e9490, + 0x0, 0x3feb3293e69a12ae, 0x0, 0xbfec03b0e2bf9ad6, 0x0, + 0x3fe97eb52b9b5dac); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c new file mode 100644 index 000000000..9d7b9524f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1down.c @@ -0,0 +1,101 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "float_macros.h"
+#include "vector_macros.h"
+
+void TEST_CASE1() { // unmasked vfslide1down.vf, vl = 8, at e16/e32/e64
+  double dscalar_16;
+  // fp16 scalar 0xbb81 = -0.9380
+  BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VSET(8, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vfslide1down.vf v2, v4, %[A]" ::[A] "f"(dscalar_16));
+  VCMP_U16(1, v2, 2, 3, 4, 5, 6, 7, 8, 0xbb81);
+
+  double dscalar_32;
+  // fp32 scalar 0xbf75e762 = -0.96056187
+  BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VSET(8, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vfslide1down.vf v4, v8, %[A]" ::[A] "f"(dscalar_32));
+  // vl = 8: elements 0-6 take v8[1..7], element 7 takes the scalar.
+  VCMP_U32(2, v4, 2, 3, 4, 5, 6, 7, 8, 0xbf75e762);
+
+  double dscalar_64;
+  // fp64 scalar 0x3fed25da5d7296fe = 0.9108707261227378
+  BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VSET(8, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vfslide1down.vf v8, v16, %[A]" ::[A] "f"(dscalar_64));
+  VCMP_U64(3, v8, 2, 3, 4, 5, 6, 7, 8, 0x3fed25da5d7296fe);
+}
+
+void TEST_CASE2() { // masked (v0.t) variant; inactive elements must stay -1
+  double dscalar_16;
+  // fp16 scalar 0xbb81 = -0.9380
+  BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VSET(8, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0x55, 0x55);
+  asm volatile("vfslide1down.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16));
+  VCMP_U16(6, v2, 2, -1, 4, -1, 6, -1, 8, -1);
+
+  double dscalar_32;
+  // fp32 scalar 0xbf75e762 = -0.96056187
+  BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762);
+
+  // vl = 8 under m4, as in TEST_CASE1, so the 8 expected values match vl.
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VSET(8, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vfslide1down.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32));
+  VCMP_U32(7, v4, -1, 3, -1, 5, -1, 7, -1, 0xbf75e762);
+
+  double dscalar_64;
+  // fp64 scalar 0x3fed25da5d7296fe = 0.9108707261227378
+  BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe);
+
+  VSET(32, e64, m1);
+  VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+           19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+  VSET(16, e64, m1); // NOTE(review): 16 x e64 in m1 needs VLEN >= 1024 - confirm
+  VLOAD_64(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0x55, 0x55);
+  asm volatile("vfslide1down.vf v1, v2, %[A], v0.t" ::[A] "f"(dscalar_64));
+  VCMP_U64(8, v1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1);
+}
+
+int main(void) {
+  enable_vec();
+  enable_fp();
+
+  TEST_CASE1();
+  TEST_CASE2();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c
new file mode 100644
index 000000000..67379b383
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfslide1up.c
@@ -0,0 +1,90 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "float_macros.h"
+#include "vector_macros.h"
+
+void TEST_CASE1() { // unmasked vfslide1up.vf, 16 elements, at e16/e32/e64
+  double dscalar_16;
+  // fp16 scalar 0xbb81 = -0.9380; expected in element 0 after the slide
+  BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vfslide1up.vf v2, v4, %[A]" ::[A] "f"(dscalar_16));
+  VCMP_U16(1, v2, 0xbb81, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+
+  double dscalar_32;
+  // fp32 scalar 0xbf75e762 = -0.96056187
+  BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vfslide1up.vf v4, v8, %[A]" ::[A] "f"(dscalar_32));
+  VCMP_U32(2, v4, 0xbf75e762, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+           15);
+
+  double dscalar_64;
+  // fp64 scalar 0x3fed25da5d7296fe = 0.9108707261227378
+  BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  asm volatile("vfslide1up.vf v8, v16, %[A]" ::[A] "f"(dscalar_64));
+  VCMP_U64(3, v8, 0x3fed25da5d7296fe, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+           14, 15);
+}
+
+void TEST_CASE2() { // masked (v0.t) variant; inactive elements are expected -1
+  double dscalar_16;
+  // fp16 scalar 0xbb81 = -0.9380
+  BOX_HALF_IN_DOUBLE(dscalar_16, 0xbb81);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0x55, 0x55); // mask 0x55: even-indexed elements active
+  asm volatile("vfslide1up.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16));
+  VCMP_U16(4, v2, 0xbb81, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14,
+           -1);
+
+  double dscalar_32;
+  // fp32 scalar 0xbf75e762 = -0.96056187; element 0 is masked off here
+  BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbf75e762);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0xAA, 0xAA); // mask 0xAA: odd-indexed elements active
+  asm volatile("vfslide1up.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32));
+  VCMP_U32(5, v4, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15);
+
+  double dscalar_64;
+  // fp64 scalar 0x3fed25da5d7296fe = 0.9108707261227378
+  BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0x3fed25da5d7296fe);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+  VLOAD_8(v0, 0x55, 0x55); // mask 0x55: even-indexed elements active
+  asm volatile("vfslide1up.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64));
+  VCMP_U64(6, v8, 0x3fed25da5d7296fe, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1,
+           12, -1, 14, -1);
+}
+
+int main(void) { // runs both test cases, then EXIT_CHECK()
+  enable_vec();
+  enable_fp();
+
+  TEST_CASE1();
+  TEST_CASE2();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c
new file mode 100644
index 000000000..0b62d412b
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsqrt.c
@@ -0,0 +1,142 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -4628.000, 5116.000, -9928.000, 9392.000, -140.875, + // 6112.000, 2598.000, 3210.000, 528.000, -3298.000, + // -3674.000, 368.250, 1712.000, -8584.000, -2080.000, + // 4336.000 + VLOAD_16(v2, 0xec85, 0x6cff, 0xf0d9, 0x7096, 0xd867, 0x6df8, 0x6913, 0x6a45, + 0x6020, 0xea71, 0xeb2d, 0x5dc1, 0x66b0, 0xf031, 0xe810, 0x6c3c); + asm volatile("vfsqrt.v v4, v2"); + // nan, 71.500, nan, 96.938, + // nan, 78.188, 50.969, 56.656, 22.984, nan, + // nan, 19.188, 41.375, nan, nan, 65.875 + VCMP_U16(1, v4, 0x7e00, 0x5478, 0x7e00, 0x560e, 0x7e00, 0x54e2, 0x525f, + 0x5315, 0x4dbe, 0x7e00, 0x7e00, 0x4ccc, 0x512c, 0x7e00, 0x7e00, + 0x541d); + + VSET(16, e32, m4); + // 53688.590, -5719.180, -59560.355, -34640.023, -22323.398, + // -52381.586, 19136.160, 13055.238, -68576.781, + // -35066.488, 62475.219, -25604.578, 54705.039, + // -19827.459, 17792.961, -28415.572 + VLOAD_32(v4, 0x4751b897, 0xc5b2b971, 0xc768a85b, 0xc7075006, 0xc6ae66cc, + 0xc74c9d96, 0x46958052, 0x464bfcf4, 0xc785f064, 0xc708fa7d, + 0x47740b38, 0xc6c80928, 0x4755b10a, 0xc69ae6eb, 0x468b01ec, + 0xc6ddff25); + asm volatile("vfsqrt.v v8, v4"); + // 231.708, nan, nan, nan, nan, nan, 138.334, + // 114.260, nan, nan, 249.950, nan, 233.891, nan, + // 133.390, nan + VCMP_U32(2, v8, 0x4367b53e, 0x7fc00000, 0x7fc00000, 0x7fc00000, 0x7fc00000, + 0x7fc00000, 0x430a5560, 0x42e484e0, 0x7fc00000, 0x7fc00000, + 0x4379f34f, 0x7fc00000, 0x4369e41e, 0x7fc00000, 0x430563e7, + 0x7fc00000); + + VSET(16, e64, m8); + // -2532126.867, -601715.939, -7176821.248, 9617114.284, + // -4651296.040, -9962642.835, 4027953.647, 7849763.850, + // -9544132.585, -8682313.823, 7018932.012, 639358.130, + // -7598169.215, -9585529.793, -4604984.668, 314584.590 + VLOAD_64(v8, 0xc143518f6efce4ae, 
0xc1225ce7e096cbf0, 0xc15b609d4fd8b968, + 0x416257db4912ef24, 0xc151be4802974a67, 0xc16300925abc1630, + 0x414ebb18d2c34030, 0x415df1c8f662a87c, 0xc162343892b8d28c, + 0xc1608f693a52837e, 0x415ac66d00c810d8, 0x412382fc427c96a0, + 0xc15cfc164dc9e320, 0xc162486f39607ee9, 0xc151910e2ac0e818, + 0x411333625c861bc0); + asm volatile("vfsqrt.v v16, v8"); + // nan, nan, nan, 3101.147, nan, nan, 2006.976, + // 2801.743, nan, nan, 2649.327, 799.599, nan, nan, + // nan, 560.878 + VCMP_U64(3, v16, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, + 0x40a83a4b64b82189, 0x7ff8000000000000, 0x7ff8000000000000, + 0x409f5be7acad5998, 0x40a5e37c6ac52c2f, 0x7ff8000000000000, + 0x7ff8000000000000, 0x40a4b2a7466e763d, 0x4088fcca333ab72d, + 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, + 0x40818706fb9cc11b); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -4628.000, 5116.000, -9928.000, 9392.000, -140.875, + // 6112.000, 2598.000, 3210.000, 528.000, -3298.000, + // -3674.000, 368.250, 1712.000, -8584.000, -2080.000, + // 4336.000 + VLOAD_16(v2, 0xec85, 0x6cff, 0xf0d9, 0x7096, 0xd867, 0x6df8, 0x6913, 0x6a45, + 0x6020, 0xea71, 0xeb2d, 0x5dc1, 0x66b0, 0xf031, 0xe810, 0x6c3c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsqrt.v v4, v2, v0.t"); + // 0.000, 71.500, 0.000, 96.938, 0.000, 78.188, + // 0.000, 56.656, 0.000, nan, 0.000, 19.188, 0.000, + // nan, 0.000, 65.875 + VCMP_U16(4, v4, 0x0, 0x5478, 0x0, 0x560e, 0x0, 0x54e2, 0x0, 0x5315, 0x0, + 0x7e00, 0x0, 0x4ccc, 0x0, 0x7e00, 0x0, 0x541d); + + VSET(16, e32, m4); + // 53688.590, -5719.180, -59560.355, -34640.023, -22323.398, + // -52381.586, 19136.160, 13055.238, -68576.781, + // -35066.488, 62475.219, -25604.578, 54705.039, + // -19827.459, 17792.961, -28415.572 + VLOAD_32(v4, 0x4751b897, 0xc5b2b971, 0xc768a85b, 0xc7075006, 0xc6ae66cc, + 0xc74c9d96, 0x46958052, 0x464bfcf4, 0xc785f064, 0xc708fa7d, +
0x47740b38, 0xc6c80928, 0x4755b10a, 0xc69ae6eb, 0x468b01ec, + 0xc6ddff25); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsqrt.v v8, v4, v0.t"); + // 0.000, nan, 0.000, nan, 0.000, nan, 0.000, + // 114.260, 0.000, nan, 0.000, nan, 0.000, nan, + // 0.000, nan + VCMP_U32(5, v8, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, + 0x42e484e0, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, 0x7fc00000, 0x0, + 0x7fc00000); + + VSET(16, e64, m8); + // -2532126.867, -601715.939, -7176821.248, 9617114.284, + // -4651296.040, -9962642.835, 4027953.647, 7849763.850, + // -9544132.585, -8682313.823, 7018932.012, 639358.130, + // -7598169.215, -9585529.793, -4604984.668, 314584.590 + VLOAD_64(v8, 0xc143518f6efce4ae, 0xc1225ce7e096cbf0, 0xc15b609d4fd8b968, + 0x416257db4912ef24, 0xc151be4802974a67, 0xc16300925abc1630, + 0x414ebb18d2c34030, 0x415df1c8f662a87c, 0xc162343892b8d28c, + 0xc1608f693a52837e, 0x415ac66d00c810d8, 0x412382fc427c96a0, + 0xc15cfc164dc9e320, 0xc162486f39607ee9, 0xc151910e2ac0e818, + 0x411333625c861bc0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vfsqrt.v v16, v8, v0.t"); + // 0.000, nan, 0.000, 3101.147, 0.000, nan, 0.000, + // 2801.743, 0.000, nan, 0.000, 799.599, 0.000, nan, + // 0.000, 560.878 + VCMP_U64(6, v16, 0x0, 0x7ff8000000000000, 0x0, 0x40a83a4b64b82189, 0x0, + 0x7ff8000000000000, 0x0, 0x40a5e37c6ac52c2f, 0x0, 0x7ff8000000000000, + 0x0, 0x4088fcca333ab72d, 0x0, 0x7ff8000000000000, 0x0, + 0x40818706fb9cc11b); +}; + +int main(void) { + enable_vec(); + enable_fp(); + // Change RM to RTZ since there are issues with FDIV + RNE in fpnew + // Update: there are issues also with RTZ... 
+ CHANGE_RM(RM_RTZ); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c new file mode 100644 index 000000000..292ee9194 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfsub.c @@ -0,0 +1,349 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values + 1 subnormal +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -0.2161, 0.7432, 0.7871, 0.7583, -0.4546, -0.0478, 0.1260, + // -0.4824, 0.9282, -0.6221, 0.6543, 0.3025, -0.1420, -0.7236, + // 0.2333, -0.0269 + VLOAD_16(v4, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, + 0x3b6d, 0xb8fa, 0x393c, 0x34d7, 0xb08b, 0xb9ca, 0x3377, 0xa6e5); + // -0.3289, -0.8408, -0.1754, -0.8472, 0.7739, -0.9111, -0.3152, + // 0.4519, -0.2537, 0.9287, -0.7163, -0.2318, 0.0615, -0.2563, + // 0.1448, 0.6606 + VLOAD_16(v6, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, + 0xb40f, 0x3b6e, 0xb9bb, 0xb36b, 0x2bde, 0xb41a, 0x30a2, 0x3949); + asm volatile("vfsub.vv v2, v4, v6"); + // 0.1128, 1.5840, 0.9624, 1.6055, -1.2285, 0.8633, 0.4412, + // -0.9346, 1.1816, -1.5508, 1.3711, 0.5342, -0.2034, + // -0.4673, 0.0885, -0.6875 + VCMP_U16(1, v2, 0x2f38, 0x3e56, 0x3bb3, 0x3e6c, 0xbcea, 0x3ae8, 0x370f, + 0xbb7a, 0x3cba, 0xbe34, 0x3d7c, 0x3846, 0xb282, 0xb77a, 0x2daa, + 0xb980); + + VSET(16, e32, m4); + // -0.12869358, 0.96847999, -0.85811919, -0.21122381, + // -0.05195865, 0.43910158, 0.86828148, -0.90407801, + // 0.62089461, -0.65907389, 0.91886526, -0.57595438, + // -0.35377914, -0.26657876, 0.49153560, 0.42637765 + VLOAD_32(v8, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 0xbd54d298, + 0x3ee0d1ec, 0x3f5e47b2, 0xbf6771a8, 
0x3f1ef2f3, 0xbf28b911, + 0x3f6b3ac1, 0xbf1371bf, 0xbeb5228a, 0xbe887d03, 0x3efbaa8e, + 0x3eda4e2c); + // -0.50821143, -0.56901741, -0.88642830, 0.91128469, + // -0.00441748, 0.72763014, 0.81834352, -0.49977919, + // -0.94507313, -0.60766727, 0.21069343, 0.35644454, + // -0.51639801, -0.74812186, -0.97028691, 0.42650157 + VLOAD_32(v12, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, + 0x3f3a45f8, 0x3f517ef6, 0xbeffe30f, 0xbf71f050, 0xbf1b9015, + 0x3e57c005, 0x3eb67fe6, 0xbf0432a9, 0xbf3f84ea, 0xbf7864b9, + 0x3eda5e6a); + asm volatile("vfsub.vv v4, v8, v12"); + // 0.37951785, 1.53749740, 0.02830911, -1.12250853, + // -0.04754117, -0.28852856, 0.04993796, + // -0.40429881, 1.56596780, -0.05140662, 0.70817184, + // -0.93239892, 0.16261888, 0.48154309, 1.46182251, + // -0.00012392 + VCMP_U32(2, v4, 0x3ec2502a, 0x3fc4ccb7, 0x3ce7e880, 0xbf8fae5c, 0xbd42ba88, + 0xbe93ba04, 0x3d4c8bc0, 0xbecf0041, 0x3fc871a2, 0xbd528fc0, + 0x3f354ac0, 0xbf6eb1b2, 0x3e268590, 0x3ef68cd1, 0x3fbb1d00, + 0xb901f000); + + VSET(16, e64, m8); + // -0.5053356652713634, -0.6291854947278097, 0.6181258713941662, + // -0.6097328085365348, 0.8960683065358290, 0.1233825892982841, + // -0.7071646124826323, -0.6783334309218909, 0.3533001486660008, + // 0.4732651306122215, -0.7335080825789513, -0.9296500813876505, + // 0.5349827137885166, -0.0621174552558810, -0.8122743533756343, + // -0.8908485518923974 + VLOAD_64(v16, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, + 0xbfe382ee60fece00, 0x3fecac9770f1b62e, 0x3fbf960059ee92f0, + 0xbfe6a117ae700ba0, 0xbfe5b4e84fb2f9d4, 0x3fd69c783a0c5078, + 0x3fde49f9d4944428, 0xbfe778e5f140e788, 0xbfedbfb18709140c, + 0x3fe11e941174b448, 0xbfafcddbedab64a0, 0xbfe9fe26c8e417ba, + 0xbfec81d4d2822346); + // -0.6041772411195545, 0.1691588460867453, -0.3855578735230800, + // -0.9206749118255901, 0.7025181961160538, -0.9905598942344518, + // 0.9510997049380876, 0.2754176494545910, 0.5271936205102918, + // 0.8778238674058336, 0.9294006140978470, 
-0.8775508592745904, + // 0.7472392658861982, -0.3880038279796372, -0.6483706997783654, + // -0.1530785884604509 + VLOAD_64(v24, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, + 0xbfed762b3b913c28, 0x3fe67b0770a53a4a, 0xbfefb2aaa9ceeb06, + 0x3fee6f68a5fe3800, 0x3fd1a071594983a8, 0x3fe0dec527d80c9a, + 0x3fec172214450060, 0x3fedbda65b4dd79c, 0xbfec14e58a252770, + 0x3fe7e962522895aa, 0xbfd8d50e01f94d70, 0xbfe4bf73e8e77264, + 0xbfc398144593e6c0); + asm volatile("vfsub.vv v8, v16, v24"); + // 0.0988415758481911, -0.7983443408145550, 1.0036837449172462, + // 0.3109421032890554, 0.1935501104197752, 1.1139424835327358, + // -1.6582643174207199, -0.9537510803764819, + // -0.1738934718442910, -0.4045587367936121, + // -1.6629086966767983, -0.0520992221130601, + // -0.2122565520976816, 0.3258863727237562, + // -0.1639036535972689, -0.7377699634319466 + VCMP_U64(3, v8, 0x3fb94dae77c0ca20, 0xbfe98c096e57d89c, 0x3ff00f16afbf245e, + 0x3fd3e679b524dc50, 0x3fc8c6400131ef90, 0x3ff1d2b55a865eb2, + 0xbffa88402a3721d0, 0xbfee8520fc57bba8, 0xbfc642242b479178, + 0xbfd9e44a53f5bc98, 0xbffa9b4626475f92, 0xbfaaacbfce3ec9c0, + 0xbfcb2b3902cf8588, 0x3fd4db528443e0dc, 0xbfc4facb7ff29558, + 0xbfe79bcfc11d2996); +}; + +// Simple random test with similar values + 1 subnormal (masked) +// The numbers are the same as in TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -0.2161, 0.7432, 0.7871, 0.7583, -0.4546, -0.0478, 0.1260, + // -0.4824, 0.9282, -0.6221, 0.6543, 0.3025, -0.1420, -0.7236, + // 0.2333, -0.0269 + VLOAD_16(v4, 0xb2ea, 0x39f2, 0x3a4c, 0x3a11, 0xb746, 0xaa1f, 0x3008, 0xb7b8, + 0x3b6d, 0xb8fa, 0x393c, 0x34d7, 0xb08b, 0xb9ca, 0x3377, 0xa6e5); + // -0.3289, -0.8408, -0.1754, -0.8472, 0.7739, -0.9111, -0.3152, + // 0.4519, -0.2537, 0.9287, -0.7163, -0.2318, 0.0615, -0.2563, + // 0.1448, 0.6606 + VLOAD_16(v6, 0xb543, 0xbaba, 0xb19d, 0xbac7, 0x3a31, 0xbb4a, 0xb50b, 0x373b, + 0xb40f, 0x3b6e, 0xb9bb, 0xb36b, 0x2bde, 0xb41a, 0x30a2, 0x3949); + VLOAD_8(v0, 0xAA, 0xAA); +
VCLEAR(v2); + asm volatile("vfsub.vv v2, v4, v6, v0.t"); + // 0.0000, 1.5840, 0.0000, 1.6055, 0.0000, 0.8633, 0.0000, + // -0.9346, 0.0000, -1.5508, 0.0000, 0.5342, 0.0000, + // -0.4673, 0.0000, -0.6875 + VCMP_U16(4, v2, 0x0, 0x3e56, 0x0, 0x3e6c, 0x0, 0x3ae8, 0x0, 0xbb7a, 0x0, + 0xbe34, 0x0, 0x3846, 0x0, 0xb77a, 0x0, 0xb980); + + VSET(16, e32, m4); + // -0.12869358, 0.96847999, -0.85811919, -0.21122381, + // -0.05195865, 0.43910158, 0.86828148, -0.90407801, + // 0.62089461, -0.65907389, 0.91886526, -0.57595438, + // -0.35377914, -0.26657876, 0.49153560, 0.42637765 + VLOAD_32(v8, 0xbe03c840, 0x3f77ee4e, 0xbf5badb3, 0xbe584b0e, 0xbd54d298, + 0x3ee0d1ec, 0x3f5e47b2, 0xbf6771a8, 0x3f1ef2f3, 0xbf28b911, + 0x3f6b3ac1, 0xbf1371bf, 0xbeb5228a, 0xbe887d03, 0x3efbaa8e, + 0x3eda4e2c); + // -0.50821143, -0.56901741, -0.88642830, 0.91128469, + // -0.00441748, 0.72763014, 0.81834352, -0.49977919, + // -0.94507313, -0.60766727, 0.21069343, 0.35644454, + // -0.51639801, -0.74812186, -0.97028691, 0.42650157 + VLOAD_32(v12, 0xbf021a25, 0xbf11ab20, 0xbf62ecf7, 0x3f6949f4, 0xbb90c083, + 0x3f3a45f8, 0x3f517ef6, 0xbeffe30f, 0xbf71f050, 0xbf1b9015, + 0x3e57c005, 0x3eb67fe6, 0xbf0432a9, 0xbf3f84ea, 0xbf7864b9, + 0x3eda5e6a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsub.vv v4, v8, v12, v0.t"); + // 0.00000000, 1.53749740, 0.00000000, -1.12250853, + // 0.00000000, -0.28852856, 0.00000000, -0.40429881, + // 0.00000000, -0.05140662, 0.00000000, -0.93239892, + // 0.00000000, 0.48154309, 0.00000000, -0.00012392 + VCMP_U32(5, v4, 0x0, 0x3fc4ccb7, 0x0, 0xbf8fae5c, 0x0, 0xbe93ba04, 0x0, + 0xbecf0041, 0x0, 0xbd528fc0, 0x0, 0xbf6eb1b2, 0x0, 0x3ef68cd1, 0x0, + 0xb901f000); + + VSET(16, e64, m8); + // -0.5053356652713634, -0.6291854947278097, 0.6181258713941662, + // -0.6097328085365348, 0.8960683065358290, 0.1233825892982841, + // -0.7071646124826323, -0.6783334309218909, 0.3533001486660008, + // 0.4732651306122215, -0.7335080825789513, -0.9296500813876505, + // 0.5349827137885166, 
-0.0621174552558810, -0.8122743533756343, + // -0.8908485518923974 + VLOAD_64(v16, 0xbfe02bb5b37af91c, 0xbfe422499e5f271a, 0x3fe3c7afe84e61dc, + 0xbfe382ee60fece00, 0x3fecac9770f1b62e, 0x3fbf960059ee92f0, + 0xbfe6a117ae700ba0, 0xbfe5b4e84fb2f9d4, 0x3fd69c783a0c5078, + 0x3fde49f9d4944428, 0xbfe778e5f140e788, 0xbfedbfb18709140c, + 0x3fe11e941174b448, 0xbfafcddbedab64a0, 0xbfe9fe26c8e417ba, + 0xbfec81d4d2822346); + // -0.6041772411195545, 0.1691588460867453, -0.3855578735230800, + // -0.9206749118255901, 0.7025181961160538, -0.9905598942344518, + // 0.9510997049380876, 0.2754176494545910, 0.5271936205102918, + // 0.8778238674058336, 0.9294006140978470, -0.8775508592745904, + // 0.7472392658861982, -0.3880038279796372, -0.6483706997783654, + // -0.1530785884604509 + VLOAD_64(v24, 0xbfe3556b82731260, 0x3fc5a6ff3fe2c608, 0xbfd8acfaee5fcdc0, + 0xbfed762b3b913c28, 0x3fe67b0770a53a4a, 0xbfefb2aaa9ceeb06, + 0x3fee6f68a5fe3800, 0x3fd1a071594983a8, 0x3fe0dec527d80c9a, + 0x3fec172214450060, 0x3fedbda65b4dd79c, 0xbfec14e58a252770, + 0x3fe7e962522895aa, 0xbfd8d50e01f94d70, 0xbfe4bf73e8e77264, + 0xbfc398144593e6c0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsub.vv v8, v16, v24, v0.t"); + // 0.0000000000000000, -0.7983443408145550, 0.0000000000000000, + // 0.3109421032890554, 0.0000000000000000, 1.1139424835327358, + // 0.0000000000000000, -0.9537510803764819, 0.0000000000000000, + // -0.4045587367936121, 0.0000000000000000, + // -0.0520992221130601, 0.0000000000000000, 0.3258863727237562, + // 0.0000000000000000, -0.7377699634319466 + VCMP_U64(6, v8, 0x0, 0xbfe98c096e57d89c, 0x0, 0x3fd3e679b524dc50, 0x0, + 0x3ff1d2b55a865eb2, 0x0, 0xbfee8520fc57bba8, 0x0, 0xbfd9e44a53f5bc98, + 0x0, 0xbfaaacbfce3ec9c0, 0x0, 0x3fd4db528443e0dc, 0x0, + 0xbfe79bcfc11d2996); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + // 0.9727, 0.7676, 0.0876, -0.4526, -0.1158, 0.6221, 0.7612, + // -0.7539, 0.3875, -0.2002, 0.2168, 
-0.1055, -0.4348, 0.9795, + // 0.3650, 0.5171 + VLOAD_16(v4, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, + 0x3633, 0xb268, 0x32f0, 0xaec0, 0xb6f5, 0x3bd6, 0x35d7, 0x3823); + double dscalar_16; + // -0.8667 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbaef); + asm volatile("vfsub.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + // 1.8398, 1.6348, 0.9541, 0.4141, 0.7510, 1.4883, 1.6279, + // 0.1128, 1.2539, 0.6665, 1.0840, 0.7612, + // 0.4319, 1.8457, 1.2314, 1.3838 + VCMP_U16(7, v2, 0x3f5c, 0x3e8a, 0x3ba2, 0x36a0, 0x3a02, 0x3df4, 0x3e83, + 0x2f38, 0x3d04, 0x3955, 0x3c56, 0x3a17, 0x36e9, 0x3f62, 0x3ced, + 0x3d89); + + VSET(16, e32, m4); + // 0.85933530, -0.31821987, 0.18340160, -0.58902484, + // -0.83326858, -0.98716992, -0.74268776, -0.50486410, + // 0.91496444, -0.46108878, -0.75265163, -0.17853038, + // 0.09909800, -0.22828153, 0.31248060, 0.70940411 + VLOAD_32(v8, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, + 0xbf7cb72b, 0xbf3e20c9, 0xbf013ec6, 0x3f6a3b1c, 0xbeec13d4, + 0xbf40adc7, 0xbe36d0ab, 0x3dcaf3e5, 0xbe69c2a2, 0x3e9ffd75, + 0x3f359b82); + double dscalar_32; + // -0.16449618 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe2871b0); + asm volatile("vfsub.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + // 1.02383149, -0.15372369, 0.34789777, -0.42452866, + // -0.66877240, -0.82267374, -0.57819158, + // -0.34036791, 1.07946062, -0.29659259, -0.58815545, + // -0.01403420, 0.26359418, -0.06378534, 0.47697678, + // 0.87390029 + VCMP_U32(8, v4, 0x3f830ce9, 0xbe1d69be, 0x3eb21fa8, 0xbed95bd2, 0xbf2b34ab, + 0xbf529abf, 0xbf14045d, 0xbeae44b4, 0x3f8a2bc4, 0xbe97dafc, + 0xbf16915b, 0xbc65efb0, 0x3e86f5d1, 0xbd82a1e4, 0x3ef4364d, + 0x3f5fb7ee); + + VSET(16, e64, m8); + // -0.5270370833343294, -0.3892108170289901, 0.3278104985181656, + // 0.8978904717616114, 0.2838388271052681, 0.1890152734369528, + // -0.5587120809764872, 0.0329118609761476, 0.2661042157694802, + // 0.4284631655495406, 0.3525175873513684, -0.7218762878034530, + // -0.1902187411555145, 
0.0621279131630217, + // -0.3175600204168794, 0.2653267716685161 + VLOAD_64(v16, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, + 0x3fecbb84cc736570, 0x3fd22a6a53f022d0, 0x3fc831a708ed9848, + 0xbfe1e0f82875925c, 0x3fa0d9d2cd160b00, 0x3fd107d9fa03b074, + 0x3fdb6bf0c4e4dbb8, 0x3fd68fa5ed3c17c4, 0xbfe7199c4cfbf578, + 0xbfc85916742cb360, 0x3fafcf3ad6686660, 0xbfd452e7438d4924, + 0x3fd0fb1d23c47348); + double dscalar_64; + // -0.3447987329466446 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd6112eb46d5120); + asm volatile("vfsub.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + // -0.1822383503876848, -0.0444120840823454, + // 0.6726092314648102, 1.2426892047082561, 0.6286375600519127, + // 0.5338140063835974, -0.2139133480298425, 0.3777105939227923, + // 0.6109029487161248, 0.7732618984961852, 0.6973163202980130, + // -0.3770775548568084, 0.1545799917911301, 0.4069266461096663, + // 0.0272387125297653, 0.6101255046151608 + VCMP_U64(9, v8, 0xbfc75396157efcf8, 0xbfa6bd2e415c9bc0, 0x3fe58603cb842136, + 0x3ff3e20e13550700, 0x3fe41dcc842eb9f8, 0x3fe115011c720ea2, + 0xbfcb618338fba730, 0x3fd82c690e101280, 0x3fe38c84573880ca, + 0x3fe8be8fbca9166c, 0x3fe6506a50d4b472, 0xbfd82209e58a99d0, + 0x3fc3c946f4adeee0, 0x3fda0b160f3a5dec, 0x3f9be4770e007fc0, + 0x3fe38625ec18e234); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + // 0.9727, 0.7676, 0.0876, -0.4526, -0.1158, 0.6221, 0.7612, + // -0.7539, 0.3875, -0.2002, 0.2168, -0.1055, -0.4348, + // 0.9795, 0.3650, 0.5171 + VLOAD_16(v4, 0x3bc8, 0x3a24, 0x2d9c, 0xb73e, 0xaf6a, 0x38fa, 0x3a17, 0xba08, + 0x3633, 0xb268, 0x32f0, 0xaec0, 0xb6f5, 0x3bd6, 0x35d7, 0x3823); + double dscalar_16; + // -0.8667 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xbaef); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vfsub.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.0000, 1.6348, 0.0000, 0.4141, 0.0000, 1.4883, 0.0000, + // 0.1128, 0.0000, 0.6665, 0.0000, 0.7612, 0.0000, 
1.8457, + // 0.0000, 1.3838 + VCMP_U16(10, v2, 0x0, 0x3e8a, 0x0, 0x36a0, 0x0, 0x3df4, 0x0, 0x2f38, 0x0, + 0x3955, 0x0, 0x3a17, 0x0, 0x3f62, 0x0, 0x3d89); + + VSET(16, e32, m4); + // 0.85933530, -0.31821987, 0.18340160, -0.58902484, + // -0.83326858, -0.98716992, -0.74268776, -0.50486410, + // 0.91496444, -0.46108878, -0.75265163, -0.17853038, + // 0.09909800, -0.22828153, 0.31248060, 0.70940411 + VLOAD_32(v8, 0x3f5bfd66, 0xbea2edb7, 0x3e3bcda1, 0xbf16ca55, 0xbf555117, + 0xbf7cb72b, 0xbf3e20c9, 0xbf013ec6, 0x3f6a3b1c, 0xbeec13d4, + 0xbf40adc7, 0xbe36d0ab, 0x3dcaf3e5, 0xbe69c2a2, 0x3e9ffd75, + 0x3f359b82); + double dscalar_32; + // -0.16449618 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xbe2871b0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vfsub.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.00000000, -0.15372369, 0.00000000, -0.42452866, + // 0.00000000, -0.82267374, 0.00000000, -0.34036791, + // 0.00000000, -0.29659259, 0.00000000, -0.01403420, + // 0.00000000, -0.06378534, 0.00000000, 0.87390029 + VCMP_U32(11, v4, 0x0, 0xbe1d69be, 0x0, 0xbed95bd2, 0x0, 0xbf529abf, 0x0, + 0xbeae44b4, 0x0, 0xbe97dafc, 0x0, 0xbc65efb0, 0x0, 0xbd82a1e4, 0x0, + 0x3f5fb7ee); + + VSET(16, e64, m8); + // -0.5270370833343294, -0.3892108170289901, + // 0.3278104985181656, 0.8978904717616114, 0.2838388271052681, + // 0.1890152734369528, -0.5587120809764872, 0.0329118609761476, + // 0.2661042157694802, 0.4284631655495406, 0.3525175873513684, + // -0.7218762878034530, -0.1902187411555145, + // 0.0621279131630217, -0.3175600204168794, 0.2653267716685161 + VLOAD_64(v16, 0xbfe0dd7cdf9667ce, 0xbfd8e8d47c98e498, 0x3fd4fad8e29af14c, + 0x3fecbb84cc736570, 0x3fd22a6a53f022d0, 0x3fc831a708ed9848, + 0xbfe1e0f82875925c, 0x3fa0d9d2cd160b00, 0x3fd107d9fa03b074, + 0x3fdb6bf0c4e4dbb8, 0x3fd68fa5ed3c17c4, 0xbfe7199c4cfbf578, + 0xbfc85916742cb360, 0x3fafcf3ad6686660, 0xbfd452e7438d4924, + 0x3fd0fb1d23c47348); + double dscalar_64; + // -0.3447987329466446 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 
0xbfd6112eb46d5120); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vfsub.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + // 0.0000000000000000, -0.0444120840823454, 0.0000000000000000, + // 1.2426892047082561, 0.0000000000000000, 0.5338140063835974, + // 0.0000000000000000, 0.3777105939227923, 0.0000000000000000, + // 0.7732618984961852, 0.0000000000000000, + // -0.3770775548568084, 0.0000000000000000, + // 0.4069266461096663, 0.0000000000000000, 0.6101255046151608 + VCMP_U64(12, v8, 0x0, 0xbfa6bd2e415c9bc0, 0x0, 0x3ff3e20e13550700, 0x0, + 0x3fe115011c720ea2, 0x0, 0x3fd82c690e101280, 0x0, 0x3fe8be8fbca9166c, + 0x0, 0xbfd82209e58a99d0, 0x0, 0x3fda0b160f3a5dec, 0x0, + 0x3fe38625ec18e234); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c new file mode 100644 index 000000000..53d9bdcb2 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwadd.c @@ -0,0 +1,531 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 10.5312, 79.6250, 12.2891, 89.5000, 62.5938, 53.5625, + // -37.3438, -48.3750, 49.3438, 1.1475, -79.6250, 52.0000, + // -61.0312, 5.1641, 48.7500, -25.6250 + VLOAD_16(v4, 0x4944, 0x54fa, 0x4a25, 0x5598, 0x53d3, 0x52b2, 0xd0ab, 0xd20c, + 0x522b, 0x3c97, 0xd4fa, 0x5280, 0xd3a1, 0x452a, 0x5218, 0xce68); + // 68.1875, + // -7.0742, 20.2656, 72.3125, 88.5625, 36.0000, 96.6250, 70.4375, + // -87.6875, 87.6875, 8.7891, -18.8906, -40.1562, 88.8750, + // -55.5312, 2.1875 + VLOAD_16(v6, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, + 0xd57b, 0x557b, 0x4865, 0xccb9, 0xd105, 0x558e, 0xd2f1, 0x4060); + asm volatile("vfwadd.vv v8, v4, v6"); + // 78.71875000, 72.55078125, 32.55468750, 161.81250000, + // 151.15625000, 89.56250000, 59.28125000, 22.06250000, + // -38.34375000, 88.83496094, -70.83593750, 33.10937500, + // -101.18750000, 94.03906250, -6.78125000, -23.43750000 + VCMP_U32(1, v8, 0x429d7000, 0x42911a00, 0x42023800, 0x4321d000, 0x43172800, + 0x42b32000, 0x426d2000, 0x41b08000, 0xc2196000, 0x42b1ab80, + 0xc28dac00, 0x42047000, 0xc2ca6000, 0x42bc1400, 0xc0d90000, + 0xc1bb8000); + + VSET(16, e32, m4); + // -35386.17187500, -52670.69531250, 69391.31250000, + // 3219.84130859, 74596.35156250, -45488.69921875, + // 6598.72949219, 20221.24609375, 75105.62500000, + // 67354.82031250, 32844.19140625, -73470.18750000, + // -28377.67382812, 61998.10937500, 24691.54296875, + // -29873.45507812 + VLOAD_32(v8, 0xc70a3a2c, 0xc74dbeb2, 0x478787a8, 0x45493d76, 0x4791b22d, + 0xc731b0b3, 0x45ce35d6, 0x469dfa7e, 0x4792b0d0, 0x47838d69, + 0x47004c31, 0xc78f7f18, 0xc6ddb359, 0x47722e1c, 0x46c0e716, + 0xc6e962e9); + // -90937.21875000, -72297.07031250, 18867.19531250, + // -516.01525879, -13301.11425781, 
85173.41406250, + // -32079.35546875, -23770.60937500, 39295.43359375, + // 38042.19140625, -61343.24218750, 76844.01562500, + // 26642.50390625, 91893.05468750, 88349.72656250, + // 29134.96093750 + VLOAD_32(v12, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, + 0x47a65ab5, 0xc6fa9eb6, 0xc6b9b538, 0x47197f6f, 0x47149a31, + 0xc76f9f3e, 0x47961602, 0x46d02502, 0x47b37a87, 0x47ac8edd, + 0x46e39dec); + asm volatile("vfwadd.vv v16, v8, v12"); + // -126323.3906250000000000, -124967.7656250000000000, + // 88258.5078125000000000, 2703.8260498046875000, + // 61295.2373046875000000, 39684.7148437500000000, + // -25480.6259765625000000, -3549.3632812500000000, + // 114401.0585937500000000, 105397.0117187500000000, + // -28499.0507812500000000, 3373.8281250000000000, + // -1735.1699218750000000, 153891.1640625000000000, + // 113041.2695312500000000, -738.4941406250000000 + VCMP_U64(2, v16, 0xc0fed73640000000, 0xc0fe827c40000000, 0x40f58c2820000000, + 0x40a51fa6f0000000, 0x40edede798000000, 0x40e36096e0000000, + 0xc0d8e22810000000, 0xc0abbaba00000000, 0x40fbee10f0000000, + 0x40f9bb5030000000, 0xc0dbd4c340000000, 0x40aa5ba800000000, + 0xc09b1cae00000000, 0x4102c91950000000, 0x40fb991450000000, + 0xc08713f400000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 10.5312, 79.6250, 12.2891, 89.5000, 62.5938, 53.5625, + // -37.3438, -48.3750, 49.3438, 1.1475, -79.6250, 52.0000, + // -61.0312, 5.1641, 48.7500, -25.6250 + VLOAD_16(v4, 0x4944, 0x54fa, 0x4a25, 0x5598, 0x53d3, 0x52b2, 0xd0ab, 0xd20c, + 0x522b, 0x3c97, 0xd4fa, 0x5280, 0xd3a1, 0x452a, 0x5218, 0xce68); + // 68.1875, + // -7.0742, 20.2656, 72.3125, 88.5625, 36.0000, 96.6250, 70.4375, + // -87.6875, 87.6875, 8.7891, -18.8906, -40.1562, 88.8750, + // -55.5312, 2.1875 + VLOAD_16(v6, 0x5443, 0xc713, 0x4d11, 0x5485, 0x5589, 0x5080, 0x560a, 0x5467, + 0xd57b, 0x557b, 0x4865, 0xccb9, 0xd105, 0x558e, 0xd2f1, 0x4060); 
+ VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwadd.vv v8, v4, v6, v0.t"); + // 0.00000000, 72.55078125, 0.00000000, 161.81250000, + // 0.00000000, 89.56250000, 0.00000000, 22.06250000, + // 0.00000000, 88.83496094, 0.00000000, 33.10937500, + // 0.00000000, 94.03906250, 0.00000000, -23.43750000 + VCMP_U32(3, v8, 0x0, 0x42911a00, 0x0, 0x4321d000, 0x0, 0x42b32000, 0x0, + 0x41b08000, 0x0, 0x42b1ab80, 0x0, 0x42047000, 0x0, 0x42bc1400, 0x0, + 0xc1bb8000); + + VSET(16, e32, m4); + // -35386.17187500, -52670.69531250, 69391.31250000, + // 3219.84130859, 74596.35156250, -45488.69921875, + // 6598.72949219, 20221.24609375, 75105.62500000, + // 67354.82031250, 32844.19140625, -73470.18750000, + // -28377.67382812, 61998.10937500, 24691.54296875, + // -29873.45507812 + VLOAD_32(v8, 0xc70a3a2c, 0xc74dbeb2, 0x478787a8, 0x45493d76, 0x4791b22d, + 0xc731b0b3, 0x45ce35d6, 0x469dfa7e, 0x4792b0d0, 0x47838d69, + 0x47004c31, 0xc78f7f18, 0xc6ddb359, 0x47722e1c, 0x46c0e716, + 0xc6e962e9); + // -90937.21875000, -72297.07031250, 18867.19531250, + // -516.01525879, -13301.11425781, 85173.41406250, + // -32079.35546875, -23770.60937500, 39295.43359375, + // 38042.19140625, -61343.24218750, 76844.01562500, + // 26642.50390625, 91893.05468750, 88349.72656250, + // 29134.96093750 + VLOAD_32(v12, 0xc7b19c9c, 0xc78d3489, 0x46936664, 0xc40100fa, 0xc64fd475, + 0x47a65ab5, 0xc6fa9eb6, 0xc6b9b538, 0x47197f6f, 0x47149a31, + 0xc76f9f3e, 0x47961602, 0x46d02502, 0x47b37a87, 0x47ac8edd, + 0x46e39dec); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwadd.vv v16, v8, v12, v0.t"); + // 0.0000000000000000, -124967.7656250000000000, + // 0.0000000000000000, 2703.8260498046875000, + // 0.0000000000000000, 39684.7148437500000000, + // 0.0000000000000000, -3549.3632812500000000, + // 0.0000000000000000, 105397.0117187500000000, + // 0.0000000000000000, 3373.8281250000000000, + // 0.0000000000000000, 153891.1640625000000000, + // 0.0000000000000000, 
-738.4941406250000000 + VCMP_U64(4, v16, 0x0, 0xc0fe827c40000000, 0x0, 0x40a51fa6f0000000, 0x0, + 0x40e36096e0000000, 0x0, 0xc0abbaba00000000, 0x0, 0x40f9bb5030000000, + 0x0, 0x40aa5ba800000000, 0x0, 0x4102c91950000000, 0x0, + 0xc08713f400000000); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 12.5859 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x4a4b); + // 2.1094, 59.2188, 89.4375, 75.2500, -4.0742, 64.7500, + // -82.9375, -84.9375, -48.0625, + // -6.1016, 29.9688, 10.6172, 52.5938, -8.5000, 70.5000, + // -83.6875 + VLOAD_16(v4, 0x4038, 0x5367, 0x5597, 0x54b4, 0xc413, 0x540c, 0xd52f, 0xd54f, + 0xd202, 0xc61a, 0x4f7e, 0x494f, 0x5293, 0xc840, 0x5468, 0xd53b); + asm volatile("vfwadd.vf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 14.69531250, 71.80468750, + // 102.02343750, 87.83593750, 8.51171875, 77.33593750, + // -70.35156250, -72.35156250, + // -35.47656250, 6.48437500, 42.55468750, 23.20312500, 65.17968750, + // 4.08593750, 83.08593750, -71.10156250 + VCMP_U32(5, v8, 0x416b2000, 0x428f9c00, 0x42cc0c00, 0x42afac00, 0x41083000, + 0x429aac00, 0xc28cb400, 0xc290b400, 0xc20de800, 0x40cf8000, + 0x422a3800, 0x41b9a000, 0x42825c00, 0x4082c000, 0x42a62c00, + 0xc28e3400); + + VSET(16, e32, m4); + double dscalar_32; + // -497871.25000000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8f319e8); + // -568507.00000000, 457995.96875000, 409553.18750000, + // -797866.87500000, 263520.50000000, 290072.90625000, + // 530785.75000000, 226941.43750000, 285558.65625000, + // -987117.31250000, -423428.21875000, -140664.31250000, + // -844580.25000000, 549948.50000000, 304879.87500000, + // -655518.43750000 + VLOAD_32(v8, 0xc90acbb0, 0x48dfa17f, 0x48c7fa26, 0xc942caae, 0x4880ac10, + 0x488da31d, 0x4901961c, 0x485d9f5c, 0x488b6ed5, 0xc970fed5, + 0xc8cec087, 0xc8095e14, 0xc94e3244, 0x490643c8, 0x4894ddfc, + 0xc92009e7); + asm volatile("vfwadd.vf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // -1066378.2500000000000000, 
-39875.2812500000000000, + // -88318.0625000000000000, -1295738.1250000000000000, + // -234350.7500000000000000, -207798.3437500000000000, + // 32914.5000000000000000, -270929.8125000000000000, + // -212312.5937500000000000, -1484988.5625000000000000, + // -921299.4687500000000000, -638535.5625000000000000, + // -1342451.5000000000000000, 52077.2500000000000000, + // -192991.3750000000000000, -1153389.6875000000000000 + VCMP_U64(6, v16, 0xc130458a40000000, 0xc0e3786900000000, 0xc0f58fe100000000, + 0xc133c57a20000000, 0xc10c9b7600000000, 0xc1095db2c0000000, + 0x40e0125000000000, 0xc110894740000000, 0xc109eac4c0000000, + 0xc136a8bc90000000, 0xc12c1da6f0000000, 0xc1237c8f20000000, + 0xc1347bf380000000, 0x40e96da800000000, 0xc1078efb00000000, + 0xc131996db0000000); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 12.5859 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x4a4b); + // 2.1094, 59.2188, 89.4375, 75.2500, -4.0742, 64.7500, + // -82.9375, -84.9375, -48.0625, + // -6.1016, 29.9688, 10.6172, 52.5938, -8.5000, 70.5000, + // -83.6875 + VLOAD_16(v4, 0x4038, 0x5367, 0x5597, 0x54b4, 0xc413, 0x540c, 0xd52f, 0xd54f, + 0xd202, 0xc61a, 0x4f7e, 0x494f, 0x5293, 0xc840, 0x5468, 0xd53b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwadd.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 71.80468750, 0.00000000, 87.83593750, + // 0.00000000, 77.33593750, 0.00000000, -72.35156250, + // 0.00000000, 6.48437500, 0.00000000, 23.20312500, + // 0.00000000, 4.08593750, 0.00000000, -71.10156250 + VCMP_U32(7, v8, 0x0, 0x428f9c00, 0x0, 0x42afac00, 0x0, 0x429aac00, 0x0, + 0xc290b400, 0x0, 0x40cf8000, 0x0, 0x41b9a000, 0x0, 0x4082c000, 0x0, + 0xc28e3400); + + VSET(16, e32, m4); + double dscalar_32; + // -497871.25000000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8f319e8); + // -568507.00000000, 457995.96875000, 409553.18750000, + // -797866.87500000, 263520.50000000, 
290072.90625000, + // 530785.75000000, 226941.43750000, 285558.65625000, + // -987117.31250000, -423428.21875000, -140664.31250000, + // -844580.25000000, 549948.50000000, 304879.87500000, + // -655518.43750000 + VLOAD_32(v8, 0xc90acbb0, 0x48dfa17f, 0x48c7fa26, 0xc942caae, 0x4880ac10, + 0x488da31d, 0x4901961c, 0x485d9f5c, 0x488b6ed5, 0xc970fed5, + 0xc8cec087, 0xc8095e14, 0xc94e3244, 0x490643c8, 0x4894ddfc, + 0xc92009e7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwadd.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, -39875.2812500000000000, + // 0.0000000000000000, -1295738.1250000000000000, + // 0.0000000000000000, -207798.3437500000000000, + // 0.0000000000000000, -270929.8125000000000000, + // 0.0000000000000000, -1484988.5625000000000000, + // 0.0000000000000000, -638535.5625000000000000, + // 0.0000000000000000, 52077.2500000000000000, + // 0.0000000000000000, -1153389.6875000000000000 + VCMP_U64(8, v16, 0x0, 0xc0e3786900000000, 0x0, 0xc133c57a20000000, 0x0, + 0xc1095db2c0000000, 0x0, 0xc110894740000000, 0x0, 0xc136a8bc90000000, + 0x0, 0xc1237c8f20000000, 0x0, 0x40e96da800000000, 0x0, + 0xc131996db0000000); +}; + +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -4.22890615, 94.35176849, -2.66183305, 81.53971863, + // -30.80995941, -61.45680237, 53.70308304, 26.43629074, + // -50.49792862, 12.57134342, -18.77090454, -0.50017655, + // -33.71574402, 14.64656544, 89.57390594, 94.32437897 + VLOAD_32(v4, 0xc0875333, 0x42bcb41b, 0xc02a5b79, 0x42a31456, 0xc1f67acc, + 0xc275d3c4, 0x4256cff5, 0x41d37d86, 0xc249fde1, 0x41492439, + 0xc1962ad0, 0xbf000b92, 0xc206dcec, 0x416a5855, 0x42b325d7, + 0x42bca615); + // -38.1875, + // -22.7500, 51.3438, 8.8594, 23.0938, 32.6875, 71.7500, 7.6758, + // -12.8047, 98.0625, -8.7734, -73.5625, + // -59.0312, 44.9688, 63.8438, 30.1406 + VLOAD_16(v8, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, + 0xca67, 0x5621, 0xc863, 
0xd499, 0xd361, 0x519f, 0x53fb, 0x4f89); + asm volatile("vfwadd.wv v12, v4, v8"); + // -42.41640472, 71.60176849, 48.68191528, 90.39909363, + // -7.71620941, -28.76930237, 125.45307922, 34.11207199, + // -63.30261612, 110.63384247, -27.54434204, -74.06267548, + // -92.74699402, 59.61531448, 153.41766357, 124.46500397 + VCMP_U32(9, v12, 0xc229aa66, 0x428f341b, 0x4242ba48, 0x42b4cc56, 0xc0f6eb30, + 0xc1e62788, 0x42fae7fa, 0x420872c3, 0xc27d35e1, 0x42dd4487, + 0xc1dc5ad0, 0xc2942017, 0xc2b97e76, 0x426e7615, 0x43196aec, + 0x42f8ee15); + + VSET(16, e32, m4); + // -55997.9824854041071376, 64501.1750668793683872, + // -29542.1742966430028901, -97235.1376669598394074, + // -76290.1568635256844573, -53719.7602741207738291, + // -71738.9507989753619768, 76087.4621994893532246, + // 88201.1680542646208778, -10676.6526112916035345, + // -87188.1793410585087258, 19855.3190320774738211, + // 17509.5892884960630909, 30185.4023848686192650, + // 54203.3669640090665780, -57884.3948619379953016 + VLOAD_64(v8, 0xc0eb57bf70853aee, 0x40ef7ea59a25db30, 0xc0dcd98b27ad1b60, + 0xc0f7bd3233e24524, 0xc0f2a0228283540c, 0xc0ea3af8542a6497, + 0xc0f183af3678fc84, 0x40f29377652b4ab0, 0x40f58892b059ab28, + 0xc0c4da5388c44d38, 0xc0f54942de94bad2, 0x40d363d46b0584c8, + 0x40d11965b6e718a0, 0x40dd7a59c0ac76c8, 0x40ea776bbe2b4e38, + 0xc0ec438ca2b580c4); + // -73536.78125000, 57454.64062500, -32693.95507812, + // -56205.09375000, 12513.70898438, 20858.82226562, + // 12284.94335938, 61625.35156250, -11893.36132812, + // -46430.30078125, 30247.85937500, -94111.64843750, + // 89016.30468750, -52090.74609375, 72764.65625000, + // -47109.86328125 + VLOAD_32(v16, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, + 0x46a2f5a5, 0x463ff3c6, 0x4770b95a, 0xc639d572, 0xc7355e4d, + 0x46ec4fb8, 0xc7b7cfd3, 0x47addc27, 0xc74b7abf, 0x478e1e54, + 0xc73805dd); + asm volatile("vfwadd.wv v24, v8, v16"); + // -129534.7637354041071376, 121955.8156918793683872, + // -62236.1293747680028901, -153440.2314169598394074, 
+ // -63776.4478791506844573, -32860.9380084957738291, + // -59454.0074396003619768, 137712.8137619893532246, + // 76307.8067261396208778, -57106.9533925416035345, + // -56940.3199660585087258, -74256.3294054225261789, + // 106525.8939759960630909, -21905.3437088813807350, + // 126968.0232140090665780, -104994.2581431879953016 + VCMP_U64(10, v24, 0xc0ff9fec38429d77, 0x40fdc63d0d12ed98, 0xc0ee638423d68db0, + 0xc102bb01d9f12292, 0xc0ef240e5506a818, 0xc0e00b9e042a6497, + 0xc0ed07c03cf1f908, 0x4100cf868295a558, 0x40f2a13ce859ab28, + 0xc0ebe25e8231134e, 0xc0ebcd8a3d2975a4, 0xc0f22105453e9ece, + 0x40fa01de4db9c628, 0xc0d56455ff538938, 0x40feff805f15a71c, + 0xc0f9a224215ac062); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as in TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -4.22890615, 94.35176849, -2.66183305, 81.53971863, + // -30.80995941, -61.45680237, 53.70308304, 26.43629074, + // -50.49792862, 12.57134342, -18.77090454, -0.50017655, + // -33.71574402, 14.64656544, 89.57390594, 94.32437897 + VLOAD_32(v4, 0xc0875333, 0x42bcb41b, 0xc02a5b79, 0x42a31456, 0xc1f67acc, + 0xc275d3c4, 0x4256cff5, 0x41d37d86, 0xc249fde1, 0x41492439, + 0xc1962ad0, 0xbf000b92, 0xc206dcec, 0x416a5855, 0x42b325d7, + 0x42bca615); + // -38.1875, + // -22.7500, 51.3438, 8.8594, 23.0938, 32.6875, 71.7500, 7.6758, + // -12.8047, 98.0625, -8.7734, -73.5625, + // -59.0312, 44.9688, 63.8438, 30.1406 + VLOAD_16(v8, 0xd0c6, 0xcdb0, 0x526b, 0x486e, 0x4dc6, 0x5016, 0x547c, 0x47ad, + 0xca67, 0x5621, 0xc863, 0xd499, 0xd361, 0x519f, 0x53fb, 0x4f89); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwadd.wv v12, v4, v8, v0.t"); + // 0.00000000, 71.60176849, 0.00000000, 90.39909363, + // 0.00000000, -28.76930237, 0.00000000, 34.11207199, + // 0.00000000, 110.63384247, 0.00000000, -74.06267548, + // 0.00000000, 59.61531448, 0.00000000, 124.46500397 + VCMP_U32(11, v12, 0x0, 0x428f341b, 0x0, 0x42b4cc56, 0x0, 0xc1e62788, 0x0, +
0x420872c3, 0x0, 0x42dd4487, 0x0, 0xc2942017, 0x0, 0x426e7615, 0x0, + 0x42f8ee15); + + VSET(16, e32, m4); + // -55997.9824854041071376, 64501.1750668793683872, + // -29542.1742966430028901, -97235.1376669598394074, + // -76290.1568635256844573, -53719.7602741207738291, + // -71738.9507989753619768, 76087.4621994893532246, + // 88201.1680542646208778, -10676.6526112916035345, + // -87188.1793410585087258, 19855.3190320774738211, + // 17509.5892884960630909, 30185.4023848686192650, + // 54203.3669640090665780, -57884.3948619379953016 + VLOAD_64(v8, 0xc0eb57bf70853aee, 0x40ef7ea59a25db30, 0xc0dcd98b27ad1b60, + 0xc0f7bd3233e24524, 0xc0f2a0228283540c, 0xc0ea3af8542a6497, + 0xc0f183af3678fc84, 0x40f29377652b4ab0, 0x40f58892b059ab28, + 0xc0c4da5388c44d38, 0xc0f54942de94bad2, 0x40d363d46b0584c8, + 0x40d11965b6e718a0, 0x40dd7a59c0ac76c8, 0x40ea776bbe2b4e38, + 0xc0ec438ca2b580c4); + // -73536.78125000, 57454.64062500, -32693.95507812, + // -56205.09375000, 12513.70898438, 20858.82226562, + // 12284.94335938, 61625.35156250, -11893.36132812, + // -46430.30078125, 30247.85937500, -94111.64843750, + // 89016.30468750, -52090.74609375, 72764.65625000, + // -47109.86328125 + VLOAD_32(v16, 0xc78fa064, 0x47606ea4, 0xc6ff6be9, 0xc75b8d18, 0x464386d6, + 0x46a2f5a5, 0x463ff3c6, 0x4770b95a, 0xc639d572, 0xc7355e4d, + 0x46ec4fb8, 0xc7b7cfd3, 0x47addc27, 0xc74b7abf, 0x478e1e54, + 0xc73805dd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwadd.wv v24, v8, v16, v0.t"); + // 0.0000000000000000, 121955.8156918793683872, + // 0.0000000000000000, -153440.2314169598394074, + // 0.0000000000000000, -32860.9380084957738291, + // 0.0000000000000000, 137712.8137619893532246, + // 0.0000000000000000, -57106.9533925416035345, + // 0.0000000000000000, -74256.3294054225261789, + // 0.0000000000000000, -21905.3437088813807350, + // 0.0000000000000000, -104994.2581431879953016 + VCMP_U64(12, v24, 0x0, 0x40fdc63d0d12ed98, 0x0, 0xc102bb01d9f12292, 0x0, + 0xc0e00b9e042a6497, 0x0, 
0x4100cf868295a558, 0x0, 0xc0ebe25e8231134e, + 0x0, 0xc0f22105453e9ece, 0x0, 0xc0d56455ff538938, 0x0, + 0xc0f9a224215ac062); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE7(void) { + VSET(16, e16, m2); + double dscalar_16; + // -16.64103889, 69.17821503, 38.24327850, + // -60.26666641, 97.95110321, -47.38455200, 94.12043762, + // -90.39623260, -17.02018356, 28.09982681, + // -85.91639709, 73.60102081, -98.61261749, + // -81.75864410, 40.07990265, 55.56723022 + VLOAD_32(v4, 0xc18520d9, 0x428a5b3f, 0x4218f91e, 0xc2711111, 0x42c3e6f7, + 0xc23d89c8, 0x42bc3daa, 0xc2b4cadf, 0xc1882956, 0x41e0cc72, + 0xc2abd532, 0x429333b9, 0xc2c539a9, 0xc2a3846d, 0x422051d2, + 0x425e44d8); + // 53.8750 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x52bc); + asm volatile("vfwadd.wf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 37.23396301, 123.05321503, 92.11827850, -6.39166641, + // 151.82611084, 6.49044800, 147.99543762, + // -36.52123260, 36.85481644, 81.97482300, -32.04139709, + // 127.47602081, -44.73761749, -27.88364410, 93.95490265, + // 109.44223022 + VCMP_U32(13, v8, 0x4214ef94, 0x42f61b3f, 0x42b83c8f, 0xc0cc8888, 0x4317d37c, + 0x40cfb1c0, 0x4313fed5, 0xc21215be, 0x42136b55, 0x42a3f31c, + 0xc2002a64, 0x42fef3b9, 0xc232f352, 0xc1df11b4, 0x42bbe8e9, + 0x42dae26c); + + VSET(16, e32, m4); + double dscalar_32; + // 366783.2934919928666204, -648147.5638866436202079, + // 24949.3815817765425891, -211759.8585660880198702, + // 337740.3714956413023174, -528559.3617047512670979, + // -863948.2704646114725620, -463848.5980863422155380, + // 958859.3069495267700404, -853775.5625991101842374, + // -7020.6864214694360271, 839278.6509590207133442, + // -443325.1460256360005587, 97289.3425237806513906, + // 220009.0786798361223191, 491284.4355713783297688 + VLOAD_64(v8, 0x411662fd2c892a3c, 0xc123c7a720b5c00a, 0x40d85d586bd5f8c0, + 0xc109d97ede57e5ac, 0x41149d317c695a78, 0xc120215eb9315d7b, + 0xc12a5d988a7a566a, 0xc11c4fa26470bf00, 0x412d43169d287d06, + 0xc12a0e1f200cfd96, 
0xc0bb6cafb9514280, 0x41299cdd4d4a8032, + 0xc11b0ef49587be8c, 0x40f7c0957afa3740, 0x410adb48a122e4d8, + 0x411dfc51be066c64); + // 572932.37500000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x490be046); + asm volatile("vfwadd.wf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 939715.6684919928666204, -75215.1888866436202079, + // 597881.7565817765425891, 361172.5164339119801298, + // 910672.7464956413023174, 44373.0132952487329021, + // -291015.8954646114725620, 109083.7769136577844620, + // 1531791.6819495267700404, -280843.1875991101842374, + // 565911.6885785305639729, 1412211.0259590207133442, + // 129607.2289743639994413, 670221.7175237806513906, + // 792941.4536798361223191, 1064216.8105713783297688 + VCMP_U64(14, v16, 0x412cad875644951e, 0xc0f25cf305ae0050, 0x41223ef3835eafc6, + 0x41160b5210d40d2a, 0x412bcaa17e34ad3c, 0x40e5aaa06cea2850, + 0xc111c31f94f4acd4, 0x40faa1bc6e3d0400, 0x41375f8fae943e83, + 0xc111242cc019fb2c, 0x4121452f608d5d7b, 0x41358c7306a54019, + 0x40ffa473a9e105d0, 0x4124741b6f5f46e8, 0x412832dae848b936, + 0x41303d18cf819b19); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE8(void) { + VSET(16, e16, m2); + double dscalar_16; + // -16.64103889, 69.17821503, 38.24327850, + // -60.26666641, 97.95110321, -47.38455200, 94.12043762, + // -90.39623260, -17.02018356, 28.09982681, + // -85.91639709, 73.60102081, -98.61261749, + // -81.75864410, 40.07990265, 55.56723022 + VLOAD_32(v4, 0xc18520d9, 0x428a5b3f, 0x4218f91e, 0xc2711111, 0x42c3e6f7, + 0xc23d89c8, 0x42bc3daa, 0xc2b4cadf, 0xc1882956, 0x41e0cc72, + 0xc2abd532, 0x429333b9, 0xc2c539a9, 0xc2a3846d, 0x422051d2, + 0x425e44d8); + // 53.8750 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x52bc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwadd.wf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 123.05321503, 0.00000000, -6.39166641, + // 0.00000000, 6.49044800, 0.00000000, -36.52123260, + // 0.00000000, 81.97482300, 0.00000000, 127.47602081, + // 
0.00000000, -27.88364410, 0.00000000, 109.44223022 + VCMP_U32(15, v8, 0x0, 0x42f61b3f, 0x0, 0xc0cc8888, 0x0, 0x40cfb1c0, 0x0, + 0xc21215be, 0x0, 0x42a3f31c, 0x0, 0x42fef3b9, 0x0, 0xc1df11b4, 0x0, + 0x42dae26c); + + VSET(16, e32, m4); + double dscalar_32; + // 366783.2934919928666204, -648147.5638866436202079, + // 24949.3815817765425891, -211759.8585660880198702, + // 337740.3714956413023174, -528559.3617047512670979, + // -863948.2704646114725620, -463848.5980863422155380, + // 958859.3069495267700404, -853775.5625991101842374, + // -7020.6864214694360271, 839278.6509590207133442, + // -443325.1460256360005587, 97289.3425237806513906, + // 220009.0786798361223191, 491284.4355713783297688 + VLOAD_64(v8, 0x411662fd2c892a3c, 0xc123c7a720b5c00a, 0x40d85d586bd5f8c0, + 0xc109d97ede57e5ac, 0x41149d317c695a78, 0xc120215eb9315d7b, + 0xc12a5d988a7a566a, 0xc11c4fa26470bf00, 0x412d43169d287d06, + 0xc12a0e1f200cfd96, 0xc0bb6cafb9514280, 0x41299cdd4d4a8032, + 0xc11b0ef49587be8c, 0x40f7c0957afa3740, 0x410adb48a122e4d8, + 0x411dfc51be066c64); + // 572932.37500000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x490be046); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwadd.wf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, -75215.1888866436202079, + // 0.0000000000000000, 361172.5164339119801298, + // 0.0000000000000000, 44373.0132952487329021, + // 0.0000000000000000, 109083.7769136577844620, + // 0.0000000000000000, -280843.1875991101842374, + // 0.0000000000000000, 1412211.0259590207133442, + // 0.0000000000000000, 670221.7175237806513906, + // 0.0000000000000000, 1064216.8105713783297688 + VCMP_U64(16, v16, 0x0, 0xc0f25cf305ae0050, 0x0, 0x41160b5210d40d2a, 0x0, + 0x40e5aaa06cea2850, 0x0, 0x40faa1bc6e3d0400, 0x0, 0xc111242cc019fb2c, + 0x0, 0x41358c7306a54019, 0x0, 0x4124741b6f5f46e8, 0x0, + 0x41303d18cf819b19); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + 
TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c new file mode 100644 index 000000000..195759577 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwcvt.c @@ -0,0 +1,670 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "float_macros.h" +#include "vector_macros.h" + +// We assume RNE rounding when not specified by the encoding + +///////////////// +// vfwcvt.xu.f // +///////////////// + +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 56.438, -30.938, -68.438, -32.969, 56.438, + // -5.816, 53.094, -29.875, -93.562, -90.750, -65.875, + // -91.062, 16.281, -77.938, -67.000, -51.844 + VLOAD_16(v4, 0x530e, 0xcfbc, 0xd447, 0xd01f, 0x530e, 0xc5d1, 0x52a3, 0xcf78, + 0xd5d9, 0xd5ac, 0xd41e, 0xd5b1, 0x4c12, 0xd4df, 0xd430, 0xd27b); + asm volatile("vfwcvt.xu.f.v v8, v4"); + // 56, 0, 0, 0, 56, 0, 53, + // 0, 0, 0, 0, 0, 16, 0, + // 0, 0 + VCMP_U32(1, v8, 0x00000038, 0x00000000, 0x00000000, 0x00000000, 0x00000038, + 0x00000000, 0x00000035, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000010, 0x00000000, 0x00000000, + 0x00000000); + + VSET(16, e32, m4); + // -54444.973, 43481.863, 88447.461, 32690.551, + // -37979.809, 68218.094, -43036.512, -38011.395, + // -36599.363, 48418.234, 81414.820, 16330.853, + // 75606.320, -85030.219, 13033.059, 7375.421 + VLOAD_32(v8, 0xc754acf9, 0x4729d9dd, 0x47acbfbb, 0x46ff651a, 0xc7145bcf, + 0x47853d0c, 0xc7281c83, 0xc7147b65, 0xc70ef75d, 0x473d223c, + 0x479f0369, 0x467f2b69, 0x4793ab29, 0xc7a6131c, 0x464ba43c, + 0x45e67b5f); + asm volatile("vfwcvt.xu.f.v v16, v8"); + // 0, 43482, 88447, 32691, + // 0, 68218, 0, 0, 0, + // 48418, 81415, 16331, + // 75606, 0, 13033, + // 7375 + 
VCMP_U64(2, v16, 0x0000000000000000, 0x000000000000a9da, 0x000000000001597f, + 0x0000000000007fb3, 0x0000000000000000, 0x0000000000010a7a, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x000000000000bd22, 0x0000000000013e07, 0x0000000000003fcb, + 0x0000000000012756, 0x0000000000000000, 0x00000000000032e9, + 0x0000000000001ccf); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -36.375, 56.438, -68.438, -32.969, 56.438, + // -5.816, 53.094, -29.875, -93.562, -90.750, -65.875, + // -91.062, 16.281, -77.938, -67.000, -51.844 + VLOAD_16(v4, 0xd08c, 0x530e, 0xd447, 0xd01f, 0x530e, 0xc5d1, 0x52a3, 0xcf78, + 0xd5d9, 0xd5ac, 0xd41e, 0xd5b1, 0x4c12, 0xd4df, 0xd430, 0xd27b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.xu.f.v v8, v4, v0.t"); + // 0, 56, 0, 0, 0, 0, 0, 0, + // 0, 0, 0, 0, 0, 0, 0, 0 + VCMP_U32(3, v8, 0x00000000, 0x00000038, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000); + + VSET(16, e32, m4); + // -54444.973, 43481.863, 88447.461, 32690.551, -37979.809, + // 68218.094, -43036.512, -38011.395, -36599.363, 48418.234, + // 81414.820, 16330.853, 75606.320, -85030.219, 13033.059, + // 7375.421 + VLOAD_32(v8, 0xc754acf9, 0x4729d9dd, 0x47acbfbb, 0x46ff651a, 0xc7145bcf, + 0x47853d0c, 0xc7281c83, 0xc7147b65, 0xc70ef75d, 0x473d223c, + 0x479f0369, 0x467f2b69, 0x4793ab29, 0xc7a6131c, 0x464ba43c, + 0x45e67b5f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.xu.f.v v16, v8, v0.t"); + // 0, 43482, 0, 32691, 0, + // 68218, 0, 0, 0, + // 48418, 0, 16331, 0, + // 0, 0, 7375 + VCMP_U64(4, v16, 0x0000000000000000, 0x000000000000a9da, 0x0000000000000000, + 0x0000000000007fb3, 0x0000000000000000, 0x0000000000010a7a, + 0x0000000000000000, 0x0000000000000000, 
0x0000000000000000, + 0x000000000000bd22, 0x0000000000000000, 0x0000000000003fcb, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000001ccf); +}; + +//////////////// +// vfwcvt.x.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE3(void) { + VSET(16, e16, m2); + // -55.656, -23.391, 53.094, -0.356, 26.859, -81.938, + // 63.625, -54.594, -36.375, 77.312, 73.188, -79.500, + // -22.047, -30.500, 33.375, -26.281 + VLOAD_16(v4, 0xd2f5, 0xcdd9, 0x52a3, 0xb5b2, 0x4eb7, 0xd51f, 0x53f4, 0xd2d3, + 0xd08c, 0x54d5, 0x5493, 0xd4f8, 0xcd83, 0xcfa0, 0x502c, 0xce92); + asm volatile("vfwcvt.x.f.v v8, v4"); + // -56, -23, 53, 0, 27, -82, + // 64, -55, -36, 77, + // 73, -80, -22, -30, + // 33, -26 + VCMP_U32(5, v8, 0xffffffc8, 0xffffffe9, 0x00000035, 0x00000000, 0x0000001b, + 0xffffffae, 0x00000040, 0xffffffc9, 0xffffffdc, 0x0000004d, + 0x00000049, 0xffffffb0, 0xffffffea, 0xffffffe2, 0x00000021, + 0xffffffe6); + + VSET(16, e32, m4); + // -22345.104, -55208.160, 60155.754, -4924.268, + // -42337.285, -60609.004, 51795.328, 33876.547, + // -99812.922, 59419.867, -78706.844, 72266.555, + // -70664.008, -83501.727, -15981.749, -2004.535 + VLOAD_32(v8, 0xc6ae9235, 0xc757a829, 0x476afbc1, 0xc599e225, 0xc7256149, + 0xc76cc101, 0x474a5354, 0x4704548c, 0xc7c2f276, 0x47681bde, + 0xc799b96c, 0x478d2547, 0xc78a0401, 0xc7a316dd, 0xc679b6ff, + 0xc4fa9120); + asm volatile("vfwcvt.x.f.v v16, v8"); + // -22345, -55208, 60156, -4924, + // -42337, -60609, 51795, 33877, + // -99813, 59420, -78707, 72267, + // -70664, -83502, -15982, -2005 + VCMP_U64(6, v16, 0xffffffffffffa8b7, 0xffffffffffff2858, 0x000000000000eafc, + 0xffffffffffffecc4, 0xffffffffffff5a9f, 0xffffffffffff133f, + 0x000000000000ca53, 0x0000000000008455, 0xfffffffffffe7a1b, + 0x000000000000e81c, 0xfffffffffffecc8d, 0x0000000000011a4b, + 0xfffffffffffeebf8, 0xfffffffffffeb9d2, 0xffffffffffffc192, + 0xfffffffffffff82b); +}; + +// Simple random test with similar values (masked) +// The 
numbers are the same of TEST_CASE1 +void TEST_CASE4(void) { + VSET(16, e16, m2); + // -55.656, -23.391, 53.094, -0.356, 26.859, + // -81.938, 63.625, -54.594, -36.375, 77.312, 73.188, + // -79.500, -22.047, -30.500, 33.375, -26.281 + VLOAD_16(v4, 0xd2f5, 0xcdd9, 0x52a3, 0xb5b2, 0x4eb7, 0xd51f, 0x53f4, 0xd2d3, + 0xd08c, 0x54d5, 0x5493, 0xd4f8, 0xcd83, 0xcfa0, 0x502c, 0xce92); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.x.f.v v8, v4, v0.t"); + // 0, -23, 0, 0, 0, -82, 0, + // -55, 0, 77, 0, -80, 0, + // -30, 0, -26 + VCMP_U32(7, v8, 0x00000000, 0xffffffe9, 0x00000000, 0x00000000, 0x00000000, + 0xffffffae, 0x00000000, 0xffffffc9, 0x00000000, 0x0000004d, + 0x00000000, 0xffffffb0, 0x00000000, 0xffffffe2, 0x00000000, + 0xffffffe6); + + VSET(16, e32, m4); + // -22345.104, -55208.160, 60155.754, -4924.268, + // -42337.285, -60609.004, 51795.328, 33876.547, + // -99812.922, 59419.867, -78706.844, 72266.555, + // -70664.008, -83501.727, -15981.749, -2004.535 + VLOAD_32(v8, 0xc6ae9235, 0xc757a829, 0x476afbc1, 0xc599e225, 0xc7256149, + 0xc76cc101, 0x474a5354, 0x4704548c, 0xc7c2f276, 0x47681bde, + 0xc799b96c, 0x478d2547, 0xc78a0401, 0xc7a316dd, 0xc679b6ff, + 0xc4fa9120); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.x.f.v v16, v8, v0.t"); + // 0, -55208, 0, -4924, 0, + // -60609, 0, 33877, 0, + // 59420, 0, 72267, 0, + // -83502, 0, -2005 + VCMP_U64(8, v16, 0x0000000000000000, 0xffffffffffff2858, 0x0000000000000000, + 0xffffffffffffecc4, 0x0000000000000000, 0xffffffffffff133f, + 0x0000000000000000, 0x0000000000008455, 0x0000000000000000, + 0x000000000000e81c, 0x0000000000000000, 0x0000000000011a4b, + 0x0000000000000000, 0xfffffffffffeb9d2, 0x0000000000000000, + 0xfffffffffffff82b); +}; + +///////////////////// +// vfwcvt.rtz.xu.f // +///////////////////// + +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // 26304.000, -31056.000, 6932.000, 63168.000, -10920.000, + // 
-38528.000, inf, -inf, -1313.000, 52736.000, inf, + // -inf, -61024.000, -inf, -5672.000, 53824.000 + VLOAD_16(v4, 0x766c, 0xf795, 0x6ec5, 0x7bb6, 0xf155, 0xf8b4, 0x7c00, 0xfc00, + 0xe521, 0x7a70, 0x7c00, 0xfc00, 0xfb73, 0xfc00, 0xed8a, 0x7a92); + asm volatile("vfwcvt.rtz.xu.f.v v8, v4"); + // 26304, 0, 6932, 63168, 0, 0, + // 0, 0, 0, 52736, 0, + // 0, 0, 0, 0, + // 53824 + VCMP_U32(9, v8, 0x000066c0, 0x00000000, 0x00001b14, 0x0000f6c0, 0x00000000, + 0x00000000, 0xffffffff, 0x00000000, 0x00000000, 0x0000ce00, + 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000d240); + + VSET(16, e32, m4); + // -139027333120.000, 783549530112.000, 166903955456.000, + // -848099999744.000, -671032279040.000, -402660294656.000, + // -259808854016.000, 248555126784.000, 712853684224.000, + // -492155797504.000, -448682098688.000, -916605566976.000, + // 67602378752.000, 519669350400.000, 569111478272.000, + // -920229773312.000 + VLOAD_32(v8, 0xd2017ab3, 0x53366f31, 0x521b7101, 0xd34576b3, 0xd31c3ca4, + 0xd2bb80d9, 0xd271f742, 0x52677c29, 0x5325f964, 0xd2e52d8b, + 0xd2d0ef13, 0xd35569f3, 0x517bd6a7, 0x52f1fd6a, 0x530481b0, + 0xd35641f8); + asm volatile("vfwcvt.rtz.xu.f.v v16, v8"); + // 0, 783549530112, 166903955456, + // 0, 0, 0, 0, + // 248555126784, 712853684224, 0, + // 0, 0, 67602378752, + // 519669350400, 569111478272, 0 + VCMP_U64(10, v16, 0x0000000000000000, 0x000000b66f310000, 0x00000026dc404000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000039df0a4000, 0x000000a5f9640000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000fbd6a7000, 0x00000078feb50000, 0x0000008481b00000, + 0x0000000000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // 26304.000, -31056.000, 6932.000, 63168.000, -10920.000, + // -38528.000, inf, -inf, -1313.000, 52736.000, inf, + // -inf, -61024.000, -inf, 
-5672.000, 53824.000 + VLOAD_16(v4, 0x766c, 0xf795, 0x6ec5, 0x7bb6, 0xf155, 0xf8b4, 0x7c00, 0xfc00, + 0xe521, 0x7a70, 0x7c00, 0xfc00, 0xfb73, 0xfc00, 0xed8a, 0x7a92); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.rtz.xu.f.v v8, v4, v0.t"); + // 0, 0, 0, 63168, 0, 0, + // 0, 0, 0, 52736, 0, 0, + // 0, 0, 0, 53824 + VCMP_U32(11, v8, 0x00000000, 0x00000000, 0x00000000, 0x0000f6c0, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ce00, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000d240); + + VSET(16, e32, m4); + // -139027333120.000, 783549530112.000, 166903955456.000, + // -848099999744.000, -671032279040.000, -402660294656.000, + // -259808854016.000, 248555126784.000, 712853684224.000, + // -492155797504.000, -448682098688.000, -916605566976.000, + // 67602378752.000, 519669350400.000, 569111478272.000, + // -920229773312.000 + VLOAD_32(v8, 0xd2017ab3, 0x53366f31, 0x521b7101, 0xd34576b3, 0xd31c3ca4, + 0xd2bb80d9, 0xd271f742, 0x52677c29, 0x5325f964, 0xd2e52d8b, + 0xd2d0ef13, 0xd35569f3, 0x517bd6a7, 0x52f1fd6a, 0x530481b0, + 0xd35641f8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.rtz.xu.f.v v16, v8, v0.t"); + // 0, 783549530112, 0, 0, 0, 0, 0, + // 248555126784, 0, 0, + // 0, 0, 0, + // 519669350400, 0, 0 + VCMP_U64(12, v16, 0x0000000000000000, 0x000000b66f310000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000039df0a4000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x00000078feb50000, 0x0000000000000000, + 0x0000000000000000); +}; + +//////////////////// +// vfwcvt.rtz.x.f // +//////////////////// + +// Simple random test with similar values +void TEST_CASE7(void) { + VSET(16, e16, m2); + // 5.844, 36.219, -86.250, 20.406, -45.688, 13.961, + // -96.562, 81.000, -32.594, 51.281, 80.750, + // -17.750, 14.516, 58.000, 69.938, -94.688 + 
VLOAD_16(v4, 0x45d8, 0x5087, 0xd564, 0x4d1a, 0xd1b6, 0x4afb, 0xd609, 0x5510, + 0xd013, 0x5269, 0x550c, 0xcc70, 0x4b42, 0x5340, 0x545f, 0xd5eb); + asm volatile("vfwcvt.rtz.x.f.v v8, v4"); + // 5, 36, -86, 20, -45, 13, + // -96, 81, -32, 51, 80, -17, + // 14, 58, 69, -94 + VCMP_U32(13, v8, 0x00000005, 0x00000024, 0xffffffaa, 0x00000014, 0xffffffd3, + 0x0000000d, 0xffffffa0, 0x00000051, 0xffffffe0, 0x00000033, + 0x00000050, 0xffffffef, 0x0000000e, 0x0000003a, 0x00000045, + 0xffffffa2); + + VSET(16, e32, m4); + // 2116.345, -810274979840.000, -5833.340, -6088.383, + // -9260.508, -2389.850, 9361.639, 5574.592, -6825.026, + // 2473.934, -6756.971, -7155.075, 2251.162, -2899.548, + // -3184.759, -1954.714 + VLOAD_32(v8, 0x45044584, 0xd33ca827, 0xc5b64ab9, 0xc5be4311, 0xc610b208, + 0xc5155d98, 0x4612468e, 0x45ae34bd, 0xc5d54835, 0x451a9ef1, + 0xc5d327c5, 0xc5df9899, 0x450cb297, 0xc53538c6, 0xc5470c23, + 0xc4f456dc); + asm volatile("vfwcvt.rtz.x.f.v v16, v8"); + // 2116, -810274979840, -5833, -6088, + // -9260, -2389, 9361, 5574, + // -6825, 2473, -6756, -7155, + // 2251, -2899, -3184, -1954 + VCMP_U64(14, v16, 0x0000000000000844, 0xffffff4357d90000, 0xffffffffffffe937, + 0xffffffffffffe838, 0xffffffffffffdbd4, 0xfffffffffffff6ab, + 0x0000000000002491, 0x00000000000015c6, 0xffffffffffffe557, + 0x00000000000009a9, 0xffffffffffffe59c, 0xffffffffffffe40d, + 0x00000000000008cb, 0xfffffffffffff4ad, 0xfffffffffffff390, + 0xfffffffffffff85e); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE8(void) { + VSET(16, e16, m2); + // 5.844, 36.219, -86.250, 20.406, -45.688, 13.961, + // -96.562, 81.000, -32.594, 51.281, 80.750, + // -17.750, 14.516, 58.000, 69.938, -94.688 + VLOAD_16(v4, 0x45d8, 0x5087, 0xd564, 0x4d1a, 0xd1b6, 0x4afb, 0xd609, 0x5510, + 0xd013, 0x5269, 0x550c, 0xcc70, 0x4b42, 0x5340, 0x545f, 0xd5eb); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.rtz.x.f.v v8, v4, v0.t"); + 
// 0, 36, 0, 20, 0, 13, 0, + // 81, 0, 51, 0, -17, 0, + // 58, 0, -94 + VCMP_U32(15, v8, 0x00000000, 0x00000024, 0x00000000, 0x00000014, 0x00000000, + 0x0000000d, 0x00000000, 0x00000051, 0x00000000, 0x00000033, + 0x00000000, 0xffffffef, 0x00000000, 0x0000003a, 0x00000000, + 0xffffffa2); + + VSET(16, e32, m4); + // 2116.345, -6652.860, -5833.340, -6088.383, -9260.508, + // -2389.850, 9361.639, 5574.592, -6825.026, 2473.934, + // -6756.971, -7155.075, 2251.162, -2899.548, -3184.759, + // -1954.714 + VLOAD_32(v8, 0x45044584, 0xc5cfe6e1, 0xc5b64ab9, 0xc5be4311, 0xc610b208, + 0xc5155d98, 0x4612468e, 0x45ae34bd, 0xc5d54835, 0x451a9ef1, + 0xc5d327c5, 0xc5df9899, 0x450cb297, 0xc53538c6, 0xc5470c23, + 0xc4f456dc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.rtz.x.f.v v16, v8, v0.t"); + // 0, -6652, 0, -6088, 0, + // -2389, 0, 5574, 0, + // 2473, 0, -7155, 0, + // -2899, 0, -1954 + VCMP_U64(16, v16, 0x0000000000000000, 0xffffffffffffe604, 0x0000000000000000, + 0xffffffffffffe838, 0x0000000000000000, 0xfffffffffffff6ab, + 0x0000000000000000, 0x00000000000015c6, 0x0000000000000000, + 0x00000000000009a9, 0x0000000000000000, 0xffffffffffffe40d, + 0x0000000000000000, 0xfffffffffffff4ad, 0x0000000000000000, + 0xfffffffffffff85e); +}; + +///////////////// +// vfwcvt.f.xu // +///////////////// + +// Simple random test with similar values +void TEST_CASE9(void) { + VSET(16, e16, m2); + // 64656, 64687, 64823, 970, 543, + // 65038, 65122, 966, 180, 389, 337, + // 341, 65240, 51, 64922, 64676 + VLOAD_16(v4, 0xfc90, 0xfcaf, 0xfd37, 0x03ca, 0x021f, 0xfe0e, 0xfe62, 0x03c6, + 0x00b4, 0x0185, 0x0151, 0x0155, 0xfed8, 0x0033, 0xfd9a, 0xfca4); + asm volatile("vfwcvt.f.xu.v v8, v4"); + // 64656.000, 64687.000, 64823.000, 970.000, + // 543.000, 65038.000, 65122.000, 966.000, + // 180.000, 389.000, 337.000, 341.000, + // 65240.000, 51.000, 64922.000, 64676.000 + VCMP_U32(17, v8, 0x477c9000, 0x477caf00, 0x477d3700, 0x44728000, 0x4407c000, + 0x477e0e00, 
0x477e6200, 0x44718000, 0x43340000, 0x43c28000, + 0x43a88000, 0x43aa8000, 0x477ed800, 0x424c0000, 0x477d9a00, + 0x477ca400); + + VSET(16, e32, m4); + // 97144, 4294936082, 42555, + // 4294893205, 55337, 4294948570, + // 4294931792, 4294924170, 4294912208, + // 4294947132, 4294903099, 4294944521, + // 4294923920, 4294889958, 31133, 30359 + VLOAD_32(v8, 0x00017b78, 0xffff8612, 0x0000a63b, 0xfffede95, 0x0000d829, + 0xffffb6da, 0xffff7550, 0xffff578a, 0xffff28d0, 0xffffb13c, + 0xffff053b, 0xffffa709, 0xffff5690, 0xfffed1e6, 0x0000799d, + 0x00007697); + asm volatile("vfwcvt.f.xu.v v16, v8"); + // 97144.000, 4294936082.000, 42555.000, + // 4294893205.000, 55337.000, 4294948570.000, + // 4294931792.000, 4294924170.000, 4294912208.000, + // 4294947132.000, 4294903099.000, 4294944521.000, + // 4294923920.000, 4294889958.000, 31133.000, + // 30359.000 + VCMP_U64(18, v16, 0x40f7b78000000000, 0x41effff0c2400000, 0x40e4c76000000000, + 0x41efffdbd2a00000, 0x40eb052000000000, 0x41effff6db400000, + 0x41efffeeaa000000, 0x41efffeaf1400000, 0x41efffe51a000000, + 0x41effff627800000, 0x41efffe0a7600000, 0x41effff4e1200000, + 0x41efffead2000000, 0x41efffda3cc00000, 0x40de674000000000, + 0x40dda5c000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE10(void) { + VSET(16, e16, m2); + // 64656, 64687, 64823, 970, 543, + // 65038, 65122, 966, 180, 389, 337, + // 341, 65240, 51, 64922, 64676 + VLOAD_16(v4, 0xfc90, 0xfcaf, 0xfd37, 0x03ca, 0x021f, 0xfe0e, 0xfe62, 0x03c6, + 0x00b4, 0x0185, 0x0151, 0x0155, 0xfed8, 0x0033, 0xfd9a, 0xfca4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.f.xu.v v8, v4, v0.t"); + // 0.000, 64687.000, 0.000, 970.000, 0.000, + // 65038.000, 0.000, 966.000, 0.000, 389.000, + // 0.000, 341.000, 0.000, 51.000, 0.000, + // 64676.000 + VCMP_U32(19, v8, 0x0, 0x477caf00, 0x0, 0x44728000, 0x0, 0x477e0e00, 0x0, + 0x44718000, 0x0, 0x43c28000, 0x0, 0x43aa8000, 0x0, 
0x424c0000, 0x0, + 0x477ca400); + + VSET(16, e32, m4); + // 97144, 4294936082, 42555, + // 4294893205, 55337, 4294948570, + // 4294931792, 4294924170, 4294912208, + // 4294947132, 4294903099, 4294944521, + // 4294923920, 4294889958, 31133, 30359 + VLOAD_32(v8, 0x00017b78, 0xffff8612, 0x0000a63b, 0xfffede95, 0x0000d829, + 0xffffb6da, 0xffff7550, 0xffff578a, 0xffff28d0, 0xffffb13c, + 0xffff053b, 0xffffa709, 0xffff5690, 0xfffed1e6, 0x0000799d, + 0x00007697); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.f.xu.v v16, v8, v0.t"); + // 0.000, 4294936082.000, 0.000, 4294893205.000, + // 0.000, 4294948570.000, 0.000, 4294924170.000, + // 0.000, 4294947132.000, 0.000, 4294944521.000, + // 0.000, 4294889958.000, 0.000, 30359.000 + VCMP_U64(20, v16, 0x0, 0x41effff0c2400000, 0x0, 0x41efffdbd2a00000, 0x0, + 0x41effff6db400000, 0x0, 0x41efffeaf1400000, 0x0, 0x41effff627800000, + 0x0, 0x41effff4e1200000, 0x0, 0x41efffda3cc00000, 0x0, + 0x40dda5c000000000); +}; + +//////////////// +// vfwcvt.f.x // +//////////////// + +// Simple random test with similar values +void TEST_CASE11(void) { + VSET(16, e16, m2); + // -263, -943, 111, -140, -792, + // -320, -384, 250, -308, 578, -830, + // -865, 908, 264, 93, 833 + VLOAD_16(v4, 0xfef9, 0xfc51, 0x006f, 0xff74, 0xfce8, 0xfec0, 0xfe80, 0x00fa, + 0xfecc, 0x0242, 0xfcc2, 0xfc9f, 0x038c, 0x0108, 0x005d, 0x0341); + asm volatile("vfwcvt.f.x.v v8, v4"); + // -263.000, -943.000, 111.000, -140.000, + // -792.000, -320.000, -384.000, 250.000, + // -308.000, 578.000, -830.000, -865.000, + // 908.000, 264.000, 93.000, 833.000 + VCMP_U32(21, v8, 0xc3838000, 0xc46bc000, 0x42de0000, 0xc30c0000, 0xc4460000, + 0xc3a00000, 0xc3c00000, 0x437a0000, 0xc39a0000, 0x44108000, + 0xc44f8000, 0xc4584000, 0x44630000, 0x43840000, 0x42ba0000, + 0x44504000); + + VSET(16, e32, m4); + // -85277, 33391, 84804, + // -45155, -68903, 19141, + // -10026, 87992, 13128, + // 95737, -70832, 43360, + // 32471, 51, 50027, + // -57346 + VLOAD_32(v8, 
0xfffeb2e3, 0x0000826f, 0x00014b44, 0xffff4f9d, 0xfffef2d9, + 0x00004ac5, 0xffffd8d6, 0x000157b8, 0x00003348, 0x000175f9, + 0xfffeeb50, 0x0000a960, 0x00007ed7, 0x00000033, 0x0000c36b, + 0xffff1ffe); + asm volatile("vfwcvt.f.x.v v16, v8"); + // -85277.000, 33391.000, 84804.000, -45155.000, + // -68903.000, 19141.000, -10026.000, 87992.000, + // 13128.000, 95737.000, -70832.000, 43360.000, + // 32471.000, 51.000, 50027.000, -57346.000 + VCMP_U64(22, v16, 0xc0f4d1d000000000, 0x40e04de000000000, 0x40f4b44000000000, + 0xc0e60c6000000000, 0xc0f0d27000000000, 0x40d2b14000000000, + 0xc0c3950000000000, 0x40f57b8000000000, 0x40c9a40000000000, + 0x40f75f9000000000, 0xc0f14b0000000000, 0x40e52c0000000000, + 0x40dfb5c000000000, 0x4049800000000000, 0x40e86d6000000000, + 0xc0ec004000000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE12(void) { + VSET(16, e16, m2); + // -263, -943, 111, -140, -792, + // -320, -384, 250, -308, 578, -830, + // -865, 908, 264, 93, 833 + VLOAD_16(v4, 0xfef9, 0xfc51, 0x006f, 0xff74, 0xfce8, 0xfec0, 0xfe80, 0x00fa, + 0xfecc, 0x0242, 0xfcc2, 0xfc9f, 0x038c, 0x0108, 0x005d, 0x0341); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.f.x.v v8, v4, v0.t"); + // 0.000, -943.000, 0.000, -140.000, 0.000, + // -320.000, 0.000, 250.000, 0.000, 578.000, + // 0.000, -865.000, 0.000, 264.000, 0.000, + // 833.000 + VCMP_U32(23, v8, 0x0, 0xc46bc000, 0x0, 0xc30c0000, 0x0, 0xc3a00000, 0x0, + 0x437a0000, 0x0, 0x44108000, 0x0, 0xc4584000, 0x0, 0x43840000, 0x0, + 0x44504000); + + VSET(16, e32, m4); + // -85277, 33391, 84804, + // -45155, -68903, 19141, + // -10026, 87992, 13128, + // 95737, -70832, 43360, + // 32471, 51, 50027, + // -57346 + VLOAD_32(v8, 0xfffeb2e3, 0x0000826f, 0x00014b44, 0xffff4f9d, 0xfffef2d9, + 0x00004ac5, 0xffffd8d6, 0x000157b8, 0x00003348, 0x000175f9, + 0xfffeeb50, 0x0000a960, 0x00007ed7, 0x00000033, 0x0000c36b, + 0xffff1ffe); + VLOAD_8(v0, 0xAA, 
0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.f.x.v v16, v8, v0.t"); + // 0.000, 33391.000, 0.000, -45155.000, 0.000, + // 19141.000, 0.000, 87992.000, 0.000, + // 95737.000, 0.000, 43360.000, + // 0.000, 51.000, 0.000, -57346.000 + VCMP_U64(24, v16, 0x0, 0x40e04de000000000, 0x0, 0xc0e60c6000000000, 0x0, + 0x40d2b14000000000, 0x0, 0x40f57b8000000000, 0x0, 0x40f75f9000000000, + 0x0, 0x40e52c0000000000, 0x0, 0x4049800000000000, 0x0, + 0xc0ec004000000000); +}; + +//////////////// +// vfwcvt.f.f // +//////////////// + +// Simple random test with similar values +void TEST_CASE13(void) { + VSET(16, e16, m2); + // 83.312, -83.188, 62.469, 94.812, 10.797, -13.070, + // -9.039, 54.250, -92.188, 63.688, -32.875, -81.688, + // -62.219, -78.250, -29.703, -1.137 + VLOAD_16(v4, 0x5535, 0xd533, 0x53cf, 0x55ed, 0x4966, 0xca89, 0xc885, 0x52c8, + 0xd5c3, 0x53f6, 0xd01c, 0xd51b, 0xd3c7, 0xd4e4, 0xcf6d, 0xbc8c); + asm volatile("vfwcvt.f.f.v v8, v4"); + // 83.312, -83.188, 62.469, 94.812, 10.797, -13.070, + // -9.039, 54.250, -92.188, 63.688, -32.875, -81.688, + // -62.219, -78.250, -29.703, -1.137 + VCMP_U32(25, v8, 0x42a6a000, 0xc2a66000, 0x4279e000, 0x42bda000, 0x412cc000, + 0xc1512000, 0xc110a000, 0x42590000, 0xc2b86000, 0x427ec000, + 0xc2038000, 0xc2a36000, 0xc278e000, 0xc29c8000, 0xc1eda000, + 0xbf918000); + + VSET(16, e32, m4); + // -69280.273, -24625.789, 58970.254, 57986.516, 34031.016, + // 61977.340, -84548.211, 89658.250, 4958.967, -73911.508, + // -83526.188, -59814.750, 71544.742, 93401.383, 79319.078, + // 4639.214 + VLOAD_32(v8, 0xc7875023, 0xc6c06394, 0x47665a41, 0x47628284, 0x4704ef04, + 0x47721957, 0xc7a5221b, 0x47af1d20, 0x459af7bc, 0xc7905bc1, + 0xc7a32318, 0xc769a6c0, 0x478bbc5f, 0x47b66cb1, 0x479aeb8a, + 0x4590f9b7); + asm volatile("vfwcvt.f.f.v v16, v8"); + // -69280.273, -24625.789, 58970.254, 57986.516, 34031.016, + // 61977.340, -84548.211, 89658.250, 4958.967, -73911.508, + // -83526.188, -59814.750, 71544.742, 93401.383, 79319.078, + // 
4639.214 + VCMP_U64(26, v16, 0xc0f0ea0460000000, 0xc0d80c7280000000, 0x40eccb4820000000, + 0x40ec505080000000, 0x40e09de080000000, 0x40ee432ae0000000, + 0xc0f4a44360000000, 0x40f5e3a400000000, 0x40b35ef780000000, + 0xc0f20b7820000000, 0xc0f4646300000000, 0xc0ed34d800000000, + 0x40f1778be0000000, 0x40f6cd9620000000, 0x40f35d7140000000, + 0x40b21f36e0000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE14(void) { + VSET(16, e16, m2); + // 83.312, -83.188, 62.469, 94.812, 10.797, -13.070, + // -9.039, 54.250, -92.188, 63.688, -32.875, -81.688, + // -62.219, -78.250, -29.703, -1.137 + VLOAD_16(v4, 0x5535, 0xd533, 0x53cf, 0x55ed, 0x4966, 0xca89, 0xc885, 0x52c8, + 0xd5c3, 0x53f6, 0xd01c, 0xd51b, 0xd3c7, 0xd4e4, 0xcf6d, 0xbc8c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwcvt.f.f.v v8, v4, v0.t"); + // 0.000, -83.188, 0.000, 94.812, 0.000, -13.070, 0.000, + // 54.250, 0.000, 63.688, 0.000, -81.688, 0.000, + // -78.250, 0.000, -1.137 + VCMP_U32(27, v8, 0x0, 0xc2a66000, 0x0, 0x42bda000, 0x0, 0xc1512000, 0x0, + 0x42590000, 0x0, 0x427ec000, 0x0, 0xc2a36000, 0x0, 0xc29c8000, 0x0, + 0xbf918000); + + VSET(16, e32, m4); + // -69280.273, -24625.789, 58970.254, 57986.516, 34031.016, + // 61977.340, -84548.211, 89658.250, 4958.967, -73911.508, + // -83526.188, -59814.750, 71544.742, 93401.383, 79319.078, + // 4639.214 + VLOAD_32(v8, 0xc7875023, 0xc6c06394, 0x47665a41, 0x47628284, 0x4704ef04, + 0x47721957, 0xc7a5221b, 0x47af1d20, 0x459af7bc, 0xc7905bc1, + 0xc7a32318, 0xc769a6c0, 0x478bbc5f, 0x47b66cb1, 0x479aeb8a, + 0x4590f9b7); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwcvt.f.f.v v16, v8, v0.t"); + // 0.000, -24625.789, 0.000, 57986.516, 0.000, 61977.340, + // 0.000, 89658.250, 0.000, -73911.508, 0.000, + // -59814.750, 0.000, 93401.383, 0.000, 4639.214 + VCMP_U64(28, v16, 0x0, 0xc0d80c7280000000, 0x0, 0x40ec505080000000, 0x0, + 0x40ee432ae0000000, 
0x0, 0x40f5e3a400000000, 0x0, 0xc0f20b7820000000, + 0x0, 0xc0ed34d800000000, 0x0, 0x40f6cd9620000000, 0x0, + 0x40b21f36e0000000); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + TEST_CASE7(); + TEST_CASE8(); + + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + + TEST_CASE13(); + TEST_CASE14(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c new file mode 100644 index 000000000..4f1e354f6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmacc.c @@ -0,0 +1,351 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 52.7812, 74.1875, 0.3564, 97.5000, 7.8477, 53.2188, + // -26.4688, -48.1250, -32.9688, 7.8750, + // -38.4375, 51.5625, 46.2188, -75.1875, 23.5625, -77.8125 + VLOAD_16(v4, 0x5299, 0x54a3, 0x35b4, 0x5618, 0x47d9, 0x52a7, 0xce9e, 0xd204, + 0xd01f, 0x47e0, 0xd0ce, 0x5272, 0x51c7, 0xd4b3, 0x4de4, 0xd4dd); + // 49.6875, 28.5312, 13.4766, -90.5625, 58.0000, + // -63.8125, 49.0625, 0.3325, 30.8906, 11.2266, -93.9375, + // -54.6875, 61.7500, 38.3438, 95.8125, 10.0938 + VLOAD_16(v12, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, + 0x4fb9, 0x499d, 0xd5df, 0xd2d6, 0x53b8, 0x50cb, 0x55fd, 0x490c); + // -83.87223053, -48.34465408, 70.48658752, -1.26614821, + // -24.13150024, -65.13838196, + // 0.84671319, 34.34510040, 72.80049896, + // -86.23424530, 25.52654839, -68.44364929, 9.81109142, + // -85.20966339, -81.00300598, 16.25512505 + VLOAD_32(v8, 0xc2a7be95, 0xc24160ed, 0x428cf922, 
0xbfa21125, 0xc1c10d50, + 0xc28246da, 0x3f58c232, 0x42096162, 0x429199db, 0xc2ac77ef, + 0x41cc365f, 0xc288e326, 0x411cfa3b, 0xc2aa6b59, 0xc2a2018a, + 0x41820a7f); + asm volatile("vfwmacc.vv v8, v4, v12"); + // 2538.69604492, 2068.31738281, 75.29024506, -8831.11035156, + // 431.03256226, -3461.15991211, -1297.77636719, 18.34259796, + // -945.62481689, 2.17493439, 3636.24926758, -2888.26782227, + // 2863.81884766, -2968.18041992, 2176.57910156, -769.16479492 + VCMP_U32(1, v8, 0x451eab23, 0x45014514, 0x4296949b, 0xc609fc71, 0x43d7842b, + 0xc558528f, 0xc4a238d8, 0x4192bda4, 0xc46c67fd, 0x400b3220, + 0x456343fd, 0xc5348449, 0x4532fd1a, 0xc53982e3, 0x45080944, + 0xc4404a8c); + + VSET(16, e32, m4); + // -3306.98510742, -33314.88281250, 64578.31250000, + // 11648.08203125, -92704.16406250, 33998.11328125, + // 23406.90429688, 44169.36718750, -1206.53601074, + // 4568.00048828, -89687.13281250, 47865.25781250, + // -72205.21875000, 40772.06640625, 95904.72656250, + // 96043.19531250 + VLOAD_32(v8, 0xc54eafc3, 0xc70222e2, 0x477c4250, 0x46360054, 0xc7b51015, + 0x4704ce1d, 0x46b6ddcf, 0x472c895e, 0xc496d127, 0x458ec001, + 0xc7af2b91, 0x473af942, 0xc78d069c, 0x471f4411, 0x47bb505d, + 0x47bb9599); + // -52385.05468750, -31301.09960938, 1862.59667969, + // 86344.56250000, 9560.06835938, -93766.92187500, + // -68756.87500000, 42627.23046875, -89604.89062500, + // -47420.98437500, -40235.07421875, 44342.39453125, + // 90261.61718750, 76035.55468750, -92912.59375000, + // 40474.20703125 + VLOAD_32(v24, 0xc74ca10e, 0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, + 0xc7b72376, 0xc7864a70, 0x4726833b, 0xc7af0272, 0xc7393cfc, + 0xc71d2b13, 0x472d3665, 0x47b04acf, 0x479481c7, 0xc7b5784c, + 0x471e1a35); + // -10044.0368110413110116, 13040.9349537673260784, + // 88916.1136409099854063, 79168.4367756713472772, + // 21611.0950133731239475, -26455.6752808090968756, + // 5979.6755084589240141, -99733.4556307629245566, + // 85141.1192070578690618, -87838.0155233480472816, + // 
53604.5772563865466509, -30101.3490022116457112, + // 80638.7360704737366177, -75019.8948306038219016, + // 63887.5576457676361315, 1225.3713199536578031 + VLOAD_64(v16, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, + 0x40f35406fd087c82, 0x40d51ac614b2f890, 0xc0d9d5eb37ccffac, + 0x40b75bacee1f5340, 0xc0f859574a437b9d, 0x40f4c951e845a8f0, + 0xc0f571e03f956903, 0x40ea2c9278e262b4, 0xc0dd6556560d5f50, + 0x40f3afebc6f1d544, 0xc0f250be5139e52e, 0x40ef31f1d83befd8, + 0x4093257c3b4c4540); + asm volatile("vfwmacc.vv v16, v8, v24"); + // 173226551.6662319302558899, 1042805506.3236714601516724, + // 120372266.5559626817703247, 1005827715.3891682624816895, + // -886236534.5412018299102783, -3187924887.6156492233276367, + // -1609379613.2016887664794922, 1882718061.3047757148742676, + // 108196668.3968845903873444, -216706917.7953008711338043, + // 3608622049.7550821304321289, 2122430044.9128692150115967, + // -6517279175.0161266326904297, 3100051665.0599727630615234, + // -8910693010.2487506866455078, 3887273396.3922243118286133 + VCMP_U64(2, v16, 0x41a4a6746f551c5a, 0x41cf13f981296e11, 0x419cb2f0aa394e48, + 0x41cdf9db41b1d044, 0xc1ca6972bb45461a, 0xc1e7c07bf2f3b366, + 0xc1d7fb4bc74ce878, 0x41dc0dffdb538172, 0x4199cbccf19668ea, + 0xc1a9d55ecb9731ad, 0x41eae2e67c3829a2, 0x41dfa06d973a6c73, + 0xc1f8475c9c70420e, 0x41e718e11a21eb4c, 0xc20098f31491fd71, + 0x41ecf662b68c8d1a); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 52.7812, 74.1875, 0.3564, 97.5000, 7.8477, 53.2188, + // -26.4688, -48.1250, -32.9688, 7.8750, + // -38.4375, 51.5625, 46.2188, -75.1875, 23.5625, -77.8125 + VLOAD_16(v4, 0x5299, 0x54a3, 0x35b4, 0x5618, 0x47d9, 0x52a7, 0xce9e, 0xd204, + 0xd01f, 0x47e0, 0xd0ce, 0x5272, 0x51c7, 0xd4b3, 0x4de4, 0xd4dd); + // 49.6875, 28.5312, 13.4766, -90.5625, 58.0000, + // -63.8125, 49.0625, 0.3325, 30.8906, 11.2266, -93.9375, + // -54.6875, 61.7500, 38.3438, 
95.8125, 10.0938 + VLOAD_16(v12, 0x5236, 0x4f22, 0x4abd, 0xd5a9, 0x5340, 0xd3fa, 0x5222, 0x3552, + 0x4fb9, 0x499d, 0xd5df, 0xd2d6, 0x53b8, 0x50cb, 0x55fd, 0x490c); + VLOAD_8(v0, 0xAA, 0xAA); + // -83.87223053, -48.34465408, 70.48658752, -1.26614821, + // -24.13150024, -65.13838196, + // 0.84671319, 34.34510040, 72.80049896, + // -86.23424530, 25.52654839, -68.44364929, 9.81109142, + // -85.20966339, -81.00300598, 16.25512505 + VLOAD_32(v8, 0xc2a7be95, 0xc24160ed, 0x428cf922, 0xbfa21125, 0xc1c10d50, + 0xc28246da, 0x3f58c232, 0x42096162, 0x429199db, 0xc2ac77ef, + 0x41cc365f, 0xc288e326, 0x411cfa3b, 0xc2aa6b59, 0xc2a2018a, + 0x41820a7f); + asm volatile("vfwmacc.vv v8, v4, v12, v0.t"); + // -83.87223053, 2068.31738281, 70.48658752, -8831.11035156, + // -24.13150024, -3461.15991211, + // 0.84671319, 18.34259796, 72.80049896, 2.17493439, 25.52654839, + // -2888.26782227, 9.81109142, -2968.18041992, -81.00300598, + // -769.16479492 + VCMP_U32(3, v8, 0xc2a7be95, 0x45014514, 0x428cf922, 0xc609fc71, 0xc1c10d50, + 0xc558528f, 0x3f58c232, 0x4192bda4, 0x429199db, 0x400b3220, + 0x41cc365f, 0xc5348449, 0x411cfa3b, 0xc53982e3, 0xc2a2018a, + 0xc4404a8c); + + VSET(16, e32, m4); + // -3306.98510742, -33314.88281250, 64578.31250000, + // 11648.08203125, -92704.16406250, 33998.11328125, + // 23406.90429688, 44169.36718750, -1206.53601074, + // 4568.00048828, -89687.13281250, 47865.25781250, + // -72205.21875000, 40772.06640625, 95904.72656250, + // 96043.19531250 + VLOAD_32(v8, 0xc54eafc3, 0xc70222e2, 0x477c4250, 0x46360054, 0xc7b51015, + 0x4704ce1d, 0x46b6ddcf, 0x472c895e, 0xc496d127, 0x458ec001, + 0xc7af2b91, 0x473af942, 0xc78d069c, 0x471f4411, 0x47bb505d, + 0x47bb9599); + // -52385.05468750, -31301.09960938, 1862.59667969, + // 86344.56250000, 9560.06835938, -93766.92187500, + // -68756.87500000, 42627.23046875, -89604.89062500, + // -47420.98437500, -40235.07421875, 44342.39453125, + // 90261.61718750, 76035.55468750, -92912.59375000, + // 40474.20703125 + VLOAD_32(v24, 0xc74ca10e, 
0xc6f48a33, 0x44e8d318, 0x47a8a448, 0x46156046, + 0xc7b72376, 0xc7864a70, 0x4726833b, 0xc7af0272, 0xc7393cfc, + 0xc71d2b13, 0x472d3665, 0x47b04acf, 0x479481c7, 0xc7b5784c, + 0x471e1a35); + VLOAD_8(v0, 0xAA, 0xAA); + // -10044.0368110413110116, 13040.9349537673260784, + // 88916.1136409099854063, 79168.4367756713472772, + // 21611.0950133731239475, -26455.6752808090968756, + // 5979.6755084589240141, -99733.4556307629245566, + // 85141.1192070578690618, -87838.0155233480472816, + // 53604.5772563865466509, -30101.3490022116457112, + // 80638.7360704737366177, -75019.8948306038219016, + // 63887.5576457676361315, 1225.3713199536578031 + VLOAD_64(v16, 0xc0c39e04b6396548, 0x40c97877ac90a6f8, 0x40f5b541d179217e, + 0x40f35406fd087c82, 0x40d51ac614b2f890, 0xc0d9d5eb37ccffac, + 0x40b75bacee1f5340, 0xc0f859574a437b9d, 0x40f4c951e845a8f0, + 0xc0f571e03f956903, 0x40ea2c9278e262b4, 0xc0dd6556560d5f50, + 0x40f3afebc6f1d544, 0xc0f250be5139e52e, 0x40ef31f1d83befd8, + 0x4093257c3b4c4540); + asm volatile("vfwmacc.vv v16, v8, v24, v0.t"); + // -10044.0368110413110116, 1042805506.3236714601516724, + // 88916.1136409099854063, 1005827715.3891682624816895, + // 21611.0950133731239475, -3187924887.6156492233276367, + // 5979.6755084589240141, 1882718061.3047757148742676, + // 85141.1192070578690618, -216706917.7953008711338043, + // 53604.5772563865466509, 2122430044.9128692150115967, + // 80638.7360704737366177, 3100051665.0599727630615234, + // 63887.5576457676361315, 3887273396.3922243118286133 + VCMP_U64(4, v16, 0xc0c39e04b6396548, 0x41cf13f981296e11, 0x40f5b541d179217e, + 0x41cdf9db41b1d044, 0x40d51ac614b2f890, 0xc1e7c07bf2f3b366, + 0x40b75bacee1f5340, 0x41dc0dffdb538172, 0x40f4c951e845a8f0, + 0xc1a9d55ecb9731ad, 0x40ea2c9278e262b4, 0x41dfa06d973a6c73, + 0x40f3afebc6f1d544, 0x41e718e11a21eb4c, 0x40ef31f1d83befd8, + 0x41ecf662b68c8d1a); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -15.3750, 
11.9375, + // -31.7656, 27.0625, 3.0684, 71.2500, 63.2500, -95.6875, + // -62.0625, -27.7344, 55.5312, -62.1875, -42.1875, + // -95.3125, 27.1406, -16.9219 + VLOAD_16(v4, 0xcbb0, 0x49f8, 0xcff1, 0x4ec4, 0x4223, 0x5474, 0x53e8, 0xd5fb, + 0xd3c2, 0xceef, 0x52f1, 0xd3c6, 0xd146, 0xd5f5, 0x4ec9, 0xcc3b); + // 32.2812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5009); + // -70.22966003, 68.36327362, -69.75650787, -92.51078796, + // -53.56798553, + // -92.09814453, 92.33961487, 42.48206329, 99.15431976, + // -5.94871950, -55.92549133, 59.99367523, -45.05080032, + // -68.93397522, 55.13935089, -80.23659515 + VLOAD_32(v8, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, + 0xc2b83240, 0x42b8ade2, 0x4229eda2, 0x42c64f03, 0xc0be5be9, + 0xc25fb3b4, 0x426ff986, 0xc2343405, 0xc289de32, 0x425c8eb2, + 0xc2a07923); + asm volatile("vfwmacc.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // -566.55389404, 453.72070312, -1095.19055176, + // 781.10052490, 45.48249054, 2207.94091797, 2134.12866211, + // -3046.42993164, -1904.30078125, -901.24902344, 1736.69262695, + // -1947.49658203, -1406.91601562, -3145.74072266, 931.27264404, + // -626.49584961 + VCMP_U32(5, v8, 0xc40da373, 0x43e2dc40, 0xc488e619, 0x4443466f, 0x4235ee12, + 0x4509ff0e, 0x4505620f, 0xc53e66e1, 0xc4ee09a0, 0xc4614ff0, + 0x44d9162a, 0xc4f36fe4, 0xc4afdd50, 0xc5449bda, 0x4468d173, + 0xc41c9fbc); + + VSET(16, e32, m4); + double dscalar_32; + // -260866.17187500, -221967.43750000, -907157.25000000, + // 754760.87500000, -585546.12500000, 260611.84375000, + // -768453.25000000, -117569.82812500, -469705.78125000, + // 775094.50000000, 533114.81250000, -798136.87500000, + // 66693.82812500, 246179.67187500, 728220.87500000, + // -749270.75000000 + VLOAD_32(v8, 0xc87ec08b, 0xc858c3dc, 0xc95d7954, 0x4938448e, 0xc90ef4a2, + 0x487e80f6, 0xc93b9c54, 0xc7e5a0ea, 0xc8e55939, 0x493d3b68, + 0x490227ad, 0xc942db8e, 0x478242ea, 0x487068eb, 0x4931c9ce, + 0xc936ed6c); + // -164832.20312500 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc820f80d); + // 
-730249.9193813583115116, -885955.2111547881504521, + // -739704.0702666083816439, 991252.9466537751723081, + // -543412.1947198503185064, 859135.3883249030914158, + // 862259.4763332824222744, 331294.3916525302920491, + // 936699.0835190876387060, -813722.4244660194963217, + // -602138.5575914122164249, 253718.7884360067546368, + // 7255.4825419568223879, 957493.0229552322998643, + // -446793.8022573012858629, -757660.7323241395642981 + VLOAD_64(v16, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, + 0x412e4029e4afcdba, 0xc120956863b251fa, 0x412a37fec6d2858e, + 0x412a5066f3e1f4bc, 0x41143879910d5c64, 0x412c95f62ac3038c, + 0xc128d534d9539c30, 0xc12260351d7c9f20, 0x410ef8b64eb78980, + 0x40bc577b87dea380, 0x412d386a0bc0c9c8, 0xc11b45273582f020, + 0xc1271f3976f3308b); + asm volatile("vfwmacc.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // 42998415581.0217819213867188, 36586495789.9245910644531250, + // 149527988394.2461547851562500, + // -124407906605.8560791015625000, 96516314402.8619232177734375, + // -42956365230.3924331665039062, 126666704455.5427398681640625, + // 19379625085.2629890441894531, 77423575443.0703430175781250, + // -127761347787.4947814941406250, + // -87875091201.5038757324218750, 131558913220.3411712646484375, + // -10993283369.2012958526611328, -40577380186.7228927612304688, + // -120034697981.6674957275390625, 123503190798.8887634277343750 + VCMP_U64(6, v16, 0x422405cf81ba0b27, 0x422109733e5bd964, 0x4241684804551f82, + 0xc23cf74a012ddb28, 0x423678d21522dca7, 0xc22400cc3b5cc8ed, + 0x423d7dec86478af1, 0x42120c7671f50d4d, 0x423206ce01931202, + 0xc23dbf2b74cb7eaa, 0xc23475c37b0180fe, 0x423ea185b4c45757, + 0xc2047a0189499c41, 0xc222e5335eb5721f, 0xc23bf2a022fdaae1, + 0x423cc15d230ee386); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -15.3750, 11.9375, + // -31.7656, 27.0625, 3.0684, 71.2500, 63.2500, -95.6875, + // -62.0625, -27.7344, 55.5312, 
-62.1875, -42.1875, + // -95.3125, 27.1406, -16.9219 + VLOAD_16(v4, 0xcbb0, 0x49f8, 0xcff1, 0x4ec4, 0x4223, 0x5474, 0x53e8, 0xd5fb, + 0xd3c2, 0xceef, 0x52f1, 0xd3c6, 0xd146, 0xd5f5, 0x4ec9, 0xcc3b); + // 32.2812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5009); + VLOAD_8(v0, 0xAA, 0xAA); + // -70.22966003, 68.36327362, -69.75650787, -92.51078796, + // -53.56798553, + // -92.09814453, 92.33961487, 42.48206329, 99.15431976, + // -5.94871950, -55.92549133, 59.99367523, -45.05080032, + // -68.93397522, 55.13935089, -80.23659515 + VLOAD_32(v8, 0xc28c7596, 0x4288b9ff, 0xc28b8355, 0xc2b90586, 0xc256459e, + 0xc2b83240, 0x42b8ade2, 0x4229eda2, 0x42c64f03, 0xc0be5be9, + 0xc25fb3b4, 0x426ff986, 0xc2343405, 0xc289de32, 0x425c8eb2, + 0xc2a07923); + asm volatile("vfwmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // -70.22966003, 453.72070312, -69.75650787, 781.10052490, + // -53.56798553, 2207.94091797, 92.33961487, + // -3046.42993164, 99.15431976, -901.24902344, -55.92549133, + // -1947.49658203, -45.05080032, -3145.74072266, 55.13935089, + // -626.49584961 + VCMP_U32(7, v8, 0xc28c7596, 0x43e2dc40, 0xc28b8355, 0x4443466f, 0xc256459e, + 0x4509ff0e, 0x42b8ade2, 0xc53e66e1, 0x42c64f03, 0xc4614ff0, + 0xc25fb3b4, 0xc4f36fe4, 0xc2343405, 0xc5449bda, 0x425c8eb2, + 0xc41c9fbc); + + VSET(16, e32, m4); + double dscalar_32; + // -260866.17187500, -221967.43750000, -907157.25000000, + // 754760.87500000, -585546.12500000, 260611.84375000, + // -768453.25000000, -117569.82812500, -469705.78125000, + // 775094.50000000, 533114.81250000, -798136.87500000, + // 66693.82812500, 246179.67187500, 728220.87500000, + // -749270.75000000 + VLOAD_32(v8, 0xc87ec08b, 0xc858c3dc, 0xc95d7954, 0x4938448e, 0xc90ef4a2, + 0x487e80f6, 0xc93b9c54, 0xc7e5a0ea, 0xc8e55939, 0x493d3b68, + 0x490227ad, 0xc942db8e, 0x478242ea, 0x487068eb, 0x4931c9ce, + 0xc936ed6c); + // -164832.20312500 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc820f80d); + VLOAD_8(v0, 0xAA, 0xAA); + // -730249.9193813583115116, -885955.2111547881504521, + 
// -739704.0702666083816439, 991252.9466537751723081, + // -543412.1947198503185064, 859135.3883249030914158, + // 862259.4763332824222744, 331294.3916525302920491, + // 936699.0835190876387060, -813722.4244660194963217, + // -602138.5575914122164249, 253718.7884360067546368, + // 7255.4825419568223879, 957493.0229552322998643, + // -446793.8022573012858629, -757660.7323241395642981 + VLOAD_64(v16, 0xc1264913d6b92745, 0xc12b09866c1c7afb, 0xc12692f023f9fc22, + 0x412e4029e4afcdba, 0xc120956863b251fa, 0x412a37fec6d2858e, + 0x412a5066f3e1f4bc, 0x41143879910d5c64, 0x412c95f62ac3038c, + 0xc128d534d9539c30, 0xc12260351d7c9f20, 0x410ef8b64eb78980, + 0x40bc577b87dea380, 0x412d386a0bc0c9c8, 0xc11b45273582f020, + 0xc1271f3976f3308b); + asm volatile("vfwmacc.vf v16, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -730249.9193813583115116, 36586495789.9245910644531250, + // -739704.0702666083816439, -124407906605.8560791015625000, + // -543412.1947198503185064, -42956365230.3924331665039062, + // 862259.4763332824222744, 19379625085.2629890441894531, + // 936699.0835190876387060, -127761347787.4947814941406250, + // -602138.5575914122164249, 131558913220.3411712646484375, + // 7255.4825419568223879, -40577380186.7228927612304688, + // -446793.8022573012858629, 123503190798.8887634277343750 + VCMP_U64(8, v16, 0xc1264913d6b92745, 0x422109733e5bd964, 0xc12692f023f9fc22, + 0xc23cf74a012ddb28, 0xc120956863b251fa, 0xc22400cc3b5cc8ed, + 0x412a5066f3e1f4bc, 0x42120c7671f50d4d, 0x412c95f62ac3038c, + 0xc23dbf2b74cb7eaa, 0xc12260351d7c9f20, 0x423ea185b4c45757, + 0x40bc577b87dea380, 0xc222e5335eb5721f, 0xc11b45273582f020, + 0x423cc15d230ee386); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c new file mode 100644 index 000000000..15c34d054 --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmsac.c @@ -0,0 +1,353 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -93.0000, -55.1250, -68.5625, 76.3125, -61.2188, 48.9375, + // -56.3125, 71.0000, -74.5625, -38.7188, + // -50.3438, 93.3750, 80.2500, -7.4141, 93.8125, 83.1875 + VLOAD_16(v4, 0xd5d0, 0xd2e4, 0xd449, 0x54c5, 0xd3a7, 0x521e, 0xd30a, 0x5470, + 0xd4a9, 0xd0d7, 0xd24b, 0x55d6, 0x5504, 0xc76a, 0x55dd, 0x5533); + // -60.0312, -31.7188, -74.2500, -0.9077, 30.4844, -56.2500, + // -4.8320, 34.2812, 66.6875, 37.9375, 78.1875, 5.6094, + // -81.8125, 67.6250, 29.4531, -64.9375 + VLOAD_16(v12, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, + 0x542b, 0x50be, 0x54e3, 0x459c, 0xd51d, 0x543a, 0x4f5d, 0xd40f); + // 31.29529381, -66.12346649, + // -48.59321213, 21.66906929, 92.08473206, 1.95985305, + // -96.55027771, 77.65225220, -82.48660278, + // -35.32508850, 42.91923141, + // -76.65069580, 25.13817024, 72.89311981, 21.44047737, 69.71634674 + VLOAD_32(v8, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, + 0x3ffadc77, 0xc2c119be, 0x429b4df4, 0xc2a4f924, 0xc20d4ce4, + 0x422bad4b, 0xc2994d28, 0x41c91af9, 0x4291c947, 0x41ab8619, + 0x428b6ec5); + asm volatile("vfwmsac.vv v8, v4, v12"); + // 5551.61083984, 1814.61950684, 5139.35888672, -90.93905640, + // -1958.30004883, -2754.69433594, 368.65405273, 2356.31640625, + // -4889.89990234, -1433.56750488, -3979.17114258, 600.42608643, + // -6590.59130859, -574.26910400, 2741.63085938, -5471.70458984 + VCMP_U32(1, v8, 0x45ad7ce3, 0x44e2d3d3, 0x45a09adf, 0xc2b5e0cc, 0xc4f4c99a, + 0xc52c2b1c, 0x43b853b8, 0x45134510, 0xc598cf33, 0xc4b33229, + 0xc578b2bd, 
0x44161b45, 0xc5cdf4bb, 0xc40f9139, 0x452b5a18, + 0xc5aafda3); + + VSET(16, e32, m4); + // -71423.96093750, -46625.21875000, -59851.39453125, + // -43461.99218750, -10255.72753906, 37671.59765625, + // 96842.05468750, 33293.05859375, 27126.79296875, + // -27343.42187500, 26815.15429688, 28654.72070312, + // -5699.91699219, 70582.03906250, -5936.72802734, + // 43479.90234375 + VLOAD_32(v8, 0xc78b7ffb, 0xc7362138, 0xc769cb65, 0xc729c5fe, 0xc6203ee9, + 0x47132799, 0x47bd2507, 0x47020d0f, 0x46d3ed96, 0xc6d59ed8, + 0x46d17e4f, 0x46dfdd71, 0xc5b21f56, 0x4789db05, 0xc5b985d3, + 0x4729d7e7); + // 93657.15625000, -28369.10156250, -42169.91406250, + // -21377.95507812, 16308.38183594, 64517.84375000, + // -12469.71679688, -76994.53125000, -33687.83593750, + // -84006.54687500, 31506.48437500, 2731.77905273, + // -20272.41992188, 53550.01953125, -85441.62500000, + // -33418.07031250 + VLOAD_32(v24, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, + 0x477c05d8, 0xc642d6de, 0xc7966144, 0xc70397d6, 0xc7a41346, + 0x46f624f8, 0x452abc77, 0xc69e60d7, 0x47512e05, 0xc7a6e0d0, + 0xc7028a12); + // 15054.1952512034331448, -39042.3922682931588497, + // -83554.3539477824524511, 35787.9235785690543707, + // -34715.2784411938628182, 35880.5352577352605294, + // -52433.9701052222590079, -40831.3148960549369804, + // -3569.6808186589187244, 77018.1414445060363505, + // 58906.1301468654128257, -84146.7844421620393405, + // -23969.5482366856886074, 92255.7186088700836990, + // -35519.3091108352309675, -65623.9480113173485734 + VLOAD_64(v16, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, + 0x40e1797d8df4a4ac, 0xc0e0f368e8fd81b0, 0x40e1851120d4d47c, + 0xc0e99a3f0b1a1b69, 0xc0e3efea13a0e433, 0xc0abe35c94436520, + 0x40f2cda2435b507a, 0x40ecc3442a29c254, 0xc0f48b2c8d133979, + 0xc0d76863164f52e0, 0x40f685fb7f6c03ba, 0xc0e157e9e43c6805, + 0xc0f0057f2b0dea44); + asm volatile("vfwmsac.vv v16, v8, v24"); + // -6689380123.7125854492187500, 1322754608.4847974777221680, + // 
2524011718.2575426101684570, 929092728.6666160821914673, + // -167219605.4339296817779541, 2430454370.8635458946228027, + // -1207540562.0105009078979492, -2563342608.9896693229675293, + // -913839381.3609598875045776, 2296949433.3236432075500488, + // 844792333.7375594377517700, 78362512.5631930530071259, + // 115575080.3336923867464066, 3779577314.6337165832519531, + // 507279209.1484052538871765, -1452948809.7560596466064453 + VCMP_U64(2, v16, 0xc1f8eb7d71bb66c0, 0x41d3b5e88c1f06ec, 0x41e2ce2b98c83dca, + 0x41cbb06a3c5553ad, 0xc1a3ef232ade2c08, 0x41e21bb94c5ba22b, + 0xc1d1fe66d480ac0c, 0xc1e31930221fab5f, 0xc1cb3c0a8aae33ef, + 0x41e11d15572a5b49, 0x41c92d4106de6859, 0x4192aede4240b5ae, + 0x419b8e24a155b375, 0x41ec28f8bc544768, 0x41be3c776925fde3, + 0xc1d5a68f52706348); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -93.0000, -55.1250, -68.5625, 76.3125, -61.2188, 48.9375, + // -56.3125, 71.0000, -74.5625, -38.7188, + // -50.3438, 93.3750, 80.2500, -7.4141, 93.8125, 83.1875 + VLOAD_16(v4, 0xd5d0, 0xd2e4, 0xd449, 0x54c5, 0xd3a7, 0x521e, 0xd30a, 0x5470, + 0xd4a9, 0xd0d7, 0xd24b, 0x55d6, 0x5504, 0xc76a, 0x55dd, 0x5533); + // -60.0312, -31.7188, -74.2500, -0.9077, 30.4844, -56.2500, + // -4.8320, 34.2812, 66.6875, 37.9375, 78.1875, 5.6094, + // -81.8125, 67.6250, 29.4531, -64.9375 + VLOAD_16(v12, 0xd381, 0xcfee, 0xd4a4, 0xbb43, 0x4f9f, 0xd308, 0xc4d5, 0x5049, + 0x542b, 0x50be, 0x54e3, 0x459c, 0xd51d, 0x543a, 0x4f5d, 0xd40f); + VLOAD_8(v0, 0xAA, 0xAA); + // 31.29529381, -66.12346649, + // -48.59321213, 21.66906929, 92.08473206, 1.95985305, + // -96.55027771, 77.65225220, -82.48660278, + // -35.32508850, 42.91923141, + // -76.65069580, 25.13817024, 72.89311981, 21.44047737, 69.71634674 + VLOAD_32(v8, 0x41fa5cc3, 0xc2843f37, 0xc2425f73, 0x41ad5a41, 0x42b82b62, + 0x3ffadc77, 0xc2c119be, 0x429b4df4, 0xc2a4f924, 0xc20d4ce4, + 0x422bad4b, 0xc2994d28, 0x41c91af9, 
0x4291c947, 0x41ab8619, + 0x428b6ec5); + asm volatile("vfwmsac.vv v8, v4, v12, v0.t"); + // 31.29529381, 1814.61950684, -48.59321213, + // -90.93905640, 92.08473206, -2754.69433594, -96.55027771, + // 2356.31640625, -82.48660278, -1433.56750488, 42.91923141, + // 600.42608643, 25.13817024, -574.26910400, 21.44047737, + // -5471.70458984 + VCMP_U32(3, v8, 0x41fa5cc3, 0x44e2d3d3, 0xc2425f73, 0xc2b5e0cc, 0x42b82b62, + 0xc52c2b1c, 0xc2c119be, 0x45134510, 0xc2a4f924, 0xc4b33229, + 0x422bad4b, 0x44161b45, 0x41c91af9, 0xc40f9139, 0x41ab8619, + 0xc5aafda3); + + VSET(16, e32, m4); + // -71423.96093750, -46625.21875000, -59851.39453125, + // -43461.99218750, -10255.72753906, 37671.59765625, + // 96842.05468750, 33293.05859375, 27126.79296875, + // -27343.42187500, 26815.15429688, 28654.72070312, + // -5699.91699219, 70582.03906250, -5936.72802734, + // 43479.90234375 + VLOAD_32(v8, 0xc78b7ffb, 0xc7362138, 0xc769cb65, 0xc729c5fe, 0xc6203ee9, + 0x47132799, 0x47bd2507, 0x47020d0f, 0x46d3ed96, 0xc6d59ed8, + 0x46d17e4f, 0x46dfdd71, 0xc5b21f56, 0x4789db05, 0xc5b985d3, + 0x4729d7e7); + // 93657.15625000, -28369.10156250, -42169.91406250, + // -21377.95507812, 16308.38183594, 64517.84375000, + // -12469.71679688, -76994.53125000, -33687.83593750, + // -84006.54687500, 31506.48437500, 2731.77905273, + // -20272.41992188, 53550.01953125, -85441.62500000, + // -33418.07031250 + VLOAD_32(v24, 0x47b6ec94, 0xc6dda234, 0xc724b9ea, 0xc6a703e9, 0x467ed187, + 0x477c05d8, 0xc642d6de, 0xc7966144, 0xc70397d6, 0xc7a41346, + 0x46f624f8, 0x452abc77, 0xc69e60d7, 0x47512e05, 0xc7a6e0d0, + 0xc7028a12); + VLOAD_8(v0, 0xAA, 0xAA); + // 15054.1952512034331448, -39042.3922682931588497, + // -83554.3539477824524511, 35787.9235785690543707, + // -34715.2784411938628182, 35880.5352577352605294, + // -52433.9701052222590079, -40831.3148960549369804, + // -3569.6808186589187244, 77018.1414445060363505, + // 58906.1301468654128257, -84146.7844421620393405, + // -23969.5482366856886074, 92255.7186088700836990, + 
// -35519.3091108352309675, -65623.9480113173485734 + VLOAD_64(v16, 0x40cd6718fdfdcea0, 0xc0e3104c8d763c4c, 0xc0f46625a9c52662, + 0x40e1797d8df4a4ac, 0xc0e0f368e8fd81b0, 0x40e1851120d4d47c, + 0xc0e99a3f0b1a1b69, 0xc0e3efea13a0e433, 0xc0abe35c94436520, + 0x40f2cda2435b507a, 0x40ecc3442a29c254, 0xc0f48b2c8d133979, + 0xc0d76863164f52e0, 0x40f685fb7f6c03ba, 0xc0e157e9e43c6805, + 0xc0f0057f2b0dea44); + asm volatile("vfwmsac.vv v16, v8, v24, v0.t"); + // 15054.1952512034331448, 1322754608.4847974777221680, + // -83554.3539477824524511, 929092728.6666160821914673, + // -34715.2784411938628182, 2430454370.8635458946228027, + // -52433.9701052222590079, -2563342608.9896693229675293, + // -3569.6808186589187244, 2296949433.3236432075500488, + // 58906.1301468654128257, 78362512.5631930530071259, + // -23969.5482366856886074, 3779577314.6337165832519531, + // -35519.3091108352309675, -1452948809.7560596466064453 + VCMP_U64(4, v16, 0x40cd6718fdfdcea0, 0x41d3b5e88c1f06ec, 0xc0f46625a9c52662, + 0x41cbb06a3c5553ad, 0xc0e0f368e8fd81b0, 0x41e21bb94c5ba22b, + 0xc0e99a3f0b1a1b69, 0xc1e31930221fab5f, 0xc0abe35c94436520, + 0x41e11d15572a5b49, 0x40ecc3442a29c254, 0x4192aede4240b5ae, + 0xc0d76863164f52e0, 0x41ec28f8bc544768, 0xc0e157e9e43c6805, + 0xc1d5a68f52706348); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 8.1562, 2.6836, 56.7188, 38.4688, 33.8125, + // -83.0625, 37.7812, -28.0938, -33.0625, 61.1562, 13.0859, + // -80.5000, 78.3125, -38.0625, 30.0625, -78.6250 + VLOAD_16(v4, 0x4814, 0x415e, 0x5317, 0x50cf, 0x503a, 0xd531, 0x50b9, 0xcf06, + 0xd022, 0x53a5, 0x4a8b, 0xd508, 0x54e5, 0xd0c2, 0x4f84, 0xd4ea); + // 39.8125 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x50fa); + // 56.66989136, 59.83663559, -8.21133614, -19.17305374, + // -93.35797119, -34.25491333, 46.99548721, + // -6.17113161, 55.22229004, 7.96844339, -92.84493256, + // -90.90106201, 78.59468842, -58.67407608, 39.90958405, + // -93.58789825 + 
VLOAD_32(v8, 0x4262adf8, 0x426f58b7, 0xc10361a2, 0xc199626a, 0xc2bab748, + 0xc2090508, 0x423bfb61, 0xc0c579e9, 0x425ce3a0, 0x40fefd7d, + 0xc2b9b09b, 0xc2b5cd58, 0x429d307b, 0xc26ab241, 0x421fa36a, + 0xc2bb2d01); + asm volatile("vfwmsac.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // 268.05081177, 47.00394058, 2266.32666016, 1550.71020508, + // 1439.51806641, -3272.67089844, 1457.17053223, + // -1112.31127930, -1371.52307129, 2426.81469727, 613.82879639, + // -3114.00512695, 3039.22167969, -1456.68920898, + // 1156.95373535, -3036.66992188 + VCMP_U32(5, v8, 0x43860681, 0x423c0409, 0x450da53a, 0x44c1d6ba, 0x44b3f094, + 0xc54c8abc, 0x44b62575, 0xc48b09f6, 0xc4ab70bd, 0x4517ad09, + 0x4419750b, 0xc542a015, 0x453df38c, 0xc4b6160e, 0x44909e85, + 0xc53dcab8); + + VSET(16, e32, m4); + double dscalar_32; + // 580253.06250000, -300331.93750000, 485801.21875000, + // -751037.87500000, -360868.65625000, 893035.68750000, + // 541162.00000000, 417622.93750000, -933287.18750000, + // -790074.12500000, 496987.96875000, 455066.96875000, + // -928285.18750000, 300725.40625000, -645096.93750000, + // 102530.55468750 + VLOAD_32(v8, 0x490da9d1, 0xc892a57e, 0x48ed3527, 0xc9375bde, 0xc8b03495, + 0x495a06bb, 0x49041ea0, 0x48cbeade, 0xc963da73, 0xc940e3a2, + 0x48f2ab7f, 0x48de335f, 0xc962a1d3, 0x4892d6ad, 0xc91d7e8f, + 0x47c84147); + // 670995.56250000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4923d139); + // 579132.0708449089433998, 521241.3016625398304313, + // 409779.0302067114971578, 454935.4394149139989167, + // -640831.0776052488945425, 262502.9360184965189546, + // -132061.7241549796890467, -523289.4277524493518285, + // 796635.9535234714858234, 170970.3947326899506152, + // -520724.0386287728324533, -616193.5881990450434387, + // 79952.4583538805600256, -869849.3916852036491036, + // 535808.2751473840326071, -306070.6657954099355265 + VLOAD_64(v16, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, + 0x411bc45dc1f5fbb4, 0xc1238e7e27bbe00c, 0x4110059bbe7ba1fc, + 0xc1001eedcb11c418, 
0xc11ff065b604bcf3, 0x41284fb7e8343a7c, + 0x4104ded328699cd0, 0xc11fc850278e4d10, 0xc122ce032d286cdc, + 0x40f38507556ae0f0, 0xc12a8bb2c88af688, 0x41205a008ce01e30, + 0xc112ae5aa9c6459e); + asm volatile("vfwmsac.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // 389346650932.4642944335937500, + // -201521918580.8290100097656250, + // 325970052259.3115844726562500, + // -503943536329.8690795898437500, + // -242140626158.0102844238281250, + // 599222720963.7006835937500000, 363117432655.3491821289062500, + // 280223661150.1425781250000000, + // -626232357986.5589599609375000, + // -530136402891.4650268554687500, + // 333477242371.1773071289062500, 305348532865.1643676757812500, + // -622875321499.4388427734375000, + // 201786282974.1514587402343750, + // -432857718253.1149902343750000, 68797853286.6418762207031250 + VCMP_U64(6, v16, 0x4256a9b79acd1db7, 0xc24775d2393a6a1d, 0x4252f9551128d3f1, + 0xc25d555807b2779f, 0xc24c305a5e770151, 0x4261708ea338766c, + 0x425522df13d3d659, 0x42504fa86f178920, 0xc26239cb6e8c51e3, + 0xc25edba5e2f2ddc3, 0x42536932b980cb59, 0x4251c60c36a04a85, + 0xc26220c864936e0b, 0x42477db329ef1363, 0xc2593215277b475c, + 0x423004abee66a452); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 8.1562, 2.6836, 56.7188, 38.4688, 33.8125, + // -83.0625, 37.7812, -28.0938, -33.0625, 61.1562, 13.0859, + // -80.5000, 78.3125, -38.0625, 30.0625, -78.6250 + VLOAD_16(v4, 0x4814, 0x415e, 0x5317, 0x50cf, 0x503a, 0xd531, 0x50b9, 0xcf06, + 0xd022, 0x53a5, 0x4a8b, 0xd508, 0x54e5, 0xd0c2, 0x4f84, 0xd4ea); + // 39.8125 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x50fa); + VLOAD_8(v0, 0xAA, 0xAA); + // 56.66989136, 59.83663559, -8.21133614, -19.17305374, + // -93.35797119, -34.25491333, 46.99548721, + // -6.17113161, 55.22229004, 7.96844339, -92.84493256, + // -90.90106201, 78.59468842, -58.67407608, 39.90958405, + // -93.58789825 + VLOAD_32(v8, 0x4262adf8, 0x426f58b7, 0xc10361a2, 
0xc199626a, 0xc2bab748, + 0xc2090508, 0x423bfb61, 0xc0c579e9, 0x425ce3a0, 0x40fefd7d, + 0xc2b9b09b, 0xc2b5cd58, 0x429d307b, 0xc26ab241, 0x421fa36a, + 0xc2bb2d01); + asm volatile("vfwmsac.vf v8, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 56.66989136, 47.00394058, -8.21133614, 1550.71020508, + // -93.35797119, -3272.67089844, 46.99548721, + // -1112.31127930, 55.22229004, 2426.81469727, -92.84493256, + // -3114.00512695, 78.59468842, -1456.68920898, 39.90958405, + // -3036.66992188 + VCMP_U32(7, v8, 0x4262adf8, 0x423c0409, 0xc10361a2, 0x44c1d6ba, 0xc2bab748, + 0xc54c8abc, 0x423bfb61, 0xc48b09f6, 0x425ce3a0, 0x4517ad09, + 0xc2b9b09b, 0xc542a015, 0x429d307b, 0xc4b6160e, 0x421fa36a, + 0xc53dcab8); + + VSET(16, e32, m4); + double dscalar_32; + // 580253.06250000, -300331.93750000, 485801.21875000, + // -751037.87500000, -360868.65625000, 893035.68750000, + // 541162.00000000, 417622.93750000, -933287.18750000, + // -790074.12500000, 496987.96875000, 455066.96875000, + // -928285.18750000, 300725.40625000, -645096.93750000, + // 102530.55468750 + VLOAD_32(v8, 0x490da9d1, 0xc892a57e, 0x48ed3527, 0xc9375bde, 0xc8b03495, + 0x495a06bb, 0x49041ea0, 0x48cbeade, 0xc963da73, 0xc940e3a2, + 0x48f2ab7f, 0x48de335f, 0xc962a1d3, 0x4892d6ad, 0xc91d7e8f, + 0x47c84147); + // 670995.56250000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4923d139); + VLOAD_8(v0, 0xAA, 0xAA); + // 579132.0708449089433998, 521241.3016625398304313, + // 409779.0302067114971578, 454935.4394149139989167, + // -640831.0776052488945425, 262502.9360184965189546, + // -132061.7241549796890467, -523289.4277524493518285, + // 796635.9535234714858234, 170970.3947326899506152, + // -520724.0386287728324533, -616193.5881990450434387, + // 79952.4583538805600256, -869849.3916852036491036, + // 535808.2751473840326071, -306070.6657954099355265 + VLOAD_64(v16, 0x4121ac782445c8ae, 0x411fd06534e7065c, 0x411902cc1eee8218, + 0x411bc45dc1f5fbb4, 0xc1238e7e27bbe00c, 0x4110059bbe7ba1fc, + 0xc1001eedcb11c418, 0xc11ff065b604bcf3, 
0x41284fb7e8343a7c, + 0x4104ded328699cd0, 0xc11fc850278e4d10, 0xc122ce032d286cdc, + 0x40f38507556ae0f0, 0xc12a8bb2c88af688, 0x41205a008ce01e30, + 0xc112ae5aa9c6459e); + asm volatile("vfwmsac.vf v16, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // 579132.0708449089433998, -201521918580.8290100097656250, + // 409779.0302067114971578, -503943536329.8690795898437500, + // -640831.0776052488945425, 599222720963.7006835937500000, + // -132061.7241549796890467, 280223661150.1425781250000000, + // 796635.9535234714858234, -530136402891.4650268554687500, + // -520724.0386287728324533, 305348532865.1643676757812500, + // 79952.4583538805600256, 201786282974.1514587402343750, + // 535808.2751473840326071, 68797853286.6418762207031250 + VCMP_U64(8, v16, 0x4121ac782445c8ae, 0xc24775d2393a6a1d, 0x411902cc1eee8218, + 0xc25d555807b2779f, 0xc1238e7e27bbe00c, 0x4261708ea338766c, + 0xc1001eedcb11c418, 0x42504fa86f178920, 0x41284fb7e8343a7c, + 0xc25edba5e2f2ddc3, 0xc11fc850278e4d10, 0x4251c60c36a04a85, + 0x40f38507556ae0f0, 0x42477db329ef1363, 0x41205a008ce01e30, + 0x423004abee66a452); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c new file mode 100644 index 000000000..3c901db0f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwmul.c @@ -0,0 +1,258 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -56.5312, 95.3750, 86.3750, -33.4375, 4.7656, 58.8438, + // -80.4375, -96.4375, 74.3750, -92.7500, -57.2812, -90.0625, + // -93.2500, 40.6875, -32.2812, -36.8125 + VLOAD_16(v4, 0xd311, 0x55f6, 0x5566, 0xd02e, 0x44c4, 0x535b, 0xd507, 0xd607, + 0x54a6, 0xd5cc, 0xd329, 0xd5a1, 0xd5d4, 0x5116, 0xd009, 0xd09a); + // 96.4375, -98.8125, -49.1250, -78.8750, + // -5.9180, 32.8750, 32.8750, -74.8125, + // -10.3750, 39.5938, 43.2812, 15.0547, -31.9062, + // -11.2500, 16.3594, 28.6094 + VLOAD_16(v8, 0x5607, 0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, + 0xc930, 0x50f3, 0x5169, 0x4b87, 0xcffa, 0xc9a0, 0x4c17, 0x4f27); + asm volatile("vfwmul.vv v12, v4, v8"); + // -5451.73242188, -9424.24218750, -4243.17187500, 2637.38281250, + // -28.20281982, 1934.48828125, -2644.38281250, 7214.73046875, + // -771.64062500, -3672.32031250, -2479.20410156, -1355.86279297, + // 2975.25781250, -457.73437500, -528.10107422, -1053.18261719 + VCMP_U32(1, v12, 0xc5aa5ddc, 0xc61340f8, 0xc5849960, 0x4524d620, 0xc1e19f60, + 0x44f1cfa0, 0xc5254620, 0x45e175d8, 0xc440e900, 0xc5658520, + 0xc51af344, 0xc4a97b9c, 0x4539f420, 0xc3e4de00, 0xc4040678, + 0xc483a5d8); + + VSET(16, e32, m4); + // -89875.40625000, 87678.49218750, -37342.58593750, + // -47507.81640625, -80717.72656250, 2230.02978516, + // -68805.99218750, 79032.60156250, -43338.95703125, + // 42250.94531250, -6447.03955078, 25544.21679688, + // 5945.30566406, -47409.30468750, -43415.17187500, + // 92669.35156250 + VLOAD_32(v8, 0xc7af89b4, 0x47ab3f3f, 0xc711de96, 0xc73993d1, 0xc79da6dd, + 0x450b607a, 0xc78662ff, 0x479a5c4d, 0xc7294af5, 0x47250af2, + 0xc5c97851, 0x46c7906f, 0x45b9ca72, 0xc739314e, 0xc729972c, + 0x47b4fead); + // 99630.39843750, 37076.73437500, -66118.01562500, + 
// -99829.85156250, -78879.75000000, 75633.85937500, + // -90564.15625000, -84653.48437500, 34630.80859375, + // 85817.48437500, -23627.74023438, -79522.11718750, + // 51590.63671875, 7574.55957031, -93117.57812500, + // 28056.31054688 + VLOAD_32(v16, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, + 0x4793b8ee, 0xc7b0e214, 0xc7a556be, 0x470746cf, 0x47a79cbe, + 0xc6b8977b, 0xc79b510f, 0x474986a3, 0x45ecb47a, 0xc7b5deca, + 0x46db309f); + asm volatile("vfwmul.vv v24, v8, v16"); + // -8954322534.4196777343750000, 3250832165.2364501953125000, + // 2469017680.4935302734375000, 4742698259.8944396972656250, + // 6366994091.8183593750000000, 168665759.1725692749023438, + // 6231356627.4050292968750000, -6690385101.4866943359375000, + // -1500863125.6019744873046875, 3625869839.1844482421875000, + // 152328975.7866010665893555, -2031330201.5839996337890625, + // 306722104.6965751647949219, -359104602.5425643920898438, + // 4042715658.8806152343750000, 2599960105.6150360107421875 + VCMP_U64(2, v24, 0xc200adc0f3335b80, 0x41e8387864a79100, 0x41e265470a0fcb00, + 0x41f1aafd513e4fa0, 0x41f7b809eabd1800, 0x41a41b453e585b00, + 0x41f736af4d367b00, 0xc1f8ec72ccd7c980, 0xc1d65d56a56686c0, + 0x41eb03cc41e5e700, 0x41a228b61f92bd60, 0xc1de44e8e6656040, + 0x41b2483538b252c0, 0xc1b567805a8ae580, 0x41ee1ede415c2e00, + 0x41e35f07c533ae60); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -56.5312, 95.3750, 86.3750, -33.4375, 4.7656, 58.8438, + // -80.4375, -96.4375, 74.3750, -92.7500, -57.2812, -90.0625, + // -93.2500, 40.6875, -32.2812, -36.8125 + VLOAD_16(v4, 0xd311, 0x55f6, 0x5566, 0xd02e, 0x44c4, 0x535b, 0xd507, 0xd607, + 0x54a6, 0xd5cc, 0xd329, 0xd5a1, 0xd5d4, 0x5116, 0xd009, 0xd09a); + // 96.4375, -98.8125, -49.1250, -78.8750, + // -5.9180, 32.8750, 32.8750, -74.8125, + // -10.3750, 39.5938, 43.2812, 15.0547, -31.9062, + // -11.2500, 16.3594, 28.6094 + VLOAD_16(v8, 0x5607, 
0xd62d, 0xd224, 0xd4ee, 0xc5eb, 0x501c, 0x501c, 0xd4ad, + 0xc930, 0x50f3, 0x5169, 0x4b87, 0xcffa, 0xc9a0, 0x4c17, 0x4f27); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwmul.vv v12, v4, v8, v0.t"); + // 0.00000000, -9424.24218750, 0.00000000, 2637.38281250, + // 0.00000000, 1934.48828125, 0.00000000, 7214.73046875, + // 0.00000000, -3672.32031250, 0.00000000, -1355.86279297, + // 0.00000000, -457.73437500, 0.00000000, -1053.18261719 + VCMP_U32(3, v12, 0x0, 0xc61340f8, 0x0, 0x4524d620, 0x0, 0x44f1cfa0, 0x0, + 0x45e175d8, 0x0, 0xc5658520, 0x0, 0xc4a97b9c, 0x0, 0xc3e4de00, 0x0, + 0xc483a5d8); + + VSET(16, e32, m4); + // -89875.40625000, 87678.49218750, -37342.58593750, + // -47507.81640625, -80717.72656250, 2230.02978516, + // -68805.99218750, 79032.60156250, -43338.95703125, + // 42250.94531250, -6447.03955078, 25544.21679688, + // 5945.30566406, -47409.30468750, -43415.17187500, + // 92669.35156250 + VLOAD_32(v8, 0xc7af89b4, 0x47ab3f3f, 0xc711de96, 0xc73993d1, 0xc79da6dd, + 0x450b607a, 0xc78662ff, 0x479a5c4d, 0xc7294af5, 0x47250af2, + 0xc5c97851, 0x46c7906f, 0x45b9ca72, 0xc739314e, 0xc729972c, + 0x47b4fead); + // 99630.39843750, 37076.73437500, -66118.01562500, + // -99829.85156250, -78879.75000000, 75633.85937500, + // -90564.15625000, -84653.48437500, 34630.80859375, + // 85817.48437500, -23627.74023438, -79522.11718750, + // 51590.63671875, 7574.55957031, -93117.57812500, + // 28056.31054688 + VLOAD_32(v16, 0x47c29733, 0x4710d4bc, 0xc7812302, 0xc7c2faed, 0xc79a0fe0, + 0x4793b8ee, 0xc7b0e214, 0xc7a556be, 0x470746cf, 0x47a79cbe, + 0xc6b8977b, 0xc79b510f, 0x474986a3, 0x45ecb47a, 0xc7b5deca, + 0x46db309f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwmul.vv v24, v8, v16, v0.t"); + // 0.0000000000000000, 3250832165.2364501953125000, + // 0.0000000000000000, 4742698259.8944396972656250, + // 0.0000000000000000, 168665759.1725692749023438, + // 0.0000000000000000, -6690385101.4866943359375000, + // 
0.0000000000000000, 3625869839.1844482421875000, + // 0.0000000000000000, -2031330201.5839996337890625, + // 0.0000000000000000, -359104602.5425643920898438, + // 0.0000000000000000, 2599960105.6150360107421875 + VCMP_U64(4, v24, 0x0, 0x41e8387864a79100, 0x0, 0x41f1aafd513e4fa0, 0x0, + 0x41a41b453e585b00, 0x0, 0xc1f8ec72ccd7c980, 0x0, 0x41eb03cc41e5e700, + 0x0, 0xc1de44e8e6656040, 0x0, 0xc1b567805a8ae580, 0x0, + 0x41e35f07c533ae60); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -44.4062, -27.0781, -21.6562, 75.5625, -84.5000, + // -1.0713, 72.5625, -84.6250, 83.9375, -52.3438, + // -40.5625, 1.6523, 79.6875, -36.2812, 33.5938, -72.4375 + VLOAD_16(v4, 0xd18d, 0xcec5, 0xcd6a, 0x54b9, 0xd548, 0xbc49, 0x5489, 0xd54a, + 0x553f, 0xd28b, 0xd112, 0x3e9c, 0x54fb, 0xd089, 0x5033, 0xd487); + // -58.9688 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd35f); + asm volatile("vfwmul.vf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 2618.58105469, 1596.76318359, 1277.04199219, + // -4455.82617188, 4982.85937500, 63.17257690, -4278.91992188, + // 4990.23046875, -4949.68945312, 3086.64550781, 2391.91992188, + // -97.43664551, -4699.07226562, 2139.45996094, -1980.98144531, + // 4271.54882812 + VCMP_U32(5, v8, 0x4523a94c, 0x44c7986c, 0x449fa158, 0xc58b3e9c, 0x459bb6e0, + 0x427cb0b8, 0xc585b75c, 0x459bf1d8, 0xc59aad84, 0x4540ea54, + 0x45157eb8, 0xc2c2df90, 0xc592d894, 0x4505b75c, 0xc4f79f68, + 0x45857c64); + + VSET(16, e32, m4); + double dscalar_32; + // -187018.20312500, -714032.18750000, -891429.25000000, + // -378265.00000000, 211566.90625000, 231934.78125000, + // 947047.75000000, -241945.03125000, -489658.75000000, + // -788001.68750000, -817411.37500000, -522168.21875000, + // -668021.56250000, 744069.12500000, -620354.68750000, + // 913454.68750000 + VLOAD_32(v8, 0xc836a28d, 0xc92e5303, 0xc959a254, 0xc8b8b320, 0x484e9bba, + 0x48627fb2, 0x4967367c, 0xc86c4642, 0xc8ef1758, 0xc940621b, + 0xc9479036, 
0xc8fef707, 0xc9231759, 0x4935a852, 0xc917742b, + 0x495f02eb); + // -50557.21484375 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7457d37); + asm volatile("vfwmul.vf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 9455119475.0827026367187500, 36099478708.7902832031250000, + // 45068180110.2529296875000000, 19124024872.8710937500000000, + // -10696233533.1087646484375000, -11725976565.3944091796875000, + // -47880096564.0400390625000000, 12232066925.2840576171875000, + // 24755782623.8720703125000000, 39839170612.1750488281250000, + // 41326042501.6000976562500000, 26399370819.9219970703125000, + // 33773309655.5700683593750000, -37618062611.2260742187500000, + // 31363405215.2648925781250000, -46181724885.9680175781250000 + VCMP_U64(6, v16, 0x42019c8d6398a960, 0x4220cf64a96994a0, 0x4224fc8bb51c8180, + 0x4211cf85d8a37c00, 0xc203ec5c91e8dec0, 0xc205d7619fab27c0, + 0xc2264bc096681480, 0x4206c8b43b6a45c0, 0x42170e3d0f7f7d00, + 0x42228d33006859a0, 0x42233e72bb0b3340, 0x42189619b90fb020, + 0x421f742f3b5e47c0, 0xc221846c2a2673c0, 0x421d359e567d0f40, + 0xc225814a65abefa0); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -44.4062, -27.0781, -21.6562, 75.5625, -84.5000, + // -1.0713, 72.5625, -84.6250, 83.9375, -52.3438, + // -40.5625, 1.6523, 79.6875, -36.2812, 33.5938, -72.4375 + VLOAD_16(v4, 0xd18d, 0xcec5, 0xcd6a, 0x54b9, 0xd548, 0xbc49, 0x5489, 0xd54a, + 0x553f, 0xd28b, 0xd112, 0x3e9c, 0x54fb, 0xd089, 0x5033, 0xd487); + // -58.9688 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd35f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwmul.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 1596.76318359, 0.00000000, -4455.82617188, + // 0.00000000, 63.17257690, 0.00000000, 4990.23046875, + // 0.00000000, 3086.64550781, 0.00000000, -97.43664551, + // 0.00000000, 2139.45996094, 0.00000000, 4271.54882812 + VCMP_U32(7, v8, 0x0, 0x44c7986c, 0x0, 0xc58b3e9c, 0x0, 
0x427cb0b8, 0x0, + 0x459bf1d8, 0x0, 0x4540ea54, 0x0, 0xc2c2df90, 0x0, 0x4505b75c, 0x0, + 0x45857c64); + + VSET(16, e32, m4); + double dscalar_32; + // -187018.20312500, -714032.18750000, -891429.25000000, + // -378265.00000000, 211566.90625000, 231934.78125000, + // 947047.75000000, -241945.03125000, -489658.75000000, + // -788001.68750000, -817411.37500000, -522168.21875000, + // -668021.56250000, 744069.12500000, -620354.68750000, + // 913454.68750000 + VLOAD_32(v8, 0xc836a28d, 0xc92e5303, 0xc959a254, 0xc8b8b320, 0x484e9bba, + 0x48627fb2, 0x4967367c, 0xc86c4642, 0xc8ef1758, 0xc940621b, + 0xc9479036, 0xc8fef707, 0xc9231759, 0x4935a852, 0xc917742b, + 0x495f02eb); + // -50557.21484375 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7457d37); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwmul.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, 36099478708.7902832031250000, + // 0.0000000000000000, 19124024872.8710937500000000, + // 0.0000000000000000, -11725976565.3944091796875000, + // 0.0000000000000000, 12232066925.2840576171875000, + // 0.0000000000000000, 39839170612.1750488281250000, + // 0.0000000000000000, 26399370819.9219970703125000, + // 0.0000000000000000, -37618062611.2260742187500000, + // 0.0000000000000000, -46181724885.9680175781250000 + VCMP_U64(8, v16, 0x0, 0x4220cf64a96994a0, 0x0, 0x4211cf85d8a37c00, 0x0, + 0xc205d7619fab27c0, 0x0, 0x4206c8b43b6a45c0, 0x0, 0x42228d33006859a0, + 0x0, 0x42189619b90fb020, 0x0, 0xc221846c2a2673c0, 0x0, + 0xc225814a65abefa0); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c new file mode 100644 index 000000000..b036328fb --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmacc.c @@ -0,0 +1,352 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 33.9375, 31.7344, -56.0000, -62.0625, 77.6875, -7.7383, + // -75.3750, 4.1953, 79.5625, -87.3750, + // -37.2188, 90.5000, 68.0625, 69.0625, 54.0312, -64.6875 + VLOAD_16(v4, 0x503e, 0x4fef, 0xd300, 0xd3c2, 0x54db, 0xc7bd, 0xd4b6, 0x4432, + 0x54f9, 0xd576, 0xd0a7, 0x55a8, 0x5441, 0x5451, 0x52c1, 0xd40b); + // -92.3125, -75.9375, 26.1094, + // -79.6875, 3.9375, 37.2812, 50.7812, -3.9375, -55.9688, + // -31.5312, 76.0000, 69.1875, -8.2578, -52.5000, + // -98.4375, 40.3438 + VLOAD_16(v12, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, + 0xd2ff, 0xcfe2, 0x54c0, 0x5453, 0xc821, 0xd290, 0xd627, 0x510b); + // 75.62483215, 29.19676971, 69.45310211, -70.36167145, + // -0.92180759, -77.84928131, 86.66299438, -43.34124756, + // -3.36894345, 7.33576536, -64.43717194, -80.48993683, + // -5.57641745, 89.34833527, -39.19780731, -55.64332581 + VLOAD_32(v8, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, + 0xc29bb2d5, 0x42ad5374, 0xc22d5d70, 0xc0579cc5, 0x40eabe97, + 0xc280dfd5, 0xc2a0fad9, 0xc0b27203, 0x42b2b259, 0xc21cca8e, + 0xc25e92c4); + asm volatile("vfwnmacc.vv v8, v4, v12"); + // 3057.23071289, 2380.63232422, 1392.67187500, + // -4875.24365234, -304.97271729, 366.34207153, + // 3740.97363281, 59.86029053, 4456.38281250, -2762.37866211, + // 2893.06225586, -6180.97900391, 567.62377930, 3536.43286133, + // 5357.89892578, 2665.37963867 + VCMP_U32(1, v8, 0x453f13b1, 0x4514ca1e, 0x44ae1580, 0xc59859f3, 0xc3987c82, + 0x43b72bc9, 0x4569cf94, 0x426f70f0, 0x458b4310, 0xc52ca60f, + 0x4534d0ff, 0xc5c127d5, 0x440de7ec, 0x455d06ed, 0x45a76f31, + 0x45269613); + + VSET(16, e32, m4); + // 24686.12304688, 45012.43359375, 
5708.16113281, + // -32777.98828125, 74121.31250000, -74877.15625000, + // -60082.02734375, 46400.20312500, -45509.65234375, + // -63994.57031250, -8693.70019531, 57683.04296875, + // 70424.14843750, 90967.72656250, 16158.18359375, + // -90782.41406250 + VLOAD_32(v8, 0x46c0dc3f, 0x472fd46f, 0x45b2614a, 0xc70009fd, 0x4790c4a8, + 0xc7923e94, 0xc76ab207, 0x47354034, 0xc731c5a7, 0xc779fa92, + 0xc607d6cd, 0x4761530b, 0x47898c13, 0x47b1abdd, 0x467c78bc, + 0xc7b14f35); + // -87108.13281250, -7857.04492188, -40309.92968750, + // -77418.73437500, 28954.62109375, -28385.13085938, + // 42368.34375000, -32644.74804688, 89327.02343750, + // -91567.60156250, -25929.78515625, -88250.83593750, + // -49992.60156250, 34217.12500000, 49765.98046875, + // 8088.22802734 + VLOAD_32(v24, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, + 0xc6ddc243, 0x47258058, 0xc6ff097f, 0x47ae7783, 0xc7b2d7cd, + 0xc6ca9392, 0xc7ac5d6b, 0xc743489a, 0x4705a920, 0x474265fb, + 0x45fcc1d3); + // -95159.7034957902651513, -25746.0480722606444033, + // -89272.3172746254567755, -57390.5516721799431252, + // 98139.9797031646012329, -66607.6782029465102823, + // 67788.7602550606534351, -90593.7788542728667380, + // -68056.0128839309472824, -37127.9738570771587547, + // 21060.8546093095501419, -76483.1707763712329324, + // 83261.7813338561973069, -99608.0446094776270911, + // 32602.1877863906847779, 52037.0826651407405734 + VLOAD_64(v16, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, + 0xc0ec05d1a74c6a5f, 0x40f7f5bfacdd39bc, 0xc0f042fad9eb5535, + 0x40f08ccc2a0135e2, 0xc0f61e1c762fe5e5, 0xc0f09d8034c5c7e1, + 0xc0e220ff29d6512c, 0x40d49136b1eb3ed8, 0xc0f2ac32bb800116, + 0x40f453dc8057edfa, 0xc0f85180b6b86d78, 0x40dfd68c04b135a8, + 0x40e968a2a5315d80); + asm volatile("vfwnmacc.vv v16, v8, v24"); + // 2150457244.6964006423950195, 353690458.8370813727378845, + // 230184846.2258668541908264, -2537572977.5412845611572266, + // -2146252658.3886387348175049, -2125331270.8559157848358154, + // 
2545508198.9366445541381836, 1514813534.1183013916015625, + // 4065309837.5555171966552734, -5859792188.5645341873168945, + // -225446839.1319110989570618, 5090653244.5816955566406250, + // 3520603131.4329605102539062, -3112554462.7102732658386230, + // -804160451.3248255252838135, 734216828.7275727987289429 + VCMP_U64(2, v16, 0x41e005abf39648ea, 0x41b514e35ad64af7, 0x41ab70af1c73a4d2, + 0xc1e2e8094e315234, 0xc1dffb4ddc98df75, 0xc1dfab7ed1b6c753, + 0x41e2f72becddf8fe, 0x41d6928e17879240, 0x41ee49f691b1c6cc, + 0xc1f5d45553c90855, 0xc1aae0176e4389da, 0x41f2f6d343c94ea0, + 0x41ea3b047f6ddad0, 0xc1e730b9fbd6ba8f, 0xc1c7f741e1a993e2, + 0x41c5e1a13e5d211b); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 33.9375, 31.7344, -56.0000, -62.0625, 77.6875, -7.7383, + // -75.3750, 4.1953, 79.5625, -87.3750, + // -37.2188, 90.5000, 68.0625, 69.0625, 54.0312, -64.6875 + VLOAD_16(v4, 0x503e, 0x4fef, 0xd300, 0xd3c2, 0x54db, 0xc7bd, 0xd4b6, 0x4432, + 0x54f9, 0xd576, 0xd0a7, 0x55a8, 0x5441, 0x5451, 0x52c1, 0xd40b); + // -92.3125, -75.9375, 26.1094, + // -79.6875, 3.9375, 37.2812, 50.7812, -3.9375, -55.9688, + // -31.5312, 76.0000, 69.1875, -8.2578, -52.5000, + // -98.4375, 40.3438 + VLOAD_16(v12, 0xd5c5, 0xd4bf, 0x4e87, 0xd4fb, 0x43e0, 0x50a9, 0x5259, 0xc3e0, + 0xd2ff, 0xcfe2, 0x54c0, 0x5453, 0xc821, 0xd290, 0xd627, 0x510b); + VLOAD_8(v0, 0xAA, 0xAA); + // 75.62483215, 29.19676971, 69.45310211, -70.36167145, + // -0.92180759, -77.84928131, 86.66299438, -43.34124756, + // -3.36894345, 7.33576536, -64.43717194, -80.48993683, + // -5.57641745, 89.34833527, -39.19780731, -55.64332581 + VLOAD_32(v8, 0x42973fea, 0x41e992fc, 0x428ae7fd, 0xc28cb92d, 0xbf6bfb95, + 0xc29bb2d5, 0x42ad5374, 0xc22d5d70, 0xc0579cc5, 0x40eabe97, + 0xc280dfd5, 0xc2a0fad9, 0xc0b27203, 0x42b2b259, 0xc21cca8e, + 0xc25e92c4); + asm volatile("vfwnmacc.vv v8, v4, v12, v0.t"); + // 75.62483215, 2380.63232422, 
69.45310211, -4875.24365234, + // -0.92180759, 366.34207153, 86.66299438, 59.86029053, + // -3.36894345, -2762.37866211, -64.43717194, -6180.97900391, + // -5.57641745, 3536.43286133, -39.19780731, 2665.37963867 + VCMP_U32(3, v8, 0x42973fea, 0x4514ca1e, 0x428ae7fd, 0xc59859f3, 0xbf6bfb95, + 0x43b72bc9, 0x42ad5374, 0x426f70f0, 0xc0579cc5, 0xc52ca60f, + 0xc280dfd5, 0xc5c127d5, 0xc0b27203, 0x455d06ed, 0xc21cca8e, + 0x45269613); + + VSET(16, e32, m4); + // 24686.12304688, 45012.43359375, 5708.16113281, + // -32777.98828125, 74121.31250000, -74877.15625000, + // -60082.02734375, 46400.20312500, -45509.65234375, + // -63994.57031250, -8693.70019531, 57683.04296875, + // 70424.14843750, 90967.72656250, 16158.18359375, + // -90782.41406250 + VLOAD_32(v8, 0x46c0dc3f, 0x472fd46f, 0x45b2614a, 0xc70009fd, 0x4790c4a8, + 0xc7923e94, 0xc76ab207, 0x47354034, 0xc731c5a7, 0xc779fa92, + 0xc607d6cd, 0x4761530b, 0x47898c13, 0x47b1abdd, 0x467c78bc, + 0xc7b14f35); + // -87108.13281250, -7857.04492188, -40309.92968750, + // -77418.73437500, 28954.62109375, -28385.13085938, + // 42368.34375000, -32644.74804688, 89327.02343750, + // -91567.60156250, -25929.78515625, -88250.83593750, + // -49992.60156250, 34217.12500000, 49765.98046875, + // 8088.22802734 + VLOAD_32(v24, 0xc7aa2211, 0xc5f5885c, 0xc71d75ee, 0xc797355e, 0x46e2353e, + 0xc6ddc243, 0x47258058, 0xc6ff097f, 0x47ae7783, 0xc7b2d7cd, + 0xc6ca9392, 0xc7ac5d6b, 0xc743489a, 0x4705a920, 0x474265fb, + 0x45fcc1d3); + VLOAD_8(v0, 0xAA, 0xAA); + // -95159.7034957902651513, -25746.0480722606444033, + // -89272.3172746254567755, -57390.5516721799431252, + // 98139.9797031646012329, -66607.6782029465102823, + // 67788.7602550606534351, -90593.7788542728667380, + // -68056.0128839309472824, -37127.9738570771587547, + // 21060.8546093095501419, -76483.1707763712329324, + // 83261.7813338561973069, -99608.0446094776270911, + // 32602.1877863906847779, 52037.0826651407405734 + VLOAD_64(v16, 0xc0f73b7b4184cd41, 0xc0d92483139dacd4, 0xc0f5cb85138e8ec3, 
+ 0xc0ec05d1a74c6a5f, 0x40f7f5bfacdd39bc, 0xc0f042fad9eb5535, + 0x40f08ccc2a0135e2, 0xc0f61e1c762fe5e5, 0xc0f09d8034c5c7e1, + 0xc0e220ff29d6512c, 0x40d49136b1eb3ed8, 0xc0f2ac32bb800116, + 0x40f453dc8057edfa, 0xc0f85180b6b86d78, 0x40dfd68c04b135a8, + 0x40e968a2a5315d80); + asm volatile("vfwnmacc.vv v16, v8, v24, v0.t"); + // -95159.7034957902651513, 353690458.8370813727378845, + // -89272.3172746254567755, -2537572977.5412845611572266, + // 98139.9797031646012329, -2125331270.8559157848358154, + // 67788.7602550606534351, 1514813534.1183013916015625, + // -68056.0128839309472824, -5859792188.5645341873168945, + // 21060.8546093095501419, 5090653244.5816955566406250, + // 83261.7813338561973069, -3112554462.7102732658386230, + // 32602.1877863906847779, 734216828.7275727987289429 + VCMP_U64(4, v16, 0xc0f73b7b4184cd41, 0x41b514e35ad64af7, 0xc0f5cb85138e8ec3, + 0xc1e2e8094e315234, 0x40f7f5bfacdd39bc, 0xc1dfab7ed1b6c753, + 0x40f08ccc2a0135e2, 0x41d6928e17879240, 0xc0f09d8034c5c7e1, + 0xc1f5d45553c90855, 0x40d49136b1eb3ed8, 0x41f2f6d343c94ea0, + 0x40f453dc8057edfa, 0xc1e730b9fbd6ba8f, 0x40dfd68c04b135a8, + 0x41c5e1a13e5d211b); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 85.2500, -7.6602, -81.8125, -37.2500, + // -48.0000, 14.9531, 25.9844, 96.1875, 46.5000, + // -77.4375, 45.5312, -68.7500, 58.8438, -70.5625, -45.9375, + // -90.5000 + VLOAD_16(v4, 0x5554, 0xc7a9, 0xd51d, 0xd0a8, 0xd200, 0x4b7a, 0x4e7f, 0x5603, + 0x51d0, 0xd4d7, 0x51b1, 0xd44c, 0x535b, 0xd469, 0xd1be, 0xd5a8); + // -47.9375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd1fe); + // 6.19335365, -81.26284790, + // -77.74858093, 15.38204670, 27.37081337, 48.81098938, + // -82.18785095, 3.87765026, -34.03960037, + // -92.34690857, 46.98464203, 28.09385681, 58.44809723, 57.04935455, + // -44.62148285, 83.52678680 + VLOAD_32(v8, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, + 0x42433e74, 0xc2a4602e, 0x40782b6c, 
0xc208288d, 0xc2b8b19e, + 0x423bf046, 0x41e0c038, 0x4269cada, 0x4264328a, 0xc2327c66, + 0x42a70db7); + asm volatile("vfwnmacc.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // 4080.47851562, -285.94589233, -3844.13818359, -1801.05395508, + // -2328.37084961, 668.00445557, 1327.81384277, 4607.11083984, + // 2263.13330078, -3619.81323242, 2135.66967773, + // -3323.79687500, 2762.37426758, -3439.63916016, + // -2157.50732422, -4421.87060547 + VCMP_U32(5, v8, 0x457f07a8, 0xc38ef913, 0xc5704236, 0xc4e121ba, 0xc51185ef, + 0x44270049, 0x44a5fa0b, 0x458ff8e3, 0x450d7222, 0xc5623d03, + 0x45057ab7, 0xc54fbcc0, 0x452ca5fd, 0xc556fa3a, 0xc506d81e, + 0xc58a2ef7); + + VSET(16, e32, m4); + double dscalar_32; + // 415907.75000000, 16644.92773438, -320087.15625000, + // -560497.81250000, 51200.66406250, 175961.67187500, + // -62272.61328125, -40134.65234375, 67972.27343750, + // 832511.06250000, -279323.15625000, -48243.37500000, + // 685093.43750000, 272952.25000000, 518086.00000000, + // -349626.18750000 + VLOAD_32(v8, 0x48cb1478, 0x468209db, 0xc89c4ae5, 0xc908d71d, 0x474800aa, + 0x482bd66b, 0xc773409d, 0xc71cc6a7, 0x4784c223, 0x494b3ff1, + 0xc8886365, 0xc73c7360, 0x49274257, 0x48854708, 0x48fcf8c0, + 0xc8aab746); + // -648299.93750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc91e46bf); + // -3761.4446916116867214, 251037.4171459318604320, + // -832277.7590174797223881, -817938.7112226528115571, + // -640813.8540152770001441, -87111.6097838475834578, + // -748981.3983645441476256, 259451.7965630525723100, + // -469164.5467169298790395, -204901.8221613015048206, + // 97767.4262132318690419, -208046.9794710964197293, + // -303699.6372622016351670, -710697.5104083393234760, + // -907884.7086961114546284, -326406.2730544115183875 + VLOAD_64(v16, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, + 0xc128f6256c256024, 0xc1238e5bb5417d8a, 0xc0f54479c1acb530, + 0xc126db6acbf67002, 0x410fabde5f5c7320, 0xc11ca2b22fd69018, + 0xc109032e93c94df0, 0x40f7de76d1c4f740, 0xc1096577d5f4f134, + 
0xc112894e8c8e766c, 0xc125b05305543dea, 0xc12bb4d96ada377b, + 0xc113ec19179b935e); + asm volatile("vfwnmacc.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // 269632972092.2103271484375000, 10790654572.4701824188232422, + // -207511651113.6687316894531250, + // -363369878873.9254760742187500, 33194028125.5312614440917969, + // 114076027990.5677947998046875, -40370582316.7976837158203125, + // -26019552057.8339157104492188, 44066889785.8108749389648438, + // 539717074688.6307373046875000, + // -181085282506.6039428710937500, + // -31275968950.3095932006835938, 444146336412.5474243164062500, + // 176955637312.9947814941406250, 335876029304.3336791992187500, + // -226662309098.3402404785156250 + VCMP_U64(6, v16, 0x424f63b0529e1aec, 0x420419629363c2ef, 0xc24828544194d599, + 0xc25526a215567b3b, 0x421eea12b1762003, 0x423a8f760c56915b, + 0xc222cc8c6659986a, 0xc2183b8b6ce755ee, 0x4224852ec0739f2b, + 0x425f6a693fc0285e, 0xc24514c310654d4e, 0xc21d20c5a6d93d06, + 0x4259da4bd0a72309, 0x424499b05f207f55, 0x42538cf15ede155b, + 0xc24a63102e752b8d); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 85.2500, -7.6602, -81.8125, -37.2500, + // -48.0000, 14.9531, 25.9844, 96.1875, 46.5000, + // -77.4375, 45.5312, -68.7500, 58.8438, -70.5625, -45.9375, + // -90.5000 + VLOAD_16(v4, 0x5554, 0xc7a9, 0xd51d, 0xd0a8, 0xd200, 0x4b7a, 0x4e7f, 0x5603, + 0x51d0, 0xd4d7, 0x51b1, 0xd44c, 0x535b, 0xd469, 0xd1be, 0xd5a8); + // -47.9375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xd1fe); + VLOAD_8(v0, 0xAA, 0xAA); + // 6.19335365, -81.26284790, + // -77.74858093, 15.38204670, 27.37081337, 48.81098938, + // -82.18785095, 3.87765026, -34.03960037, + // -92.34690857, 46.98464203, 28.09385681, 58.44809723, 57.04935455, + // -44.62148285, 83.52678680 + VLOAD_32(v8, 0x40c62ff4, 0xc2a28694, 0xc29b7f46, 0x41761cdd, 0x41daf76d, + 0x42433e74, 0xc2a4602e, 0x40782b6c, 0xc208288d, 0xc2b8b19e, + 0x423bf046, 0x41e0c038, 0x4269cada, 
0x4264328a, 0xc2327c66, + 0x42a70db7); + asm volatile("vfwnmacc.vf v8, %[A], v4, v0.t" ::[A] "f"(dscalar_16)); + // 6.19335365, -285.94589233, -77.74858093, + // -1801.05395508, 27.37081337, 668.00445557, -82.18785095, + // 4607.11083984, -34.03960037, -3619.81323242, 46.98464203, + // -3323.79687500, 58.44809723, -3439.63916016, -44.62148285, + // -4421.87060547 + VCMP_U32(7, v8, 0x40c62ff4, 0xc38ef913, 0xc29b7f46, 0xc4e121ba, 0x41daf76d, + 0x44270049, 0xc2a4602e, 0x458ff8e3, 0xc208288d, 0xc5623d03, + 0x423bf046, 0xc54fbcc0, 0x4269cada, 0xc556fa3a, 0xc2327c66, + 0xc58a2ef7); + + VSET(16, e32, m4); + double dscalar_32; + // 415907.75000000, 16644.92773438, -320087.15625000, + // -560497.81250000, 51200.66406250, 175961.67187500, + // -62272.61328125, -40134.65234375, 67972.27343750, + // 832511.06250000, -279323.15625000, -48243.37500000, + // 685093.43750000, 272952.25000000, 518086.00000000, + // -349626.18750000 + VLOAD_32(v8, 0x48cb1478, 0x468209db, 0xc89c4ae5, 0xc908d71d, 0x474800aa, + 0x482bd66b, 0xc773409d, 0xc71cc6a7, 0x4784c223, 0x494b3ff1, + 0xc8886365, 0xc73c7360, 0x49274257, 0x48854708, 0x48fcf8c0, + 0xc8aab746); + // -648299.93750000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc91e46bf); + VLOAD_8(v0, 0xAA, 0xAA); + // -3761.4446916116867214, 251037.4171459318604320, + // -832277.7590174797223881, -817938.7112226528115571, + // -640813.8540152770001441, -87111.6097838475834578, + // -748981.3983645441476256, 259451.7965630525723100, + // -469164.5467169298790395, -204901.8221613015048206, + // 97767.4262132318690419, -208046.9794710964197293, + // -303699.6372622016351670, -710697.5104083393234760, + // -907884.7086961114546284, -326406.2730544115183875 + VLOAD_64(v16, 0xc0ad62e3ae9e7200, 0x410ea4eb56509b38, 0xc129662b849df069, + 0xc128f6256c256024, 0xc1238e5bb5417d8a, 0xc0f54479c1acb530, + 0xc126db6acbf67002, 0x410fabde5f5c7320, 0xc11ca2b22fd69018, + 0xc109032e93c94df0, 0x40f7de76d1c4f740, 0xc1096577d5f4f134, + 0xc112894e8c8e766c, 0xc125b05305543dea, 
0xc12bb4d96ada377b, + 0xc113ec19179b935e); + asm volatile("vfwnmacc.vf v16, %[A], v8, v0.t" ::[A] "f"(dscalar_32)); + // -3761.4446916116867214, 10790654572.4701824188232422, + // -832277.7590174797223881, -363369878873.9254760742187500, + // -640813.8540152770001441, 114076027990.5677947998046875, + // -748981.3983645441476256, -26019552057.8339157104492188, + // -469164.5467169298790395, 539717074688.6307373046875000, + // 97767.4262132318690419, -31275968950.3095932006835938, + // -303699.6372622016351670, 176955637312.9947814941406250, + // -907884.7086961114546284, -226662309098.3402404785156250 + VCMP_U64(8, v16, 0xc0ad62e3ae9e7200, 0x420419629363c2ef, 0xc129662b849df069, + 0xc25526a215567b3b, 0xc1238e5bb5417d8a, 0x423a8f760c56915b, + 0xc126db6acbf67002, 0xc2183b8b6ce755ee, 0xc11ca2b22fd69018, + 0x425f6a693fc0285e, 0x40f7de76d1c4f740, 0xc21d20c5a6d93d06, + 0xc112894e8c8e766c, 0x424499b05f207f55, 0xc12bb4d96ada377b, + 0xc24a63102e752b8d); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c new file mode 100644 index 000000000..9bff05119 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwnmsac.c @@ -0,0 +1,347 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -27.1719, 16.3438, -76.1250, 73.7500, 39.2500, 32.8438, + // -48.0312, -62.9062, -52.3125, 50.8750, -32.1562, -86.3750, + // -42.7812, 97.2500, -83.6250, 46.6250 + VLOAD_16(v4, 0xcecb, 0x4c16, 0xd4c2, 0x549c, 0x50e8, 0x501b, 0xd201, 0xd3dd, + 0xd28a, 0x525c, 0xd005, 0xd566, 0xd159, 0x5614, 0xd53a, 0x51d4); + // -18.1719, -46.5312, -72.7500, + // -78.0625, 13.7344, 6.3164, 19.1250, 23.3125, 72.4375, + // -53.2812, -16.3438, -95.0625, -96.2500, 10.4141, + // -44.4688, 42.5938 + VLOAD_16(v12, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, + 0x5487, 0xd2a9, 0xcc16, 0xd5f1, 0xd604, 0x4935, 0xd18f, 0x5153); + // 69.72727966, 14.41778183, + // -64.82620239, 5.66590357, 73.33881378, + // -23.97786140, 94.91672516, 17.38204765, -39.07393646, + // -50.71182251, -11.98221493, -36.07648849, + // -86.86090088, 55.96418381, 61.43484116, -88.02533722 + VLOAD_32(v8, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, + 0xc1bfd2a9, 0x42bdd55d, 0x418b0e6f, 0xc21c4bb6, 0xc24ad8e8, + 0xc13fb727, 0xc2104e53, 0xc2adb8c8, 0x425fdb53, 0x4275bd47, + 0xc2b00cf9); + asm volatile("vfwnmsac.vv v8, v4, v12"); + // -424.03662109, 774.91290283, -5602.91992188, 5762.77539062, + // -465.73541260, -231.43232727, 1013.51440430, 1483.88403320, + // 3750.31274414, 2659.97167969, -537.53594971, -8247.09960938, + // -4204.55615234, -956.80340576, -3657.26440430, -2073.95898438 + VCMP_U32(1, v8, 0xc3d404b0, 0x4441ba6d, 0xc5af175c, 0x45b41634, 0xc3e8de22, + 0xc3676ead, 0x447d60ec, 0x44b97c4a, 0x456a6501, 0x45263f8c, + 0xc406624d, 0xc600dc66, 0xc5836473, 0xc46f336b, 0xc564943b, + 0xc5019f58); + + VSET(16, e32, m4); + // 76109.13281250, 56176.41406250, -69127.14843750, + // -80327.49218750, 42920.59375000, -22857.18164062, 
+ // -74227.70312500, -2650.23828125, 34254.71093750, + // -45853.78125000, 16339.80859375, + // -48032.71875000, 49.54582977, -47754.19921875, + // -95663.35156250, 82512.11718750 + VLOAD_32(v8, 0x4794a691, 0x475b706a, 0xc7870393, 0xc79ce3bf, 0x4727a898, + 0xc6b2925d, 0xc790f9da, 0xc525a3d0, 0x4705ceb6, 0xc7331dc8, + 0x467f4f3c, 0xc73ba0b8, 0x42462eee, 0xc73a8a33, 0xc7bad7ad, + 0x47a1280f); + // -36622.54296875, -60900.32421875, -36611.69921875, + // -74411.05468750, -25865.60937500, -67159.76562500, + // 6145.43457031, -31624.23242188, -69962.68750000, 468.94165039, + // 10443.93554688, -6054.45410156, -26090.46093750, + // 83534.57031250, 49878.42968750, -62082.53125000 + VLOAD_32(v24, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, + 0xc7832be2, 0x45c00b7a, 0xc6f71077, 0xc788a558, 0x43ea7888, + 0x46232fbe, 0xc5bd33a2, 0xc6cbd4ec, 0x47a32749, 0x4742d66e, + 0xc7728288); + // 69521.3925020728202071, 98263.6759213360201102, + // -97991.2678309752518544, -63510.9471883209771477, + // 65329.9928102507547010, -34993.7523106171429390, + // -15831.2510480509663466, -3510.3868967669695849, + // 47068.9415519913600292, -19802.3942476644588169, + // 25915.8242703938303748, 82619.8738822988234460, + // 36865.7501246419560630, 41236.4660055586136878, + // -5735.0030344506667461, 97965.1847665070963558 + VLOAD_64(v16, 0x40f0f91647b040e6, 0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, + 0xc0ef02de4f5de1b8, 0x40efe63fc51a00c4, 0xc0e1163812edb722, + 0xc0ceeba02257b050, 0xc0ab6cc617554220, 0x40e6fb9e2131a44c, + 0xc0d356993b5a8e58, 0x40d94ef4c0d89c24, 0x40f42bbdfb6c0160, + 0x40e20038010564a4, 0x40e4228ee9847d40, 0xc0b66700c6dda260, + 0x40f7ead2f4cdb996); + asm volatile("vfwnmsac.vv v16, v8, v24"); + // 2787379508.1325840950012207, 3421260093.5289182662963867, + // -2530960357.7114648818969727, -5977316925.0209798812866211, + // 1110232642.0733766555786133, -1535117955.5847384929656982, + // 456145661.6082201600074768, -83815261.7664972990751266, + // 
2396598705.6646966934204102, 21482945.4617780297994614, + // -170625991.9771288335323334, -290729271.1712532043457031, + // 1329539.2864317980129272, 3989167748.8218097686767578, + // 4771532019.5777149200439453, 5122659058.9813976287841797 + VCMP_U64(2, v16, 0x41e4c48126843e21, 0x41e97d8927b0ece6, 0xc1e2db6c7cb6c452, + 0xc1f64469e3d055ef, 0x41d08b339084b234, 0xc1d6e002a0e56c5b, + 0x41bb303afd9bb451, 0xc193fbad7710e4ab, 0x41e1db2636354532, + 0x41747cdc1763715c, 0xc1a457178ff44a3b, 0xc1b1542d372bd740, + 0x4134498349539825, 0x41edb8bbd09a4c44, 0x41f1c67ccf393e52, + 0x41f315592f2fb3ce); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // -27.1719, 16.3438, -76.1250, 73.7500, 39.2500, 32.8438, + // -48.0312, -62.9062, -52.3125, 50.8750, -32.1562, -86.3750, + // -42.7812, 97.2500, -83.6250, 46.6250 + VLOAD_16(v4, 0xcecb, 0x4c16, 0xd4c2, 0x549c, 0x50e8, 0x501b, 0xd201, 0xd3dd, + 0xd28a, 0x525c, 0xd005, 0xd566, 0xd159, 0x5614, 0xd53a, 0x51d4); + // -18.1719, -46.5312, -72.7500, + // -78.0625, 13.7344, 6.3164, 19.1250, 23.3125, 72.4375, + // -53.2812, -16.3438, -95.0625, -96.2500, 10.4141, + // -44.4688, 42.5938 + VLOAD_16(v12, 0xcc8b, 0xd1d1, 0xd48c, 0xd4e1, 0x4ade, 0x4651, 0x4cc8, 0x4dd4, + 0x5487, 0xd2a9, 0xcc16, 0xd5f1, 0xd604, 0x4935, 0xd18f, 0x5153); + VLOAD_8(v0, 0xAA, 0xAA); + // 69.72727966, 14.41778183, + // -64.82620239, 5.66590357, 73.33881378, + // -23.97786140, 94.91672516, 17.38204765, -39.07393646, + // -50.71182251, -11.98221493, -36.07648849, + // -86.86090088, 55.96418381, 61.43484116, -88.02533722 + VLOAD_32(v8, 0x428b745e, 0x4166af3c, 0xc281a704, 0x40b54f15, 0x4292ad79, + 0xc1bfd2a9, 0x42bdd55d, 0x418b0e6f, 0xc21c4bb6, 0xc24ad8e8, + 0xc13fb727, 0xc2104e53, 0xc2adb8c8, 0x425fdb53, 0x4275bd47, + 0xc2b00cf9); + asm volatile("vfwnmsac.vv v8, v4, v12, v0.t"); + // 69.72727966, 774.91290283, -64.82620239, + // 5762.77539062, 73.33881378, -231.43232727, 
94.91672516, + // 1483.88403320, -39.07393646, 2659.97167969, -11.98221493, + // -8247.09960938, -86.86090088, -956.80340576, 61.43484116, + // -2073.95898438 + VCMP_U32(3, v8, 0x428b745e, 0x4441ba6d, 0xc281a704, 0x45b41634, 0x4292ad79, + 0xc3676ead, 0x42bdd55d, 0x44b97c4a, 0xc21c4bb6, 0x45263f8c, + 0xc13fb727, 0xc600dc66, 0xc2adb8c8, 0xc46f336b, 0x4275bd47, + 0xc5019f58); + + VSET(16, e32, m4); + // 76109.13281250, 56176.41406250, -69127.14843750, + // -80327.49218750, 42920.59375000, -22857.18164062, + // -74227.70312500, -2650.23828125, 34254.71093750, + // -45853.78125000, 16339.80859375, + // -48032.71875000, 49.54582977, -47754.19921875, + // -95663.35156250, 82512.11718750 + VLOAD_32(v8, 0x4794a691, 0x475b706a, 0xc7870393, 0xc79ce3bf, 0x4727a898, + 0xc6b2925d, 0xc790f9da, 0xc525a3d0, 0x4705ceb6, 0xc7331dc8, + 0x467f4f3c, 0xc73ba0b8, 0x42462eee, 0xc73a8a33, 0xc7bad7ad, + 0x47a1280f); + // -36622.54296875, -60900.32421875, -36611.69921875, + // -74411.05468750, -25865.60937500, -67159.76562500, + // 6145.43457031, -31624.23242188, -69962.68750000, 468.94165039, + // 10443.93554688, -6054.45410156, -26090.46093750, + // 83534.57031250, 49878.42968750, -62082.53125000 + VLOAD_32(v24, 0xc70f0e8b, 0xc76de453, 0xc70f03b3, 0xc7915587, 0xc6ca1338, + 0xc7832be2, 0x45c00b7a, 0xc6f71077, 0xc788a558, 0x43ea7888, + 0x46232fbe, 0xc5bd33a2, 0xc6cbd4ec, 0x47a32749, 0x4742d66e, + 0xc7728288); + VLOAD_8(v0, 0xAA, 0xAA); + // 69521.3925020728202071, 98263.6759213360201102, + // -97991.2678309752518544, -63510.9471883209771477, + // 65329.9928102507547010, -34993.7523106171429390, + // -15831.2510480509663466, -3510.3868967669695849, + // 47068.9415519913600292, -19802.3942476644588169, + // 25915.8242703938303748, 82619.8738822988234460, + // 36865.7501246419560630, 41236.4660055586136878, + // -5735.0030344506667461, 97965.1847665070963558 + VLOAD_64(v16, 0x40f0f91647b040e6, 0x40f7fd7ad092e40e, 0xc0f7ec74490921f9, + 0xc0ef02de4f5de1b8, 0x40efe63fc51a00c4, 0xc0e1163812edb722, + 
0xc0ceeba02257b050, 0xc0ab6cc617554220, 0x40e6fb9e2131a44c, + 0xc0d356993b5a8e58, 0x40d94ef4c0d89c24, 0x40f42bbdfb6c0160, + 0x40e20038010564a4, 0x40e4228ee9847d40, 0xc0b66700c6dda260, + 0x40f7ead2f4cdb996); + asm volatile("vfwnmsac.vv v16, v8, v24, v0.t"); + // 69521.3925020728202071, 3421260093.5289182662963867, + // -97991.2678309752518544, -5977316925.0209798812866211, + // 65329.9928102507547010, -1535117955.5847384929656982, + // -15831.2510480509663466, -83815261.7664972990751266, + // 47068.9415519913600292, 21482945.4617780297994614, + // 25915.8242703938303748, -290729271.1712532043457031, + // 36865.7501246419560630, 3989167748.8218097686767578, + // -5735.0030344506667461, 5122659058.9813976287841797 + VCMP_U64(4, v16, 0x40f0f91647b040e6, 0x41e97d8927b0ece6, 0xc0f7ec74490921f9, + 0xc1f64469e3d055ef, 0x40efe63fc51a00c4, 0xc1d6e002a0e56c5b, + 0xc0ceeba02257b050, 0xc193fbad7710e4ab, 0x40e6fb9e2131a44c, + 0x41747cdc1763715c, 0x40d94ef4c0d89c24, 0xc1b1542d372bd740, + 0x40e20038010564a4, 0x41edb8bbd09a4c44, 0xc0b66700c6dda260, + 0x41f315592f2fb3ce); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 15.1797, -57.5312, -39.9688, 95.8125, 22.3906, + // -30.2344, 61.3438, 67.1250, -80.6250, -20.6875, -34.1250, + // -7.6758, -25.1562, 64.8125, 28.0156, -51.9688 + VLOAD_16(v4, 0x4b97, 0xd331, 0xd0ff, 0x55fd, 0x4d99, 0xcf8f, 0x53ab, 0x5432, + 0xd50a, 0xcd2c, 0xd044, 0xc7ad, 0xce4a, 0x540d, 0x4f01, 0xd27f); + // -27.7344 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xceef); + // -90.47762299, -89.97399139, -34.20752716, + // -93.73470306, 81.75606537, 80.60296631, 73.45400238, + // -61.63031769, -55.39078903, 21.99703789, 29.49930191, + // -64.56553650, -17.54965782, 84.51310730, -88.96613312, + // -6.75917578 + VLOAD_32(v8, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, + 0x42a134b8, 0x4292e873, 0xc2768572, 0xc25d902b, 0x41aff9ef, + 0x41ebfe92, 0xc281218e, 0xc18c65b3, 0x42a906b6, 
0xc2b1eea9, + 0xc0d84b2b); + asm volatile("vfwnmsac.vf v8, %[A], v4" ::[A] "f"(dscalar_16)); + // 330.52151489, -1685.56726074, -1142.71582031, 2563.56518555, + // 702.74603271, -757.92852783, 1774.78454590, 1800.03955078, + // -2291.47485352, -551.75787354, -916.93621826, -277.44854736, + // -715.24255371, 1882.04724121, 688.02972412, -1448.07995605 + VCMP_U32(5, v8, 0x43a542c1, 0xc4d2b227, 0xc48ed6e8, 0x4520390b, 0x442fafbf, + 0xc43d7b6d, 0x44ddd91b, 0x44e10144, 0xc50f3799, 0xc409f081, + 0xc4653beb, 0xc38ab96a, 0xc432cf86, 0x44eb4183, 0x442c01e7, + 0xc4b5028f); + + VSET(16, e32, m4); + double dscalar_32; + // 467373.87500000, -160965.29687500, 883060.25000000, + // -737665.37500000, -482502.81250000, -983579.31250000, + // -407525.09375000, 564889.31250000, -121145.03125000, + // 744798.75000000, 160985.04687500, -9122.68847656, + // -708214.37500000, 763142.93750000, -340832.59375000, + // -663023.75000000 + VLOAD_32(v8, 0x48e435bc, 0xc81d3153, 0x49579744, 0xc9341816, 0xc8eb98da, + 0xc97021b5, 0xc8c6fca3, 0x4909e995, 0xc7ec9c84, 0x4935d5ec, + 0x481d3643, 0xc60e8ac1, 0xc92ce766, 0x493a506f, 0xc8a66c13, + 0xc921defc); + // 235169.78125000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4865a872); + // -460724.6105727200629190, -944938.6498861069558188, + // -303510.4811713555827737, -748025.6652074699522927, + // 387702.0000469267833978, -894167.6638924945145845, + // 98379.0701996718998998, -950753.1128427713410929, + // -749333.7338243273552507, 898522.0366696736309677, + // -388606.5700500296661630, 47697.1169114386430010, + // -665347.3327810273040086, 976438.6193965608254075, + // -498588.0437998892739415, 793291.0511387982405722 + VLOAD_64(v16, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, + 0xc126d3f3549612d1, 0x4117a9d8000c4d34, 0xc12b49af53e9b790, + 0x40f804b11f89b0f0, 0xc12d03c239c68719, 0xc126de2b77b7d27e, + 0x412b6bb412c65e12, 0xc117b7fa47bb31ea, 0x40e74a23bdbd0eb0, + 0xc1244e06aa62465a, 0x412dcc6d3d218bc8, 0xc11e6e702cd9e0d0, + 0x412835961a2edd54); 
+ asm volatile("vfwnmsac.vf v16, %[A], v8" ::[A] "f"(dscalar_32)); + // -109912672670.3254089355468750, 37853228716.2851715087890625, + // -207669389333.5514831542968750, 173475856848.7839965820312500, + // 113470468570.1348114013671875, 231307237594.9865112304687500, + // 95837685530.1434478759765625, -132845846804.2007293701171875, + // 28488901164.8530883789062500, -175153260590.7367553710937500, + // -37859206864.6847991943359375, 2145428350.5620102882385254, + // 166549954299.5226745605468750, -179467181235.7380371093750000, + // 80153027927.0138244628906250, 155923943542.1058349609375000 + VCMP_U64(6, v16, 0xc239974e499e534e, 0x4221a074dd589202, 0xc2482d07b40ac697, + 0x424431fbc0e8645a, 0x423a6b5df1da2283, 0x424aed7e2c6d7e46, + 0x4236505f071a24b9, 0xc23eee3ac1143363, 0x421a884888b36990, + 0xc24463f954175e4e, 0xc221a12b4da15e9e, 0x41dff828dfa3f7fa, + 0x424363934f7dc2e7, 0xc244e489ee59de78, 0x4232a97e2557038a, + 0x424226e5483b0d8c); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 15.1797, -57.5312, -39.9688, 95.8125, 22.3906, + // -30.2344, 61.3438, 67.1250, -80.6250, -20.6875, -34.1250, + // -7.6758, -25.1562, 64.8125, 28.0156, -51.9688 + VLOAD_16(v4, 0x4b97, 0xd331, 0xd0ff, 0x55fd, 0x4d99, 0xcf8f, 0x53ab, 0x5432, + 0xd50a, 0xcd2c, 0xd044, 0xc7ad, 0xce4a, 0x540d, 0x4f01, 0xd27f); + // -27.7344 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xceef); + VLOAD_8(v0, 0xAA, 0xAA); + // -90.47762299, -89.97399139, -34.20752716, + // -93.73470306, 81.75606537, 80.60296631, 73.45400238, + // -61.63031769, -55.39078903, 21.99703789, 29.49930191, + // -64.56553650, -17.54965782, 84.51310730, -88.96613312, + // -6.75917578 + VLOAD_32(v8, 0xc2b4f48b, 0xc2b3f2af, 0xc208d482, 0xc2bb782b, 0x42a3831b, + 0x42a134b8, 0x4292e873, 0xc2768572, 0xc25d902b, 0x41aff9ef, + 0x41ebfe92, 0xc281218e, 0xc18c65b3, 0x42a906b6, 0xc2b1eea9, + 0xc0d84b2b); + asm volatile("vfwnmsac.vf v8, %[A], v4, v0.t" ::[A] 
"f"(dscalar_16)); + // -90.47762299, -1685.56726074, -34.20752716, + // 2563.56518555, 81.75606537, -757.92852783, 73.45400238, + // 1800.03955078, -55.39078903, -551.75787354, 29.49930191, + // -277.44854736, -17.54965782, 1882.04724121, -88.96613312, + // -1448.07995605 + VCMP_U32(7, v8, 0xc2b4f48b, 0xc4d2b227, 0xc208d482, 0x4520390b, 0x42a3831b, + 0xc43d7b6d, 0x4292e873, 0x44e10144, 0xc25d902b, 0xc409f081, + 0x41ebfe92, 0xc38ab96a, 0xc18c65b3, 0x44eb4183, 0xc2b1eea9, + 0xc4b5028f); + + VSET(16, e32, m4); + double dscalar_32; + // 467373.87500000, -160965.29687500, 883060.25000000, + // -737665.37500000, -482502.81250000, -983579.31250000, + // -407525.09375000, 564889.31250000, -121145.03125000, + // 744798.75000000, 160985.04687500, -9122.68847656, + // -708214.37500000, 763142.93750000, -340832.59375000, + // -663023.75000000 + VLOAD_32(v8, 0x48e435bc, 0xc81d3153, 0x49579744, 0xc9341816, 0xc8eb98da, + 0xc97021b5, 0xc8c6fca3, 0x4909e995, 0xc7ec9c84, 0x4935d5ec, + 0x481d3643, 0xc60e8ac1, 0xc92ce766, 0x493a506f, 0xc8a66c13, + 0xc921defc); + // 235169.78125000 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x4865a872); + VLOAD_8(v0, 0xAA, 0xAA); + // -460724.6105727200629190, -944938.6498861069558188, + // -303510.4811713555827737, -748025.6652074699522927, + // 387702.0000469267833978, -894167.6638924945145845, + // 98379.0701996718998998, -950753.1128427713410929, + // -749333.7338243273552507, 898522.0366696736309677, + // -388606.5700500296661630, 47697.1169114386430010, + // -665347.3327810273040086, 976438.6193965608254075, + // -498588.0437998892739415, 793291.0511387982405722 + VLOAD_64(v16, 0xc11c1ed27139f9a2, 0xc12cd6554cbddf2f, 0xc1128659ecb82f10, + 0xc126d3f3549612d1, 0x4117a9d8000c4d34, 0xc12b49af53e9b790, + 0x40f804b11f89b0f0, 0xc12d03c239c68719, 0xc126de2b77b7d27e, + 0x412b6bb412c65e12, 0xc117b7fa47bb31ea, 0x40e74a23bdbd0eb0, + 0xc1244e06aa62465a, 0x412dcc6d3d218bc8, 0xc11e6e702cd9e0d0, + 0x412835961a2edd54); + asm volatile("vfwnmsac.vf v16, %[A], v8, v0.t" 
::[A] "f"(dscalar_32)); + // -460724.6105727200629190, 37853228716.2851715087890625, + // -303510.4811713555827737, 173475856848.7839965820312500, + // 387702.0000469267833978, 231307237594.9865112304687500, + // 98379.0701996718998998, -132845846804.2007293701171875, + // -749333.7338243273552507, -175153260590.7367553710937500, + // -388606.5700500296661630, 2145428350.5620102882385254, + // -665347.3327810273040086, -179467181235.7380371093750000, + // -498588.0437998892739415, 155923943542.1058349609375000 + VCMP_U64(8, v16, 0xc11c1ed27139f9a2, 0x4221a074dd589202, 0xc1128659ecb82f10, + 0x424431fbc0e8645a, 0x4117a9d8000c4d34, 0x424aed7e2c6d7e46, + 0x40f804b11f89b0f0, 0xc23eee3ac1143363, 0xc126de2b77b7d27e, + 0xc24463f954175e4e, 0xc117b7fa47bb31ea, 0x41dff828dfa3f7fa, + 0xc1244e06aa62465a, 0xc244e489ee59de78, 0xc11e6e702cd9e0d0, + 0x424226e5483b0d8c); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c new file mode 100644 index 000000000..6e337252c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredosum.c @@ -0,0 +1,268 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(1, e32, m4); + VLOAD_32(v4, 0x3F800000); + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfwredosum.vs v8, v12, v4"); + VCMP_U32(1, v8, 0x42920000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(1, e64, m8); + VLOAD_64(v8, 0x3FF0000000000000); + VSET(16, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredosum.vs v16, v24, v8"); + VCMP_U64(2, v16, 0x4052400000000000); + + VSET(1, e32, m4); + VLOAD_32(v4, 0x3F800000); + VSET(2, e8, m1); + VLOAD_8(v0, 0xaa, 0x55); + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfwredosum.vs v8, v12, v4, v0.t"); + VCMP_U32(3, v8, 0x42140000); + + VSET(1, e64, m8); + VLOAD_64(v8, 0x3FF0000000000000); + VSET(2, e8, m1); + VLOAD_8(v0, 0xaa, 0x55); + VSET(16, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredosum.vs v16, v24, v8, v0.t"); + VCMP_U64(4, v16, 0x4042800000000000); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredosum.vs v8, v12, v4"); + VCMP_U32(5, v8, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredosum.vs v16, v24, v8"); + VCMP_U64(6, v16, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredosum.vs v16, v24, v8"); + VCMP_U64(7, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredosum.vs v16, v24, v8"); + VCMP_U64(8, v16, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredosum.vs v16, v24, v8"); + VCMP_U64(9, v16, 
0x403d000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredosum.vs v16, v24, v8"); + VCMP_U64(10, v16, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_32(v4, 
0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredosum.vs v8, v12, v4, v0.t"); + VCMP_U32(11, v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredosum.vs v16, v24, v8, v0.t"); + VCMP_U64(12, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 
0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c new file mode 100644 index 000000000..e6eeb6405 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwredusum.c @@ -0,0 +1,272 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Xiaorui Yin +// Date: 2022/05/03 + +#include "float_macros.h" +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + // WARNING: setting vl == 1 is mandatory here since + // these variables are initialized on the stack, which is + // immediately before the UART. + // Loading more values would load from the UART + // addr space!!!! 
+ VSET(1, e32, m4); + VLOAD_32(v4, 0x3F800000); + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfwredsum.vs v8, v12, v4"); + VCMP_U32(1, v8, 0x42920000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(1, e64, m8); + VLOAD_64(v8, 0x3FF0000000000000); + VSET(16, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(2, v16, 0x4052400000000000); + + VSET(1, e32, m4); + VLOAD_32(v4, 0x3F800000); + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + asm volatile("vfwredsum.vs v8, v12, v4, v0.t"); + VCMP_U32(3, v8, 0x42140000); + + VSET(1, e64, m8); + VLOAD_64(v8, 0x3FF0000000000000); + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v16, 0x3FF0000000000000); + asm volatile("vfwredsum.vs v8, v24, v16, v0.t"); + VCMP_U64(4, v8, 0x4042800000000000); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e16, m2); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_32(v4, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredsum.vs v8, v12, v4"); + VCMP_U32(5, v8, 0x42920000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(16, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(6, v16, 0x4052400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(1, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(7, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(3, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 
0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(8, v16, 0x401C000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(7, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(9, v16, 
0x403d000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + + VSET(15, e32, m4); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8"); + VCMP_U64(10, v16, 0x4050400000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(7, e16, m2); + VLOAD_8(v0, 0x00, 0xff); + // 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 + VLOAD_16(v12, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800, + 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700, 0x4800); + VLOAD_32(v4, 
0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_32(v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + asm volatile("vfwredsum.vs v8, v12, v4, v0.t"); + VCMP_U32(11, v8, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + + VSET(1, e32, m4); + VLOAD_8(v0, 0xff, 0x00); + VLOAD_32(v24, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000, + 0x40C00000, 0x40E00000, 0x41000000, 0x3F800000, 0x40000000, + 0x40400000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, + 0x41000000); + VLOAD_64(v8, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + VLOAD_64(v16, 0x3FF0000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 0x401C000000000000, + 0x4020000000000000); + asm volatile("vfwredsum.vs v16, v24, v8, v0.t"); + VCMP_U64(12, v16, 0x4000000000000000, 0x4000000000000000, 0x4008000000000000, + 0x4010000000000000, 0x4014000000000000, 0x4018000000000000, + 0x401C000000000000, 0x4020000000000000, 0x3FF0000000000000, + 0x4000000000000000, 0x4008000000000000, 0x4010000000000000, + 0x4014000000000000, 0x4018000000000000, 
0x401C000000000000, + 0x4020000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c new file mode 100644 index 000000000..e744d9ced --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfwsub.c @@ -0,0 +1,527 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values +void TEST_CASE1(void) { + VSET(16, e16, m2); + // -15.5625, 95.7500, -42.4375, 30.7188, -50.7500, -90.2500, + // -95.5000, 29.5938, -41.4062, -94.0000, 34.3438, + // -69.5625, 31.5625, -75.0625, 46.2500, -63.6875 + VLOAD_16(v2, 0xcbc8, 0x55fc, 0xd14e, 0x4fae, 0xd258, 0xd5a4, 0xd5f8, 0x4f66, + 0xd12d, 0xd5e0, 0x504b, 0xd459, 0x4fe4, 0xd4b1, 0x51c8, 0xd3f6); + // 57.2500, 43.2812, -49.4062, -53.5625, -54.7812, + // -12.1406, 92.1875, 67.1875, -19.7656, -41.2812, 98.0625, + // -41.9062, 10.1719, -84.6250, -7.1016, 62.8750 + VLOAD_16(v4, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, + 0xccf1, 0xd129, 0x5621, 0xd13d, 0x4916, 0xd54a, 0xc71a, 0x53dc); + asm volatile("vfwsub.vv v8, v2, v4"); + // -72.81250000, 52.46875000, 6.96875000, 84.28125000, 4.03125000, + // -78.10937500, -187.68750000, -37.59375000, -21.64062500, + // -52.71875000, -63.71875000, + // -27.65625000, 21.39062500, 9.56250000, 53.35156250, + // -126.56250000 + VCMP_U32(1, v8, 0xc291a000, 0x4251e000, 0x40df0000, 0x42a89000, 0x40810000, + 0xc29c3800, 0xc33bb000, 0xc2166000, 0xc1ad2000, 0xc252e000, + 0xc27ee000, 0xc1dd4000, 0x41ab2000, 0x41190000, 0x42556800, + 0xc2fd2000); + + VSET(16, e32, 
m4); + // 74632.77343750, -65636.60937500, 16165.84765625, + // -17815.85937500, -85604.03125000, -76754.03125000, + // 21778.01171875, -70512.52343750, 85301.90625000, + // -8385.11035156, 98258.05468750, -50421.53125000, + // 69842.53906250, -65219.96093750, -65266.08984375, + // -90740.60156250 + VLOAD_32(v4, 0x4791c463, 0xc780324e, 0x467c9764, 0xc68b2fb8, 0xc7a73204, + 0xc795e904, 0x46aa2406, 0xc789b843, 0x47a69af4, 0xc6030471, + 0x47bfe907, 0xc744f588, 0x47886945, 0xc77ec3f6, 0xc77ef217, + 0xc7b13a4d); + // 5391.72216797, -90760.36718750, -22961.19531250, + // 12708.62500000, 87107.59375000, 54867.48437500, + // 55424.39453125, -71436.00781250, -61505.46484375, + // 57701.78906250, -81581.38281250, 53319.19531250, + // -86229.57031250, 44376.69531250, 46809.38671875, + // -92887.27343750 + VLOAD_32(v8, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 0x47aa21cc, + 0x4756537c, 0x47588065, 0xc78b8601, 0xc7704177, 0x476165ca, + 0xc79f56b1, 0x47504732, 0xc7a86ac9, 0x472d58b2, 0x4736d963, + 0xc7b56ba3); + asm volatile("vfwsub.vv v16, v4, v8"); + // 69241.0512695312500000, 25123.7578125000000000, + // 39127.0429687500000000, -30524.4843750000000000, + // -172711.6250000000000000, -131621.5156250000000000, + // -33646.3828125000000000, 923.4843750000000000, + // 146807.3710937500000000, -66086.8994140625000000, + // 179839.4375000000000000, -103740.7265625000000000, + // 156072.1093750000000000, -109596.6562500000000000, + // -112075.4765625000000000, 2146.6718750000000000 + VCMP_U64(2, v16, 0x40f0e790d2000000, 0x40d888f080000000, 0x40e31ae160000000, + 0xc0ddcf1f00000000, 0xc105153d00000000, 0xc100112c20000000, + 0xc0e06dcc40000000, 0x408cdbe000000000, 0x4101ebbaf8000000, + 0xc0f0226e64000000, 0x4105f3fb80000000, 0xc0f953cba0000000, + 0x41030d40e0000000, 0xc0fac1ca80000000, 0xc0fb5cb7a0000000, + 0x40a0c55800000000); +}; + +// Simple random test with similar values (masked) +// The numbers are the same of TEST_CASE1 +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 
-15.5625, 95.7500, -42.4375, 30.7188, -50.7500, -90.2500, + // -95.5000, 29.5938, -41.4062, -94.0000, 34.3438, + // -69.5625, 31.5625, -75.0625, 46.2500, -63.6875 + VLOAD_16(v4, 0xcbc8, 0x55fc, 0xd14e, 0x4fae, 0xd258, 0xd5a4, 0xd5f8, 0x4f66, + 0xd12d, 0xd5e0, 0x504b, 0xd459, 0x4fe4, 0xd4b1, 0x51c8, 0xd3f6); + // 57.2500, 43.2812, -49.4062, -53.5625, -54.7812, + // -12.1406, 92.1875, 67.1875, -19.7656, -41.2812, 98.0625, + // -41.9062, 10.1719, -84.6250, -7.1016, 62.8750 + VLOAD_16(v8, 0x5328, 0x5169, 0xd22d, 0xd2b2, 0xd2d9, 0xca12, 0x55c3, 0x5433, + 0xccf1, 0xd129, 0x5621, 0xd13d, 0x4916, 0xd54a, 0xc71a, 0x53dc); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwsub.vv v12, v4, v8, v0.t"); + // 0.00000000, 52.46875000, 0.00000000, 84.28125000, + // 0.00000000, -78.10937500, 0.00000000, -37.59375000, + // 0.00000000, -52.71875000, 0.00000000, -27.65625000, + // 0.00000000, 9.56250000, 0.00000000, -126.56250000 + VCMP_U32(3, v12, 0x0, 0x4251e000, 0x0, 0x42a89000, 0x0, 0xc29c3800, 0x0, + 0xc2166000, 0x0, 0xc252e000, 0x0, 0xc1dd4000, 0x0, 0x41190000, 0x0, + 0xc2fd2000); + + VSET(16, e32, m4); + // 74632.77343750, -65636.60937500, 16165.84765625, + // -17815.85937500, -85604.03125000, -76754.03125000, + // 21778.01171875, -70512.52343750, 85301.90625000, + // -8385.11035156, 98258.05468750, -50421.53125000, + // 69842.53906250, -65219.96093750, -65266.08984375, + // -90740.60156250 + VLOAD_32(v8, 0x4791c463, 0xc780324e, 0x467c9764, 0xc68b2fb8, 0xc7a73204, + 0xc795e904, 0x46aa2406, 0xc789b843, 0x47a69af4, 0xc6030471, + 0x47bfe907, 0xc744f588, 0x47886945, 0xc77ec3f6, 0xc77ef217, + 0xc7b13a4d); + // 5391.72216797, -90760.36718750, -22961.19531250, + // 12708.62500000, 87107.59375000, 54867.48437500, + // 55424.39453125, -71436.00781250, -61505.46484375, + // 57701.78906250, -81581.38281250, 53319.19531250, + // -86229.57031250, 44376.69531250, 46809.38671875, + // -92887.27343750 + VLOAD_32(v16, 0x45a87dc7, 0xc7b1442f, 0xc6b36264, 0x46469280, 
0x47aa21cc, + 0x4756537c, 0x47588065, 0xc78b8601, 0xc7704177, 0x476165ca, + 0xc79f56b1, 0x47504732, 0xc7a86ac9, 0x472d58b2, 0x4736d963, + 0xc7b56ba3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwsub.vv v24, v8, v16, v0.t"); + // 0.0000000000000000, 25123.7578125000000000, + // 0.0000000000000000, -30524.4843750000000000, + // 0.0000000000000000, -131621.5156250000000000, + // 0.0000000000000000, 923.4843750000000000, + // 0.0000000000000000, -66086.8994140625000000, + // 0.0000000000000000, -103740.7265625000000000, + // 0.0000000000000000, -109596.6562500000000000, + // 0.0000000000000000, 2146.6718750000000000 + VCMP_U64(4, v24, 0x0, 0x40d888f080000000, 0x0, 0xc0ddcf1f00000000, 0x0, + 0xc100112c20000000, 0x0, 0x408cdbe000000000, 0x0, 0xc0f0226e64000000, + 0x0, 0xc0f953cba0000000, 0x0, 0xc0fac1ca80000000, 0x0, + 0x40a0c55800000000); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // 36.4375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x508e); + // 69.8125, -37.3125, -77.2500, 32.7188, + // -83.0000, 76.3125, 14.9375, 72.5000, 39.6250, + // -61.2188, 36.3438, 93.5000, -87.1875, -6.9258, 25.1094, + // -96.8750 + VLOAD_16(v4, 0x545d, 0xd0aa, 0xd4d4, 0x5017, 0xd530, 0x54c5, 0x4b78, 0x5488, + 0x50f4, 0xd3a7, 0x508b, 0x55d8, 0xd573, 0xc6ed, 0x4e47, 0xd60e); + asm volatile("vfwsub.vf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // 33.37500000, -73.75000000, -113.68750000, -3.71875000, + // -119.43750000, 39.87500000, + // -21.50000000, 36.06250000, 3.18750000, -97.65625000, + // -0.09375000, 57.06250000, -123.62500000, -43.36328125, + // -11.32812500, -133.31250000 + VCMP_U32(5, v8, 0x42058000, 0xc2938000, 0xc2e36000, 0xc06e0000, 0xc2eee000, + 0x421f8000, 0xc1ac0000, 0x42104000, 0x404c0000, 0xc2c35000, + 0xbdc00000, 0x42644000, 0xc2f74000, 0xc22d7400, 0xc1354000, + 0xc3055000); + + VSET(16, e32, m4); + double dscalar_32; + // -138614.20312500 + 
BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8075d8d); + // 473107.93750000, 161975.07812500, -173044.89062500, + // -322046.09375000, -485607.56250000, -613808.37500000, + // -182790.53125000, 121114.44531250, -958537.81250000, + // 295217.40625000, 281159.84375000, -735195.87500000, + // -783982.56250000, 420983.65625000, 954426.12500000, + // -297052.53125000 + VLOAD_32(v8, 0x48e7027e, 0x481e2dc5, 0xc828fd39, 0xc89d3fc3, 0xc8ed1cf2, + 0xc915db06, 0xc83281a2, 0x47ec8d39, 0xc96a049d, 0x4890262d, + 0x488948fb, 0xc9337dbe, 0xc93f66e9, 0x48cd8ef5, 0x496903a2, + 0xc8910b91); + asm volatile("vfwsub.vf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 611722.1406250000000000, 300589.2812500000000000, + // -34430.6875000000000000, -183431.8906250000000000, + // -346993.3593750000000000, -475194.1718750000000000, + // -44176.3281250000000000, 259728.6484375000000000, + // -819923.6093750000000000, 433831.6093750000000000, + // 419774.0468750000000000, -596581.6718750000000000, + // -645368.3593750000000000, 559597.8593750000000000, + // 1093040.3281250000000000, -158438.3281250000000000 + VCMP_U64(6, v16, 0x4122ab1448000000, 0x411258b520000000, 0xc0e0cfd600000000, + 0xc106643f20000000, 0xc1152dc570000000, 0xc11d00e8b0000000, + 0xc0e5920a80000000, 0x410fb48530000000, 0xc12905a738000000, + 0x411a7a9e70000000, 0x41199ef830000000, 0xc12234cb58000000, + 0xc123b1f0b8000000, 0x412113dbb8000000, 0x4130adb054000000, + 0xc1035732a0000000); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // 36.4375 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x508e); + // 69.8125, -37.3125, -77.2500, 32.7188, + // -83.0000, 76.3125, 14.9375, 72.5000, 39.6250, + // -61.2188, 36.3438, 93.5000, -87.1875, -6.9258, 25.1094, + // -96.8750 + VLOAD_16(v4, 0x545d, 0xd0aa, 0xd4d4, 0x5017, 0xd530, 0x54c5, 0x4b78, 0x5488, + 0x50f4, 0xd3a7, 0x508b, 0x55d8, 0xd573, 0xc6ed, 0x4e47, 0xd60e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + 
asm volatile("vfwsub.vf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, -73.75000000, 0.00000000, -3.71875000, + // 0.00000000, 39.87500000, 0.00000000, 36.06250000, + // 0.00000000, -97.65625000, 0.00000000, 57.06250000, + // 0.00000000, -43.36328125, 0.00000000, -133.31250000 + VCMP_U32(7, v8, 0x0, 0xc2938000, 0x0, 0xc06e0000, 0x0, 0x421f8000, 0x0, + 0x42104000, 0x0, 0xc2c35000, 0x0, 0x42644000, 0x0, 0xc22d7400, 0x0, + 0xc3055000); + + VSET(16, e32, m4); + double dscalar_32; + // -138614.20312500 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc8075d8d); + // 473107.93750000, 161975.07812500, -173044.89062500, + // -322046.09375000, -485607.56250000, -613808.37500000, + // -182790.53125000, 121114.44531250, -958537.81250000, + // 295217.40625000, 281159.84375000, -735195.87500000, + // -783982.56250000, 420983.65625000, 954426.12500000, + // -297052.53125000 + VLOAD_32(v8, 0x48e7027e, 0x481e2dc5, 0xc828fd39, 0xc89d3fc3, 0xc8ed1cf2, + 0xc915db06, 0xc83281a2, 0x47ec8d39, 0xc96a049d, 0x4890262d, + 0x488948fb, 0xc9337dbe, 0xc93f66e9, 0x48cd8ef5, 0x496903a2, + 0xc8910b91); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwsub.vf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, 300589.2812500000000000, + // 0.0000000000000000, -183431.8906250000000000, + // 0.0000000000000000, -475194.1718750000000000, + // 0.0000000000000000, 259728.6484375000000000, + // 0.0000000000000000, 433831.6093750000000000, + // 0.0000000000000000, -596581.6718750000000000, + // 0.0000000000000000, 559597.8593750000000000, + // 0.0000000000000000, -158438.3281250000000000 + VCMP_U64(8, v16, 0x0, 0x411258b520000000, 0x0, 0xc106643f20000000, 0x0, + 0xc11d00e8b0000000, 0x0, 0x410fb48530000000, 0x0, 0x411a7a9e70000000, + 0x0, 0xc12234cb58000000, 0x0, 0x412113dbb8000000, 0x0, + 0xc1035732a0000000); +}; +// Simple random test with similar values +void TEST_CASE5(void) { + VSET(16, e16, m2); + // -92.15529633, 27.66998672, + // -5.68499708, 
78.95133209, 57.52299500, 15.45270920, 50.26883316, + // 46.63587189, 71.16806793, -80.68485260, + // -22.34193420, 40.17027283, 93.54611969, 25.86016083, 41.82838821, + // 82.50254822 + VLOAD_32(v4, 0xc2b84f83, 0x41dd5c22, 0xc0b5eb7f, 0x429de715, 0x4266178c, + 0x41773e4c, 0x42491349, 0x423a8b22, 0x428e560d, 0xc2a15ea5, + 0xc1b2bc48, 0x4220ae5c, 0x42bb179d, 0x41cee19c, 0x42275045, + 0x42a5014e); + // -72.5625, -83.4375, 28.8281, 33.5938, + // -85.7500, 67.5000, 91.0625, -91.8750, -9.2578, -64.2500, + // -58.6250, 50.3438, -70.5000, 36.6250, 5.7930, 86.6875 + VLOAD_16(v8, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, + 0xc8a1, 0xd404, 0xd354, 0x524b, 0xd468, 0x5094, 0x45cb, 0x556b); + asm volatile("vfwsub.wv v12, v4, v8"); + // -19.59279633, 111.10748291, -34.51312256, 45.35758209, + // 143.27299500, -52.04729080, -40.79366684, + // 138.51086426, 80.42588043, -16.43485260, 36.28306580, + // -10.17347717, 164.04611206, -10.76483917, 36.03541946, + // -4.18495178 + VCMP_U32(9, v12, 0xc19cbe0c, 0x42de3708, 0xc20a0d70, 0x42356e2a, 0x430f45e3, + 0xc250306d, 0xc2232cb7, 0x430a82c8, 0x42a0da0d, 0xc1837a94, + 0x421121dc, 0xc122c690, 0x43240bce, 0xc12c3cc8, 0x42102445, + 0xc085eb20); + + VSET(16, e32, m4); + // -79494.9435096215456724, 81629.4152202270051930, + // 60506.1876363231276628, -81020.4028176319407066, + // -6814.2587861350475578, 11974.4045779409498209, + // 97975.7066144426062237, -93357.8779376419261098, + // 95959.4397212496260181, -58528.4286213813902577, + // 28958.3763895476586185, -36387.3665319164574612, + // -90399.7993234442838002, -78772.1006454367889091, + // -62854.6154750282003079, 37858.6386504948022775 + VLOAD_64(v8, 0xc0f3686f189d8b80, 0x40f3edd6a4bdf6fa, 0x40ed8b46011de3ec, + 0xc0f3c7c671f0e6b7, 0xc0ba9e423fcee2b0, 0x40c76333c935c088, + 0x40f7eb7b4e4af21e, 0xc0f6cade0c085740, 0x40f76d7709192628, + 0xc0ec940db7442fee, 0x40dc479816c42f70, 0xc0e1c46bbaa12444, + 0xc0f611fcca076142, 0xc0f33b419c3e63b8, 0xc0eeb0d3b1f8afb4, + 
0x40e27c546fd32998); + // 95822.63281250, 21789.49804688, -42409.42968750, + // 60172.89062500, -46359.57812500, -71236.33593750, + // 4124.35888672, -80527.00000000, 27430.70507812, + // 39975.67578125, -71197.53125000, -66640.12500000, + // 47459.75390625, -34899.84375000, -21371.85937500, + // 17582.65820312 + VLOAD_32(v16, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, + 0xc78b222b, 0x4580e2df, 0xc79d4780, 0x46d64d69, 0x471c27ad, + 0xc78b0ec4, 0xc7822810, 0x473963c1, 0xc70853d8, 0xc6a6f7b8, + 0x46895d51); + asm volatile("vfwsub.wv v24, v8, v16"); + // -175317.5763221215456724, 59839.9171733520051930, + // 102915.6173238231276628, -141193.2934426319552585, + // 39545.3193388649524422, 83210.7405154409498209, + // 93851.3477277238562237, -12830.8779376419261098, + // 68528.7346431246260181, -98504.1044026313902577, + // 100155.9076395476586185, 30252.7584680835425388, + // -137859.5532296942838002, -43872.2568954367889091, + // -41482.7561000282003079, 20275.9804473698022775 + VCMP_U64(10, v24, 0xc10566ac9c4ec5c0, 0x40ed37fd597bedf4, 0x40f92039e08ef1f6, + 0xc1013c4a58f8735c, 0x40e34f2a380623aa, 0x40f450abd926b811, + 0x40f6e9b5904af21e, 0xc0c90f706042ba00, 0x40f0bb0bc1192628, + 0xc0f80c81aba217f7, 0x40f873be85b10bdc, 0x40dd8b308abdb778, + 0xc100d41c6d03b0a1, 0xc0e56c08387cc770, 0xc0e4415831f8afb4, + 0x40d3ccfebfa65330); +}; + +// Simple random test with similar values (masked) +// The numbers are the same as TEST_CASE5 +void TEST_CASE6(void) { + VSET(16, e16, m2); + // -92.15529633, 27.66998672, + // -5.68499708, 78.95133209, 57.52299500, 15.45270920, 50.26883316, + // 46.63587189, 71.16806793, -80.68485260, + // -22.34193420, 40.17027283, 93.54611969, 25.86016083, 41.82838821, + // 82.50254822 + VLOAD_32(v4, 0xc2b84f83, 0x41dd5c22, 0xc0b5eb7f, 0x429de715, 0x4266178c, + 0x41773e4c, 0x42491349, 0x423a8b22, 0x428e560d, 0xc2a15ea5, + 0xc1b2bc48, 0x4220ae5c, 0x42bb179d, 0x41cee19c, 0x42275045, + 0x42a5014e); + // -72.5625, -83.4375, 28.8281, 33.5938, + // 
-85.7500, 67.5000, 91.0625, -91.8750, -9.2578, -64.2500, + // -58.6250, 50.3438, -70.5000, 36.6250, 5.7930, 86.6875 + VLOAD_16(v8, 0xd489, 0xd537, 0x4f35, 0x5033, 0xd55c, 0x5438, 0x55b1, 0xd5be, + 0xc8a1, 0xd404, 0xd354, 0x524b, 0xd468, 0x5094, 0x45cb, 0x556b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vfwsub.wv v12, v4, v8, v0.t"); + // 0.00000000, 111.10748291, 0.00000000, 45.35758209, + // 0.00000000, -52.04729080, 0.00000000, 138.51086426, + // 0.00000000, -16.43485260, 0.00000000, -10.17347717, + // 0.00000000, -10.76483917, 0.00000000, -4.18495178 + VCMP_U32(11, v12, 0x0, 0x42de3708, 0x0, 0x42356e2a, 0x0, 0xc250306d, 0x0, + 0x430a82c8, 0x0, 0xc1837a94, 0x0, 0xc122c690, 0x0, 0xc12c3cc8, 0x0, + 0xc085eb20); + + VSET(16, e32, m4); + // -79494.9435096215456724, 81629.4152202270051930, + // 60506.1876363231276628, -81020.4028176319407066, + // -6814.2587861350475578, 11974.4045779409498209, + // 97975.7066144426062237, -93357.8779376419261098, + // 95959.4397212496260181, -58528.4286213813902577, + // 28958.3763895476586185, -36387.3665319164574612, + // -90399.7993234442838002, -78772.1006454367889091, + // -62854.6154750282003079, 37858.6386504948022775 + VLOAD_64(v8, 0xc0f3686f189d8b80, 0x40f3edd6a4bdf6fa, 0x40ed8b46011de3ec, + 0xc0f3c7c671f0e6b7, 0xc0ba9e423fcee2b0, 0x40c76333c935c088, + 0x40f7eb7b4e4af21e, 0xc0f6cade0c085740, 0x40f76d7709192628, + 0xc0ec940db7442fee, 0x40dc479816c42f70, 0xc0e1c46bbaa12444, + 0xc0f611fcca076142, 0xc0f33b419c3e63b8, 0xc0eeb0d3b1f8afb4, + 0x40e27c546fd32998); + // 95822.63281250, 21789.49804688, -42409.42968750, + // 60172.89062500, -46359.57812500, -71236.33593750, + // 4124.35888672, -80527.00000000, 27430.70507812, + // 39975.67578125, -71197.53125000, -66640.12500000, + // 47459.75390625, -34899.84375000, -21371.85937500, + // 17582.65820312 + VLOAD_32(v16, 0x47bb2751, 0x46aa3aff, 0xc725a96e, 0x476b0ce4, 0xc7351794, + 0xc78b222b, 0x4580e2df, 0xc79d4780, 0x46d64d69, 0x471c27ad, + 0xc78b0ec4, 
0xc7822810, 0x473963c1, 0xc70853d8, 0xc6a6f7b8, + 0x46895d51); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vfwsub.wv v24, v8, v16, v0.t"); + // 0.0000000000000000, 59839.9171733520051930, + // 0.0000000000000000, -141193.2934426319552585, + // 0.0000000000000000, 83210.7405154409498209, + // 0.0000000000000000, -12830.8779376419261098, + // 0.0000000000000000, -98504.1044026313902577, + // 0.0000000000000000, 30252.7584680835425388, + // 0.0000000000000000, -43872.2568954367889091, + // 0.0000000000000000, 20275.9804473698022775 + VCMP_U64(12, v24, 0x0, 0x40ed37fd597bedf4, 0x0, 0xc1013c4a58f8735c, 0x0, + 0x40f450abd926b811, 0x0, 0xc0c90f706042ba00, 0x0, 0xc0f80c81aba217f7, + 0x0, 0x40dd8b308abdb778, 0x0, 0xc0e56c08387cc770, 0x0, + 0x40d3ccfebfa65330); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE7(void) { + VSET(16, e16, m2); + double dscalar_16; + // -8.76965809, 55.45920181, 71.29286957, -84.65414429, + // -81.93881226, 75.13192749, -75.44019318, -48.81898499, + // 0.10306206, -25.18898392, 49.68006516, 72.66278076, + // -24.90880966, -32.59431458, 14.58876038, -55.07221603 + VLOAD_32(v4, 0xc10c5085, 0x425dd639, 0x428e95f3, 0xc2a94eec, 0xc2a3e0ac, + 0x4296438c, 0xc296e161, 0xc24346a4, 0x3dd31233, 0xc1c9830a, + 0x4246b863, 0x42915358, 0xc1c7453e, 0xc2026094, 0x41696b90, + 0xc25c49f3); + // 34.7812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5059); + asm volatile("vfwsub.wf v8, v4, %[A]" ::[A] "f"(dscalar_16)); + // -43.55090714, 20.67795181, 36.51161957, -119.43539429, + // -116.72006226, 40.35067749, -110.22144318, -83.60023499, + // -34.67818832, -59.97023392, 14.89881516, 37.88153076, + // -59.69005966, -67.37556458, -20.19248962, -89.85346985 + VCMP_U32(13, v8, 0xc22e3421, 0x41a56c72, 0x42120be6, 0xc2eedeec, 0xc2e970ac, + 0x42216718, 0xc2dc7161, 0xc2a73352, 0xc20ab677, 0xc26fe185, + 0x416e618c, 0x421786b0, 0xc26ec29f, 0xc286c04a, 0xc1a18a38, + 0xc2b3b4fa); + + VSET(16, e32, m4); + double dscalar_32; + // 
322189.5706008458510041, 914899.9451866354793310, + // -620811.0881863175891340, -456926.2657179111847654, + // -549945.8717311944346875, -386814.9759888321859762, + // 748677.5319772073999047, 821298.7777016961481422, + // 968861.0598710167687386, -343694.5546012039994821, + // -782815.4022130169905722, -561429.7869165195152164, + // 755371.9691831718664616, -954868.1761190977413207, + // -606267.0986005428712815, 818185.4808380266185850 + VLOAD_64(v8, 0x4113aa36484b9690, 0x412beba7e3ef80b0, 0xc122f2162d26c1cc, + 0xc11be37910185b2a, 0xc120c873be538d16, 0xc1179bfbe7699dce, + 0x4126d90b105f5108, 0x412910658e2eeaae, 0x412d913a1ea769f6, + 0xc114fa3a37e960c6, 0xc127e3becdeedd54, 0xc121222b92e6b8d8, + 0x41270d57f038c6d6, 0xc12d23e85a2c484a, 0xc1228076327bc536, + 0x4128f812f63066de); + // -83388.08593750 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7a2de0b); + asm volatile("vfwsub.wf v16, v8, %[A]" ::[A] "f"(dscalar_32)); + // 405577.6565383458510041, 998288.0311241354793310, + // -537423.0022488175891340, -373538.1797804111847654, + // -466557.7857936944346875, -303426.8900513321859762, + // 832065.6179147073999047, 904686.8636391961481422, + // 1052249.1458085167687386, -260306.4686637039994821, + // -699427.3162755169905722, -478041.7009790195152164, + // 838760.0551206718664616, -871480.0901815977413207, + // -522879.0126630428712815, 901573.5667755266185850 + VCMP_U64(14, v16, 0x4118c126a04b9690, 0x412e77200fef80b0, 0xc120669e0126c1cc, + 0xc116cc88b8185b2a, 0xc11c79f724a71a2c, 0xc112850b8f699dce, + 0x412964833c5f5108, 0x412b9bddba2eeaae, 0x41300e592553b4fb, + 0xc10fc693bfd2c18c, 0xc1255846a1eedd54, 0xc11d2d66cdcd71b0, + 0x412998d01c38c6d6, 0xc12a98702e2c484a, 0xc11fe9fc0cf78a6c, + 0x412b838b223066de); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE8(void) { + VSET(16, e16, m2); + double dscalar_16; + // -8.76965809, 55.45920181, 71.29286957, -84.65414429, + // -81.93881226, 75.13192749, -75.44019318, -48.81898499, + // 0.10306206, 
-25.18898392, 49.68006516, 72.66278076, + // -24.90880966, -32.59431458, 14.58876038, -55.07221603 + VLOAD_32(v4, 0xc10c5085, 0x425dd639, 0x428e95f3, 0xc2a94eec, 0xc2a3e0ac, + 0x4296438c, 0xc296e161, 0xc24346a4, 0x3dd31233, 0xc1c9830a, + 0x4246b863, 0x42915358, 0xc1c7453e, 0xc2026094, 0x41696b90, + 0xc25c49f3); + // 34.7812 + BOX_HALF_IN_DOUBLE(dscalar_16, 0x5059); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vfwsub.wf v8, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + // 0.00000000, 20.67795181, 0.00000000, -119.43539429, + // 0.00000000, 40.35067749, 0.00000000, -83.60023499, + // 0.00000000, -59.97023392, 0.00000000, 37.88153076, + // 0.00000000, -67.37556458, 0.00000000, -89.85346985 + VCMP_U32(15, v8, 0x0, 0x41a56c72, 0x0, 0xc2eedeec, 0x0, 0x42216718, 0x0, + 0xc2a73352, 0x0, 0xc26fe185, 0x0, 0x421786b0, 0x0, 0xc286c04a, 0x0, + 0xc2b3b4fa); + + VSET(16, e32, m4); + double dscalar_32; + // 322189.5706008458510041, 914899.9451866354793310, + // -620811.0881863175891340, -456926.2657179111847654, + // -549945.8717311944346875, -386814.9759888321859762, + // 748677.5319772073999047, 821298.7777016961481422, + // 968861.0598710167687386, -343694.5546012039994821, + // -782815.4022130169905722, -561429.7869165195152164, + // 755371.9691831718664616, -954868.1761190977413207, + // -606267.0986005428712815, 818185.4808380266185850 + VLOAD_64(v8, 0x4113aa36484b9690, 0x412beba7e3ef80b0, 0xc122f2162d26c1cc, + 0xc11be37910185b2a, 0xc120c873be538d16, 0xc1179bfbe7699dce, + 0x4126d90b105f5108, 0x412910658e2eeaae, 0x412d913a1ea769f6, + 0xc114fa3a37e960c6, 0xc127e3becdeedd54, 0xc121222b92e6b8d8, + 0x41270d57f038c6d6, 0xc12d23e85a2c484a, 0xc1228076327bc536, + 0x4128f812f63066de); + // -83388.08593750 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0xc7a2de0b); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vfwsub.wf v16, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + // 0.0000000000000000, 998288.0311241354793310, + // 0.0000000000000000, 
-373538.1797804111847654, + // 0.0000000000000000, -303426.8900513321859762, + // 0.0000000000000000, 904686.8636391961481422, + // 0.0000000000000000, -260306.4686637039994821, + // 0.0000000000000000, -478041.7009790195152164, + // 0.0000000000000000, -871480.0901815977413207, + // 0.0000000000000000, 901573.5667755266185850 + VCMP_U64(16, v16, 0x0, 0x412e77200fef80b0, 0x0, 0xc116cc88b8185b2a, 0x0, + 0xc112850b8f699dce, 0x0, 0x412b9bddba2eeaae, 0x0, 0xc10fc693bfd2c18c, + 0x0, 0xc11d2d66cdcd71b0, 0x0, 0xc12a98702e2c484a, 0x0, + 0x412b838b223066de); +}; + +int main(void) { + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c new file mode 100644 index 000000000..8aae09e69 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vid.c @@ -0,0 +1,31 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(8, e8, m1); + __asm__ volatile("vid.v v1"); + VCMP_U8(1, v1, 0, 1, 2, 3, 4, 5, 6, 7); +} + +void TEST_CASE2() { + VSET(8, e8, m1); + VLOAD_8(v0, 85, 0, 0, 0, 0, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("vid.v v1, v0.t"); + VCMP_U8(2, v1, 0, 0, 2, 0, 4, 0, 6, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c new file mode 100644 index 000000000..9a1ab49d6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/viota.c @@ -0,0 +1,37 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(1, e8, m1); + VLOAD_8(v1, 0b10001001); + VSET(8, e8, m1); + asm volatile("viota.m v2, v1"); + VCMP_U8(1, v2, 0, 1, 1, 1, 2, 2, 2, 2); +} + +void TEST_CASE2() { + VSET(8, e8, m1); + VCLEAR(v2); + VLOAD_8(v2, 0, 1, 2, 3, 4, 5, 6, 7); + VSET(1, e8, m1); + VLOAD_8(v1, 0b10001001); + VLOAD_8(v0, 0b11000111); + VSET(8, e8, m1); + asm volatile("viota.m v2, v1, v0.t"); + VCMP_U8(2, v2, 0, 1, 1, 3, 4, 5, 1, 1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c new file mode 100644 index 000000000..887110b14 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +static volatile uint16_t ALIGNED_I16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989}; + +static volatile uint32_t ALIGNED_I32[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + +static volatile uint64_t ALIGNED_I64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + +// Misaligned access wrt 128-bit +void TEST_CASE1(void) { + VSET(15, e8, m1); + asm volatile("vle8.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(1, v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE2(void) { + VSET(15, e16, m2); + asm volatile("vle16.v v2, (%0)" ::"r"(&ALIGNED_I16[1])); + VCMP_U16(2, v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); +} + +void TEST_CASE3(void) { + VSET(15, e32, m4); + asm volatile("vle32.v v4, (%0)" ::"r"(&ALIGNED_I32[1])); + VCMP_U32(3, v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 
0x90139301, + 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +void TEST_CASE4(void) { + VSET(15, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[1])); + VCMP_U64(4, v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c new file mode 100644 index 000000000..52db36c24 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl1r.c @@ -0,0 +1,439 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +uint64_t counter; + +// Vectors are statically allocated not to exceed the stack and go in the UART +// address space + +// Maximum size: (VLEN/8 Bytes * (MAX_LMUL == 8)) = VLEN +// Define VLEN before compiling me +// #define VLEN 128 +uint8_t gold_vec_8b[VLEN]; +uint16_t gold_vec_16b[VLEN / 2]; +uint32_t gold_vec_32b[VLEN / 4]; +uint64_t gold_vec_64b[VLEN / 8]; + +uint8_t zero_vec_8b[VLEN]; +uint16_t zero_vec_16b[VLEN / 2]; +uint32_t zero_vec_32b[VLEN / 4]; +uint64_t zero_vec_64b[VLEN / 8]; + +uint8_t buf_vec_8b[VLEN]; +uint16_t buf_vec_16b[VLEN / 2]; +uint32_t buf_vec_32b[VLEN / 4]; +uint64_t buf_vec_64b[VLEN / 8]; + +//////////// +// vl1reX // +//////////// + +// 1 whole register load +void TEST_CASE1(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e8, m1); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v17, buf_vec_8b); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, zero_vec_8b, VLEN / 8); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 16); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 16); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 16); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // 
Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 16, e16, m1); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 1, buf_vec_16b, gold_vec_16b, VLEN / 16); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v17, buf_vec_16b); + VMCMP(uint16_t, % hu, 1, buf_vec_16b, zero_vec_16b, VLEN / 16); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 32); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 32); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_32b, VLEN / 32); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 32, e32, m1); + // Check that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 2, buf_vec_32b, gold_vec_32b, VLEN / 32); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v17, buf_vec_32b); + VMCMP(uint32_t, % u, 2, buf_vec_32b, zero_vec_32b, VLEN / 32); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 64); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 64); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 64); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + 
VSET(VLEN / 64, e64, m1); + // Check that the whole register was loaded + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 3, buf_vec_64b, gold_vec_64b, VLEN / 64); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v17, buf_vec_64b); + VMCMP(uint64_t, % lu, 3, buf_vec_64b, zero_vec_64b, VLEN / 64); +} + +//////////// +// vl2reX // +//////////// + +// 2 whole registers load +void TEST_CASE2(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 4, e8, m2); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, gold_vec_8b, VLEN / 4); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v18, buf_vec_8b); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, zero_vec_8b, VLEN / 4); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e16, m2); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 5, buf_vec_16b, 
gold_vec_16b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v18, buf_vec_16b); + VMCMP(uint16_t, % hu, 5, buf_vec_16b, zero_vec_16b, VLEN / 8); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 16); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 16); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_32b, VLEN / 16); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 16, e32, m2); + // Check that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 6, buf_vec_32b, gold_vec_32b, VLEN / 16); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v18, buf_vec_32b); + VMCMP(uint32_t, % u, 6, buf_vec_32b, zero_vec_32b, VLEN / 16); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 32); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 32); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 32); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 32, e64, m2); + // Check that the whole register was loaded + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 7, buf_vec_64b, gold_vec_64b, VLEN / 32); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v18, buf_vec_64b); + VMCMP(uint64_t, % lu, 7, buf_vec_64b, zero_vec_64b, VLEN / 32); +} + +//////////// +// vl4reX // +//////////// + +// 4 
whole registers load +void TEST_CASE3(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 2); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 2, e8, m4); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 8, buf_vec_8b, gold_vec_8b, VLEN / 2); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v20, buf_vec_8b); + VMCMP(uint8_t, % hhu, 8, buf_vec_8b, zero_vec_8b, VLEN / 2); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 4, e16, m4); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 9, buf_vec_16b, gold_vec_16b, VLEN / 4); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v20, buf_vec_16b); + VMCMP(uint16_t, % hu, 9, buf_vec_16b, zero_vec_16b, VLEN / 4); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 8); + // Reserve space for a buffer in memory + 
INIT_MEM_ZEROES(buf_vec_32b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e32, m4); + // Check that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 10, buf_vec_32b, gold_vec_32b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v20, buf_vec_32b); + VMCMP(uint32_t, % u, 10, buf_vec_32b, zero_vec_32b, VLEN / 8); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 16); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 16); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 16); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 16, e64, m4); + // Check that the whole register was loaded + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 11, buf_vec_64b, gold_vec_64b, VLEN / 16); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v20, buf_vec_64b); + VMCMP(uint64_t, % lu, 11, buf_vec_64b, zero_vec_64b, VLEN / 16); +} + +//////////// +// vl8reX // +//////////// + +// 8 whole registers load +void TEST_CASE4(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern 
value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN, e8, m8); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 12, buf_vec_8b, gold_vec_8b, VLEN); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v24, buf_vec_8b); + VMCMP(uint8_t, % hhu, 12, buf_vec_8b, zero_vec_8b, VLEN); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_16b, VLEN / 2); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_16b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_16b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re16.v v16, (%0)" ::"r"(gold_vec_16b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 2, e16, m8); + // Check that the whole register was loaded + VSTORE(uint16_t, e16, v16, buf_vec_16b); + VMCMP(uint16_t, % hu, 13, buf_vec_16b, gold_vec_16b, VLEN / 2); + // Check that the neighbour registers are okay + VSTORE(uint16_t, e16, v24, buf_vec_16b); + VMCMP(uint16_t, % hu, 13, buf_vec_16b, zero_vec_16b, VLEN / 2); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_32b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_32b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_32b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re32.v v16, (%0)" ::"r"(gold_vec_32b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 4, e32, m8); + // Check 
that the whole register was loaded + VSTORE(uint32_t, e32, v16, buf_vec_32b); + VMCMP(uint32_t, % u, 14, buf_vec_32b, gold_vec_32b, VLEN / 4); + // Check that the neighbour registers are okay + VSTORE(uint32_t, e32, v24, buf_vec_32b); + VMCMP(uint32_t, % u, 14, buf_vec_32b, zero_vec_32b, VLEN / 4); + + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_64b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_64b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_64b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re64.v v16, (%0)" ::"r"(gold_vec_64b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e64, m8); + // Check that the whole register was loaded3 + VSTORE(uint64_t, e64, v16, buf_vec_64b); + VMCMP(uint64_t, % lu, 15, buf_vec_64b, gold_vec_64b, VLEN / 8); + // Check that the neighbour registers are okay + VSTORE(uint64_t, e64, v24, buf_vec_64b); + VMCMP(uint64_t, % lu, 15, buf_vec_64b, zero_vec_64b, VLEN / 8); +} + +//////////// +// Others // +//////////// + +// Check with initial vl == 0 +void TEST_CASE5(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(0, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Change vtype and vl to match the whole register + VSET(VLEN / 8, e8, m1); + // Check that the whole register was loaded + VSTORE(uint8_t, e8, v16, buf_vec_8b); + VMCMP(uint8_t, % hhu, 16, buf_vec_8b, gold_vec_8b, VLEN / 
8); + // Check that the neighbour registers are okay + VSTORE(uint8_t, e8, v17, buf_vec_8b); + VMCMP(uint8_t, % hhu, 16, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c new file mode 100644 index 000000000..7260e19f8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +// or add inp here +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile int8_t INP1[] = {0xff, 0x00, 0x0f, 0xf0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle8.v v1, (%0)" ::"r"(INP1)); + // VEC_CMP_8(1,v1,0xff, 0x00, 0x0f,0xf0); + // __asm__ volatile ("fence"); +} + +void TEST_CASE2(void) { + VSET(4, e16, m1); + volatile int16_t INP1[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle16.v v1, (%0)" ::"r"(INP1)); + // VEC_CMP_16(2,v1,0xffff, 0x0000, 0x0f0f,0xf0f0); + // __asm__ volatile ("fence"); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile int32_t INP3[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, + 0xf0f0f0f0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle32.v v1, (%0)" ::"r"(INP3)); + // VEC_CMP_32(3,v1,0xffffffff, 0x00000000, 0x0f0f0f0f,0xf0f0f0f0); + // __asm__ volatile ("fence"); +} + +void TEST_CASE4(void) { + VSET(4, e64, m1); + volatile int64_t INP1[] = {0xffffffffffffffff, 0x0000000000000000, + 0x0f0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f0}; // flush + __asm__ volatile("fence"); + __asm__ volatile("vle64.v v1, (%0)" ::"r"(INP1)); + // 
VEC_CMP_64(4,v1,0xffffffffffffffff, 0x00000000000000000, + // 0x0f0f0f0f0f0f0f0f,0xf0f0f0f0f0f0f0f0); + // __asm__ volatile ("fence"); +} + +/* void TEST_CASE2(void) { */ +/* VSET(4,e8,m1); */ +/* volatile int8_t INP2[] = {0xff, 0x00, 0x0f, 0xf0}; */ +/* __asm__ volatile ("fence"); */ +/* VLOAD_8(v0,0x1,0x0,0x1,0x0); */ +/* VCLEAR_U8(v1); */ +/* __asm__ volatile ("vle8.v v1, (%0), v0.t"::"r" (INP2)); */ +/* VEC_CMP_8(2,v1,0xff, 0x00, 0x0f,0x00); */ +/* } */ + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c new file mode 100644 index 000000000..1a3daeab8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle1.c @@ -0,0 +1,45 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +// All the accesses are misaligned wrt AXI DATA WIDTH + +void TEST_CASE1(void) { + VSET(9, e8, m1); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(1, v1, 0xd3, 0x40); + + VSET(9, e64, m2); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(2, v1, 0xd3, 0x40); + + VSET(16, e64, m8); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(3, v1, 0xd3, 0x40); + + VSET(3, e64, m8); + asm volatile("vle1.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + // The vector used by VCMP_U8 is actually 16 elements long + // Don't store more if you don't want to overflow + VSET(16, e64, m8); + VCMP_U8(4, v1, 0xd3); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + 
+ EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c new file mode 100644 index 000000000..893d0e379 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c @@ -0,0 +1,293 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE 15 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 
216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint16_t ALIGNED_I16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989}; + +//**********Checking functionality of vle16******// +void TEST_CASE1(void) { + VSET(15, e16, m2); + asm volatile("vle16.v v0, (%0)" ::"r"(&ALIGNED_I16[1])); + VCMP_U16(1, v0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e32, m4); + asm volatile("vle16.v v1, (%0)" ::"r"(&ALIGNED_I16[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle16 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e16, m2); + VCLEAR(v6); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle16.v v6, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(3, v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, + 0x1989); +} + +void TEST_CASE4(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle16.v v6, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(4, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE5(void) { + VSET(16, e16, m2); 
+ VCLEAR(v6); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle16.v v6, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(5, v6, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 0x8759, 15, 0x1989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle16.v v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(6, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle16.v v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(7, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle16.v v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(8, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle16.v 
v8, (%0), v0.t" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(9, v8, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, 0x11ae, 11, + 0x4891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 16.If LMUL is changed to +// mf8 it will give error because emul become less than 1/8 (EMUL = 1/16) +// But it does not support this configuration because SEW/LMUL > ELEN +void TEST_CASE10(void) { + VSET(2, e32, mf2); + asm volatile("vle16.v v5, (%0)" ::"r"(&ALIGNED_I16[1])); + VCMP_U16(10, v5, 0xbbd3, 0x3840); +} + +// This test case execute upper bound case of EMUL (8) +// If LMUL is changed to m8 it will give error because emul become greater than +// 8 (EMUL = 16) + +void TEST_CASE11(void) { + VSET(16, e8, m4); + asm volatile("vle16.v v8, (%0)" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(11, v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, + 0x1989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE12(void) { + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e16, m2); + asm volatile("vle16.v v12, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e8, m2); + VCMP_U16(12, v12, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, + 0x1989); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e16, m1); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle16.v v6, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m1); + VCMP_U16(13, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE14(void) { + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16); + VSET(13, e16, m2); + asm volatile("vle16.v v12, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m2); + VCMP_U16(14, v12, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE15(void) { + VSET(16, e16, m1); + VLOAD_16(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle16.v v7, (%0)" ::"r"(&ALIGNED_I16[0])); + VSET(16, e16, m1); + VCMP_U16(15, v7, 1, 2, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, 0x8188, + 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE16(void) { + VSET(1024, e16, m4); + asm volatile("vle16.v v8, (%0)" ::"r"(&LONG_I16[0])); + LVCMP_U16(16, v8, LONG_I16); +} + +void TEST_CASE17(void) { + VSET(512, e16, m2); + asm volatile("vle16.v v10, (%0)" ::"r"(&LONG_I16[0])); + LVCMP_U16(17, v10, LONG_I16); +} + +void TEST_CASE18(void) { + VSET(300, e16, m2); + asm volatile("vle16.v v12, (%0)" ::"r"(&LONG_I16[0])); + LVCMP_U16(18, v12, LONG_I16); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle16.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c new file mode 100644 index 000000000..0e4f1c1c5 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c @@ -0,0 +1,307 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE 15 after issue of vl=0 and +// non-zero 
vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" +#define AXI_DWIDTH 128 +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint32_t ALIGNED_I32[16] + 
__attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + +//**********Checking functionality of vle32********// +void TEST_CASE1(void) { + VSET(15, e32, m4); + asm volatile("vle32.v v0, (%0)" ::"r"(&ALIGNED_I32[1])); + VCMP_U32(1, v0, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, + 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e64, m4); + asm volatile("vle32.v v1, (%0)" ::"r"(&ALIGNED_I32[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle32 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e32, m4); + VCLEAR(v12); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle32.v v12, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(3, v12, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); +} + +void TEST_CASE4(void) { + VSET(16, e32, m4); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle32.v v12, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(4, v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE5(void) { + VSET(16, e32, m4); + VCLEAR(v12); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle32.v v12, (%0), v0.t" 
::"r"(&ALIGNED_I32[0])); + VCMP_U32(5, v12, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 0x31897598, 15, 0x89139848); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle32.v v12, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(6, v12, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle32.v v16, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(7, v16, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle32.v v16, (%0), v0.t" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(8, v16, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e32, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle32.v v16, (%0), v0.t" 
::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(9, v16, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, 0x81937598, + 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 32.If LMUL is changed to +// mf8 and SEW is changed to e64 it will give error because emul become less +// than 1/8 (EMUL = 1/16) But it does not support this configuration because +// SEW/LMUL > ELEN +void TEST_CASE10(void) { + VSET(1, e32, mf2); + asm volatile("vle32.v v5, (%0)" ::"r"(&ALIGNED_I32[1])); + VCMP_U32(10, v5, 0xf9aa71f0); +} + +// This test case execute upper bound case of EMUL (8) +// If LMUL is changed to m8 or m4 it will give error because emul become greater +// than +// 8 +// (EMUL = 16) + +void TEST_CASE11(void) { + VSET(8, e8, m2); + asm volatile("vle32.v v8, (%0)" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(11, v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE12(void) { + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e32, m4); + asm volatile("vle32.v v24, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m4); + VCMP_U32(12, v24, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e32, m1); + VLOAD_32(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle32.v v6, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(16, e32, m1); + VCMP_U32(13, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void 
TEST_CASE14(void) { + VSET(16, e16, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(13, e16, m4); + asm volatile("vle32.v v24, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(16, e16, m4); + VCMP_U32(14, v24, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE15(void) { + VSET(16, e32, m1); + VLOAD_32(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle32.v v7, (%0)" ::"r"(&ALIGNED_I32[0])); + VSET(15, e32, m1); + VCMP_U32(16, v7, 1, 2, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, + 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// + +void TEST_CASE16(void) { + VSET(1024, e32, m8); + asm volatile("vle32.v v8, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(16, v8, LONG_I32); +} + +void TEST_CASE17(void) { + VSET(512, e32, m4); + asm volatile("vle32.v v12, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(17, v12, LONG_I32); +} + +void TEST_CASE18(void) { + VSET(256, e32, m2); + asm volatile("vle32.v v14, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(18, v14, LONG_I32); +} + +void TEST_CASE19(void) { + VSET(200, e32, m2); + asm volatile("vle32.v v16, (%0)" ::"r"(&LONG_I32[0])); + LVCMP_U32(19, v16, LONG_I32); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle32.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + 
TEST_CASE19(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c new file mode 100644 index 000000000..282fd11b4 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c @@ -0,0 +1,315 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm 
volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint64_t ALIGNED_I64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + +//**********Checking functionality of vle64********// +void TEST_CASE1(void) { + VSET(15, e64, m8); + asm volatile("vle64.v v0, (%0)" ::"r"(&ALIGNED_I64[1])); + VCMP_U64(1, v0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e64, m2); + asm volatile("vle64.v v1, (%0)" ::"r"(&ALIGNED_I64[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle64 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e64, m8); + VCLEAR(v24); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle64.v v24, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(3, v24, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 
0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +void TEST_CASE4(void) { + VSET(16, e64, m8); + VCLEAR(v24); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle64.v v24, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(4, v24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +void TEST_CASE5(void) { + VSET(16, e64, m8); + VCLEAR(v24); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle64.v v24, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(5, v24, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 0x3189759837598759, 15, 0x8913984898951989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle64.v v8, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(6, v8, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle64.v v8, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(7, v8, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 
0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e64, m1); + VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle64.v v4, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(8, v4, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e64, m1); + VLOAD_64(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e64, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle64.v v4, (%0), v0.t" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(9, v4, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, 11, + 0xab8b914891484891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover upper bound of EMUL(8). 
If LMUL is changed to +// m2 it will give error because emul become greater than 8 (EMUL = 16) +void TEST_CASE10(void) { + VSET(15, e8, m1); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[1])); + VCMP_U64(10, v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(11, v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e64, m1); + VLOAD_64(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e64, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle64.v v6, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(12, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE13(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(13, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m8); + VCMP_U64(13, v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 
0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE14(void) { + VSET(16, e64, m1); + VLOAD_64(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle64.v v7, (%0)" ::"r"(&ALIGNED_I64[0])); + VSET(16, e64, m1); + VCMP_U64(14, v7, 1, 2, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, + 0x3189759837598759, 0x8319599991911111, 0x8913984898951989); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE15(void) { + VSET(512, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(15, v8, LONG_I64); +} + +void TEST_CASE16(void) { + VSET(256, e64, m4); + asm volatile("vle64.v v12, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(16, v12, LONG_I64); +} + +void TEST_CASE17(void) { + VSET(128, e64, m2); + asm volatile("vle64.v v10, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(17, v10, LONG_I64); +} + +void TEST_CASE18(void) { + VSET(100, e64, m2); + asm volatile("vle64.v v14, (%0)" ::"r"(&LONG_I64[0])); + LVCMP_U64(18, v14, LONG_I64); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle64.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c new file mode 100644 index 000000000..b4e1d84ee --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c @@ -0,0 +1,273 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved + +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +// Exception Handler for rtl + +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, 
mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +//**********Checking functionality of vle8 ********// +void TEST_CASE1(void) { + VSET(15, e8, m1); + asm volatile("vle8.v v0, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(1, v0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); +} + +//******Checking functionality of vle8 with an illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + VSET(15, e16, m4); + asm volatile("vle8.v v1, (%0)" ::"r"(&ALIGNED_I8[1])); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vle8 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e8, m1); + VCLEAR(v3); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle8.v v3, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(3, v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vle8.v v3, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(4, v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VCLEAR(v3); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vle8.v v3, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(5, v3, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, + 0x59, 15, 0x89); +} + +//******Checking functionality 
with different combinations of vta and vma*****// +// **** The expected values assume the undisturbed policy for both tail-agnostic and mask-agnostic settings ****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(6, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +void TEST_CASE7(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(7, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +void TEST_CASE8(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(8, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vle8.v v4, (%0), v0.t" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(9, v4, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, 13, 14, + 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL becomes out of range*****// +// This test case exercises the lower bound of EMUL (1/8). 
If LMUL is changed to +// mf4 or mf8 it will give error because emul become out of range +void TEST_CASE10(void) { + VSET(2, e32, mf2); + asm volatile("vle8.v v5, (%0)" ::"r"(&ALIGNED_I8[1])); + VCMP_U8(10, v5, 0xd3, 0x40); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(16, e8, m1); // Setting vl=16 + asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(11, v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + __asm__ volatile("vsetivli %[A], 0, e8, m1, ta, ma" + : [A] "=r"(avl)); // Setting vl=0 + asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(12, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +void TEST_CASE13(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(13, e8, m1); // Setting vl =13 + asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); + VSET(16, e8, m1); + VCMP_U8(13, v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE14(void) { + uint64_t vstart; + VSET(16, e8, m1); + VLOAD_8(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + write_csr(vstart, 2); + asm volatile("vle8.v v7, (%0)" ::"r"(&ALIGNED_I8[0])); + write_csr(vstart, 0); + VSET(16, e8, m1); + VCMP_U8(14, v7, 1, 2, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE15(void) { + VSET(1024, e8, m2); + asm volatile("vle8.v v8, 
(%0)" ::"r"(&LONG_I8[0])); + LVCMP_U8(15, v8, LONG_I8); +} + +void TEST_CASE16(void) { + VSET(800, e8, m2); + asm volatile("vle8.v v8, (%0)" ::"r"(&LONG_I8[0])); + LVCMP_U8(16, v8, LONG_I8); +} +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vle8.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c new file mode 100644 index 000000000..b837f525d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlff.c @@ -0,0 +1,91 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + __asm__ volatile("vle8ff.v v1, (%0)" ::"r"(INP)); + VEC_CMP_8(1, v1, 0xff, 0x00, 0x0f, 0xf0); +} + +void TEST_CASE2(void) { + VSET(4, e8, m1); + volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + VLOAD_8(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile("vle8ff.v v1, (%0), v0.t" ::"r"(INP)); + VEC_CMP_8(2, v1, 0xff, 0x00, 0x0f, 0x00); +} + +void TEST_CASE3(void) { + VSET(3, e16, m1); + volatile int16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; + __asm__ volatile("vle16ff.v v1, (%0)" ::"r"(INP)); + VEC_CMP_16(3, v1, 0xffff, 0x0000, 0x0f0f); +} + +void TEST_CASE4(void) { + VSET(3, e16, m1); + volatile int16_t INP[] = {0xffff, 0x0001, 0x0f0f, 0xf0f0}; + VLOAD_16(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile("vle16ff.v v1, (%0), v0.t" ::"r"(INP)); + VEC_CMP_16(4, v1, 0xffff, 0x0000, 
0x0f0f); +} + +void TEST_CASE5(void) { + VSET(4, e32, m1); + volatile int32_t INP[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0}; + __asm__ volatile("vle32ff.v v1, (%0)" ::"r"(INP)); + VEC_CMP_32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0); +} + +void TEST_CASE6(void) { + VSET(4, e32, m1); + volatile int32_t INP[] = {0xffffffff, 0x80000000, 0x0f0f0f0f, 0xf0f0f0f0}; + VLOAD_32(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile(" vle32ff.v v1, (%0), v0.t \n" ::"r"(INP)); + VEC_CMP_32(6, v1, 0xffffffff, 0x0, 0x0f0f0f0f, 0x0); +} + +void TEST_CASE7(void) { + VSET(4, e64, m1); + volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, + 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; + __asm__ volatile("vle64ff.v v1,(%0)" ::"r"(INP)); + VEC_CMP_64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, + 0xdeadbeeff0f0f0f0); +} + +void TEST_CASE8(void) { + VSET(4, e64, m1); + volatile int64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, + 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; + VLOAD_64(v0, 0x5, 0x0, 0x0, 0x0); + CLEAR(v1); + __asm__ volatile("vle64ff.v v1,(%0), v0.t" ::"r"(INP)); + VEC_CMP_64(8, v1, 0xdeadbeefffffffff, 0x0000000000000000, 0xdeadbeef0f0f0f0f, + 0x0000000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c new file mode 100644 index 000000000..65d71ab04 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vls.c @@ -0,0 +1,190 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Positive-stride tests +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile uint8_t INP1[] = {0x9f, 0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0, + 0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3}; + uint64_t stride = 3; + asm volatile("vlse8.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U8(1, v1, 0x9f, 0x20, 0x05, 0xaa); +} + +void TEST_CASE2(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x9fe4, 0x1920, 0x8f2e, 0x05e0, + 0xf9aa, 0x71f0, 0xc394, 0xbbd3}; + uint64_t stride = 4; + asm volatile("vlse16.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U16(2, v1, 0x9fe4, 0x8f2e, 0xf9aa, 0xc394); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + uint64_t stride = 8; + asm volatile("vlse32.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U32(3, v1, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348); +} + +void TEST_CASE4(void) { + VSET(2, e64, m1); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1}; + uint64_t stride = 16; + VCLEAR(v1); + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(4, v1, 0x9fe419208f2e05e0, 0xa11a9384a7163840); +} + +// Zero-stride tests +// The implementation must perform all the memory accesses +void TEST_CASE5(void) { + VSET(16, e8, m1); + volatile uint8_t INP1[] = {0x9f}; + uint64_t stride = 0; + asm volatile("vlse8.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U8(5, v1, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, + 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f); +} + +// The implementation can also perform fewer accesses +void TEST_CASE6(void) { + VSET(16, e8, m1); + volatile uint8_t INP1[] = {0x9f}; + asm volatile("vlse8.v v1, (%0), x0" ::"r"(INP1)); + VCMP_U8(6, v1, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 
0x9f, 0x9f, 0x9f, 0x9f, + 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f); +} + +// Different LMUL +void TEST_CASE7(void) { + VSET(8, e64, m4); + volatile uint64_t INP1[] = {0x9fa831c7a11a9384}; + asm volatile("vlse64.v v4, (%0), x0" ::"r"(INP1)); + VCMP_U64(7, v4, 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384); +} + +// Others +// Negative-stride test +void TEST_CASE8(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x9fe4, 0x1920, 0x8f2e, 0x05e0, + 0xf9aa, 0x71f0, 0xc394, 0xbbd3}; + uint64_t stride = -4; + asm volatile("vlse16.v v1, (%0), %1" ::"r"(&INP1[7]), "r"(stride)); + VCMP_U16(8, v1, 0xbbd3, 0x71f0, 0x05e0, 0x1920); +} + +// Stride greater than default Ara AXI width == 128-bit (4 lanes) +void TEST_CASE9(void) { + VSET(2, e64, m1); + volatile uint64_t INP1[] = {0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x01015ac1309bb678}; + uint64_t stride = 40; + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(9, v1, 0x99991348a9f38cd1, 0x01015ac1309bb678); +} + +// Fill Ara internal Load Buffer +void TEST_CASE10(void) { + VSET(8, e64, m4); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + asm volatile("vlse64.v v4, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(10, v4, 0x9fe419208f2e05e0, 0xa11a9384a7163840, 0x9fa831c7a11a9384, + 0x1893179501093489, 0x1874754791888188, 0x9013930148815808, + 0x9031850931584902, 0x8319599991911111); +} + +// Masked stride loads +void TEST_CASE11(void) { + VSET(4, e8, m1); + volatile uint8_t INP1[] = {0x9f, 
0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0, + 0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3}; + uint64_t stride = 3; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse8.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U8(11, v1, 0x00, 0x20, 0x00, 0xaa); +} + +void TEST_CASE12(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x9fe4, 0x1920, 0x8f2e, 0x05e0, + 0xf9aa, 0x71f0, 0xc394, 0xbbd3}; + uint64_t stride = 4; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse16.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U16(12, v1, 0, 0x8f2e, 0, 0xc394); +} + +void TEST_CASE13(void) { + VSET(4, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + uint64_t stride = 8; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse32.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U32(13, v1, 0, 0xf9aa71f0, 0, 0x99991348); +} + +void TEST_CASE14(void) { + VSET(8, e64, m4); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAA); + VCLEAR(v4); + asm volatile("vlse64.v v4, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U64(14, v4, 0, 0xa11a9384a7163840, 0, 0x1893179501093489, 0, + 0x9013930148815808, 0, 0x8319599991911111); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c new file mode 100644 index 000000000..fae863c9b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vluxei.c @@ -0,0 +1,167 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, + 0x88, 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89}; + +static volatile uint16_t ALIGNED_I16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989}; + +static volatile uint32_t ALIGNED_I32[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + +static volatile uint64_t ALIGNED_I64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + +// EEW destination == EEW indexes +void TEST_CASE1(void) { + VSET(2, e8, m1); + VLOAD_8(v2, 1, 15); + asm volatile("vluxei8.v v1, (%0), v2" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(1, v1, 0xd3, 0x89); + + VSET(2, e16, m1); + VLOAD_16(v2, 2, 30); + asm volatile("vluxei16.v v1, (%0), v2" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(2, v1, 0xbbd3, 0x1989); + + VSET(2, e32, m1); + VLOAD_32(v2, 4, 60); + asm volatile("vluxei32.v v1, (%0), v2" 
::"r"(&ALIGNED_I32[0])); + VCMP_U32(3, v1, 0xf9aa71f0, 0x89139848); + + VSET(2, e64, m1); + VLOAD_64(v2, 8, 120); + VCLEAR(v1); + asm volatile("vluxei64.v v1, (%0), v2" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(4, v1, 0xf9aa71f0c394bbd3, 0x8913984898951989); +} + +// EEW Destination > EEW indexes +void TEST_CASE2(void) { + VSET(2, e16, m1); + VLOAD_8(v2, 2, 30); + asm volatile("vluxei8.v v1, (%0), v2" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(5, v1, 0xbbd3, 0x1989); + + VSET(2, e32, m1); + VLOAD_16(v2, 4, 60); + asm volatile("vluxei16.v v1, (%0), v2" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(6, v1, 0xf9aa71f0, 0x89139848); + + VSET(2, e64, m1); + VLOAD_32(v2, 8, 120); + asm volatile("vluxei32.v v1, (%0), v2" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(7, v1, 0xf9aa71f0c394bbd3, 0x8913984898951989); +} + +// EEW Destination < EEW indexes +void TEST_CASE3(void) { + VSET(2, e8, m1); + VLOAD_16(v2, 1, 15); + asm volatile("vluxei16.v v1, (%0), v2" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(8, v1, 0xd3, 0x89); + + VSET(2, e16, m1); + VLOAD_32(v2, 2, 30); + asm volatile("vluxei32.v v1, (%0), v2" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(9, v1, 0xbbd3, 0x1989); + + VSET(2, e32, m1); + VLOAD_64(v2, 4, 60); + asm volatile("vluxei64.v v1, (%0), v2" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(10, v1, 0xf9aa71f0, 0x89139848); +} + +// Naive, masked +void TEST_CASE4(void) { + VSET(2, e8, m1); + VLOAD_8(v1, 99, 99); + VLOAD_8(v2, 1, 15); + VLOAD_8(v0, 0xAA); + asm volatile("vluxei8.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(11, v1, 99, 0x89); + + VSET(2, e16, m1); + VLOAD_16(v1, 999, 999); + VLOAD_16(v2, 2, 30); + VLOAD_8(v0, 0xAA); + asm volatile("vluxei16.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(12, v1, 999, 0x1989); + + VSET(2, e32, m1); + VLOAD_32(v1, 999, 999); + VLOAD_32(v2, 4, 60); + VLOAD_8(v0, 0xAA); + asm volatile("vluxei32.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(13, v1, 999, 0x89139848); + + VSET(2, e64, m1); + VLOAD_64(v1, 999, 999); + VLOAD_64(v2, 8, 120); + 
VLOAD_8(v0, 0xAA); + asm volatile("vluxei64.v v1, (%0), v2, v0.t" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(14, v1, 999, 0x8913984898951989); +} + +// EEW destination == EEW indexes, many elements +void TEST_CASE5(void) { + VSET(12, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15); + asm volatile("vluxei8.v v1, (%0), v2" ::"r"(&ALIGNED_I8[0])); + VCMP_U8(15, v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x88, 0x88, 0xae, 0x91, 0x02, + 0x59, 0x89); + + VSET(12, e16, m2); + VLOAD_16(v4, 2, 4, 6, 8, 10, 14, 16, 18, 22, 24, 26, 30); + asm volatile("vluxei16.v v2, (%0), v4" ::"r"(&ALIGNED_I16[0])); + VCMP_U16(16, v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x9388, 0x8188, + 0x11ae, 0x4891, 0x4902, 0x8759, 0x1989); + + VSET(12, e32, m4); + VLOAD_32(v8, 4, 8, 12, 16, 20, 28, 32, 36, 44, 48, 52, 60); + asm volatile("vluxei32.v v4, (%0), v8" ::"r"(&ALIGNED_I32[0])); + VCMP_U32(17, v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x81937598, 0x18747547, 0x3eeeeeee, 0xab8b9148, 0x90318509, + 0x31897598, 0x89139848); + + VSET(12, e64, m8); + VLOAD_64(v16, 8, 16, 24, 32, 40, 56, 64, 72, 88, 96, 104, 120); + asm volatile("vluxei64.v v8, (%0), v16" ::"r"(&ALIGNED_I64[0])); + VCMP_U64(18, v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8913984898951989); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c new file mode 100644 index 000000000..a56f5b1e3 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vlx.c @@ -0,0 +1,101 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e8, m1); + VLOAD_U8(v2, 0, 1, 2, 3); + volatile uint8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + MEMBARRIER; + __asm__ volatile("vlxei8.v v1, (%0), v2" ::"r"(INP)); + VEC_CMP_U8(1, v1, 0xff, 0x00, 0x0f, 0xf0); +} + +// void TEST_CASE2(void) { +// VSET(4,e8,m1); +// VLOAD_8(v2,0,1,2,3); +// volatile int8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; +// VLOAD_8(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile ("vlxei8.v v1, (%0), v2, v0.t"::"r" (INP)); +// VEC_CMP_8(2,v1,0xff, 0x00, 0x0f,0x00); +// } + +void TEST_CASE3(void) { + VSET(3, e16, m1); + VLOAD_U16(v2, 0, 2, 4); + volatile uint16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; + MEMBARRIER; + __asm__ volatile("vlxei16.v v1, (%0), v2" ::"r"(INP)); + VEC_CMP_U16(3, v1, 0xffff, 0x0000, 0x0f0f); +} + +// void TEST_CASE4(void) { +// VSET(3,e16,m1); +// VLOAD_16(v2,0,2,4); +// volatile int16_t INP[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; +// VLOAD_16(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile ("vlxei16.v v1, (%0), v2, v0.t"::"r" (INP)); +// VEC_CMP_16(4,v1,0xffff, 0x0000, 0x0f0f); +// } + +void TEST_CASE5(void) { + VSET(4, e32, m1); + VLOAD_U32(v2, 0, 4, 8, 12); + volatile uint32_t INP[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0}; + MEMBARRIER; + __asm__ volatile("vlxei32.v v1, (%0), v2" ::"r"(INP)); + VEC_CMP_U32(5, v1, 0xffffffff, 0x00000000, 0x0f0f0f0f, 0xf0f0f0f0); +} + +// void TEST_CASE6(void) { +// VSET(4,e32,m1); +// VLOAD_32(v2,0,4,8,12); +// volatile int32_t INP[] = {0xffffffff, 0x80000000, 0x0f0f0f0f, 0xf0f0f0f0}; +// VLOAD_32(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile (" vlxei32.v v1, (%0), v2, v0.t \n" :: "r" (INP)); +// VEC_CMP_32(6,v1,0xffffffff, 0x0, 0x0f0f0f0f, 0x0); +// } + +void TEST_CASE7(void) { + VSET(4, e64, m1); + VLOAD_U64(v2, 0, 8, 16, 24); + volatile uint64_t INP[] = {0xdeadbeefffffffff, 0xdeadbeef00000000, 
+ 0xdeadbeef0f0f0f0f, 0xdeadbeeff0f0f0f0}; + MEMBARRIER; + __asm__ volatile("vlxei64.v v1,(%0), v2" ::"r"(INP)); + VEC_CMP_U64(7, v1, 0xdeadbeefffffffff, 0xdeadbeef00000000, 0xdeadbeef0f0f0f0f, + 0xdeadbeeff0f0f0f0); +} + +// void TEST_CASE8(void) { +// VSET(4,e64,m1); +// VLOAD_64(v2,0,8,16,24); +// volatile int64_t INP[] = +// {0xdeadbeefffffffff,0xdeadbeef00000000,0xdeadbeef0f0f0f0f,0xdeadbeeff0f0f0f0}; +// VLOAD_64(v0,0x5,0x0,0x0,0x0); +// CLEAR(v1); +// __asm__ volatile ("vlxei64.v v1,(%0), v2, v0.t"::"r" (INP)); +// VEC_CMP_64(8,v1,0xdeadbeefffffffff,0x0000000000000000,0xdeadbeef0f0f0f0f,0x0000000000000000); +// } + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE3(); + TEST_CASE5(); + TEST_CASE7(); + // TEST_CASE2(); + // TEST_CASE4(); + // TEST_CASE6(); + // TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c new file mode 100644 index 000000000..18fb6661a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmacc.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v3, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v1, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + asm volatile("vmacc.vv v1, v2, v3"); + VCMP_U8(1, v1, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, + 0x17, 0xc4, 0x2a, 0xe2, 0x60, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v2, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + asm volatile("vmacc.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, + 0x86f2, 0x4e5f, 0x378a, 0xc4a3, 0x561a, 0xb8da, 0x5e42, 0xf4fd, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v4, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + asm volatile("vmacc.vv 
v4, v8, v12"); + VCMP_U32(3, v4, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, + 0xa233b25d, 0x4c72c95c, 0xe2b1a595, 0xefb7d755, 0x95d6b28a, + 0xd3be5a47, 0x6338471d, 0xfb1a117e, 0xabe00fef, 0xbede88b0, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v8, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + asm volatile("vmacc.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, + 0x813582af4d75d09e, 0x483adf8d811ecb64, 0x36d90fe4df2f2b2c, + 0xf833b173685307a8, 0x955c2ac405b724e1, 0xdcf9681f074b0d2d, + 0x10277404741c4ca8, 0x25d9bca0245d9fbf, 0x58439c4175d7f582, + 0x27ae9e3365b265d2, 0xabfe86591f4ba5be, 0xd964de90eaae196e, + 0xfb655e2263986563); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v3, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v1, 0x30, 0xef, 0xb4, 0x12, 
0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x30, 0x42, 0xb4, 0x96, 0x6d, 0xc7, 0x2c, 0xae, 0xf0, 0x29, + 0xd7, 0xc4, 0xc2, 0xe2, 0xe0, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v2, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0xe3f0, 0x6dcb, 0x2fde, 0x6761, 0x910c, 0xf452, 0x82aa, + 0x86f2, 0x4631, 0x378a, 0x68c3, 0x561a, 0x3b5c, 0x5e42, 0x2db1, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v4, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0xa055bbb6, 0xf2cbc4a8, 0x0be640c9, 0xf69b4268, 0xca121638, + 0xa233b25d, 0xe7c83142, 0xe2b1a595, 0x8eb340e3, 0x95d6b28a, + 0xffef4a03, 0x6338471d, 0xd0922181, 0xabe00fef, 0xf86a7c06, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 
0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v8, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x32a4c1edbbfe5591, 0xad447a1b99a48a53, 0x3a29727ae38b9b92, + 0x813582af4d75d09e, 0xaab9d34e4aeaa57a, 0x36d90fe4df2f2b2c, + 0xc4bd99b066821092, 0x955c2ac405b724e1, 0xa8b041a876aabcae, + 0x10277404741c4ca8, 0x8bdf55954f50101d, 0x58439c4175d7f582, + 0x0c8ca4d0a6d1a982, 0xabfe86591f4ba5be, 0x3f2cd5d2e2f5b538, + 0xfb655e2263986563); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v2, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v1, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + asm volatile("vmacc.vx v1, %[A], v2" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 0x0b, 0x32, 0x06, + 0x74, 0xaf, 0x55, 0x28, 0x22, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 
0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v2, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + asm volatile("vmacc.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, + 0x8131, 0x7b60, 0x9c21, 0xd43f, 0x9759, 0x0e53, 0x109f, 0x71b4, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v4, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + asm volatile("vmacc.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, + 0x18eb9d88, 0x91c53550, 0x69a6ceb2, 0xc09822e9, 0x66c98b96, + 0xf6b125ab, 0xef3fae1e, 0x4c40925e, 0x6b652c20, 0x998385c4, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v8, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + asm volatile("vmacc.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 
0x093861b79ac45352, 0xfd3c909decf66b5b, 0x04eb13132ce4267b, + 0xb258e6b065bbf956, 0x62775181e33422f3, 0xdc0ae0e371686968, + 0xf8db06270cad2c71, 0x6c3cc52cd1fb49c2, 0x41c19c0ac1b5a2fa, + 0x8867d35049c7b01d, 0x6d71fe0f35a1feea, 0xace16ac43ec0279f, + 0x82faf4a574c9dc1d, 0xa875c9d17e310a96, 0x1f75616001b61192, + 0x16ce205f44fb8635); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v2, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v1, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v1, %[A], v2, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0xfb, 0x8a, 0xc0, 0xc9, 0xa7, 0x8f, 0xc8, 0x0b, 0x57, 0x06, + 0x51, 0xaf, 0xd2, 0x28, 0x83, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v2, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0x0a9f, 0xb5af, 0x494e, 0x342e, 0x394c, 0x4cb6, 0xc117, + 0x8131, 0xb1af, 0x9c21, 0x22ab, 0x9759, 0xf1c9, 0x109f, 0x9bed, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v4, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 
0xb2436fad, 0xf29d4830, 0xd94eebe7, 0xb21bb5a3, 0xb80f178d, + 0x18eb9d88, 0x7764b8a3, 0x69a6ceb2, 0xb0dff3a6, 0x66c98b96, + 0xa98a861e, 0xef3fae1e, 0xde488617, 0x6b652c20, 0xc3ba8192, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v8, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmacc.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x0dc8fa1b817237e5, 0xfd3c909decf66b5b, 0xb015bdbf0f39ec01, + 0xb258e6b065bbf956, 0x80c45834a5026c02, 0xdc0ae0e371686968, + 0x9d31b9b802ae2db1, 0x6c3cc52cd1fb49c2, 0x8732f75adf268ddb, + 0x8867d35049c7b01d, 0x6a259fe666091333, 0xace16ac43ec0279f, + 0x8a479b7e3558e399, 0xa875c9d17e310a96, 0xe2c7432cd7e3e81d, + 0x16ce205f44fb8635); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c new file mode 100644 index 000000000..9f10378b8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadc.c @@ -0,0 +1,224 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v2, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80); + VLOAD_8(v0, 0xDD, 0xDD); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(2, e8, m1); + VCMP_U8(1, v3, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v2, 4, 8, 12, 0x8000, 4, 8, 12, 0x8000); + VLOAD_16(v0, 0xDD); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(2, v3, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v2, 4, 8, 12, 0x80000000); + VLOAD_8(v0, 0x0D); + VCLEAR(v3); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(3, v3, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v2, 4, 8); + VLOAD_8(v0, 0x03); + VCLEAR(v3); + asm volatile("vmadc.vvm v3, v1, v2, v0"); + VSET(1, e8, m1); + VCMP_U8(4, v3, 0x02); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v2, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80, 4, 8, 12, 0x80); + asm volatile("vmadc.vv v3, v1, v2"); + VSET(2, e8, m1); + VCMP_U8(5, v3, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v2, 4, 8, 12, 0x8000, 4, 8, 12, 0x8000); + VCLEAR(v3); + asm volatile("vmadc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(6, v3, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v2, 4, 8, 12, 0x80000000); + VCLEAR(v3); + asm volatile("vmadc.vv v3, v1, v2"); + VSET(1, e8, m1); + VCMP_U8(7, v3, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v2, 4, 8); + VCLEAR(v3); + asm 
volatile("vmadc.vv v3, v1, v2"); + VSET(2, e8, m1); + VCMP_U8(8, v3, 0x02); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 0x8000000080008080; + + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); + asm volatile("vmadc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v3, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v0, 1, 1, 0, 1, 1, 1, 0, 1); + asm volatile("vmadc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(10, v2, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v0, 1, 1, 0, 1); + VCLEAR(v2); + asm volatile("vmadc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(11, v2, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v0, 1, 1); + VCLEAR(v2); + asm volatile("vmadc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(12, v2, 0x02); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 0x8000000080008080; + + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v2, 0xAA, 0xAA); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VCLEAR(v2); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0xAA); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VCLEAR(v2); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(15, v2, 0x0A); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VCLEAR(v2); + asm volatile("vmadc.vx v2, v1, %[A]" ::[A] "r"(scalar)); + VSET(1, e8, m1); + VCMP_U8(16, v2, 0x02); +}; + +void TEST_CASE5(void) { 
+ VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + VLOAD_8(v0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(2, e8, m1); + VCMP_U8(17, v2, 0x22, 0x22); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VLOAD_16(v0, 1, 1, 0, 1, 1, 1, 0, 1); + VCLEAR(v2); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(1, e8, m1); + VCMP_U8(18, v2, 0x22); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VLOAD_32(v0, 1, 1, 0, 1); + VCLEAR(v2); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(1, e8, m1); + VCMP_U8(19, v2, 0x02); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VLOAD_64(v0, 1, 1); + VCLEAR(v2); + asm volatile("vmadc.vim v2, v1, 10, v0"); + VSET(1, e8, m1); + VCMP_U8(20, v2, 0x02); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, 8, 0x81, 16, 0xff, + 8, 0x81); + asm volatile("vmadc.vi v3, v1, 10"); + VSET(2, e8, m1); + VCMP_U8(21, v3, 0x22, 0x22); + + VSET(8, e16, m1); + VLOAD_16(v1, 16, 0xffff, 8, 0x8001, 16, 0xffff, 8, 0x8001); + VCLEAR(v2); + asm volatile("vmadc.vi v2, v1, 10"); + VSET(1, e8, m1); + VCMP_U8(22, v2, 0x22); + + VSET(4, e32, m1); + VLOAD_32(v1, 16, 0xffffffff, 8, 0x80000001); + VCLEAR(v2); + asm volatile("vmadc.vi v2, v1, 10"); + VSET(1, e8, m1); + VCMP_U8(23, v2, 0x02); + + VSET(2, e64, m1); + VLOAD_64(v1, 16, 0xffffffffffffffff); + VCLEAR(v2); + asm volatile("vmadc.vi v2, v1, 10"); + VSET(1, e8, m1); + VCMP_U8(24, v2, 0x02); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c new file mode 100644 index 000000000..b657e3f59 --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmadd.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v1, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v3, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + asm volatile("vmadd.vv v1, v2, v3"); + VCMP_U8(1, v1, 0xee, 0x42, 0xae, 0x96, 0xfd, 0xc7, 0x00, 0xae, 0xe4, 0x29, + 0x17, 0xc4, 0x2a, 0xe2, 0x60, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v6, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + asm volatile("vmadd.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x8d70, 0x6dcb, 0xb74e, 0x6761, 0xa639, 0xf452, 0x22f6, + 0x86f2, 0x4e5f, 0x378a, 0xc4a3, 0x561a, 0xb8da, 0x5e42, 0xf4fd, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v12, 0xa055bbb6, 0x71f9a668, 
0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + asm volatile("vmadd.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x448bd85e, 0xf2cbc4a8, 0x5cd02119, 0xf69b4268, 0x3c60ee0c, + 0xa233b25d, 0x4c72c95c, 0xe2b1a595, 0xefb7d755, 0x95d6b28a, + 0xd3be5a47, 0x6338471d, 0xfb1a117e, 0xabe00fef, 0xbede88b0, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v24, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + asm volatile("vmadd.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xf7c2044aeebff5e8, 0xad447a1b99a48a53, 0x78676efbe1b5763a, + 0x813582af4d75d09e, 0x483adf8d811ecb64, 0x36d90fe4df2f2b2c, + 0xf833b173685307a8, 0x955c2ac405b724e1, 0xdcf9681f074b0d2d, + 0x10277404741c4ca8, 0x25d9bca0245d9fbf, 0x58439c4175d7f582, + 0x27ae9e3365b265d2, 0xabfe86591f4ba5be, 0xd964de90eaae196e, + 0xfb655e2263986563); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v1, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 
0x11, 0x29, + 0xea, 0x14, 0xce, 0xb0, 0x37); + VLOAD_8(v2, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, + 0x42, 0x52, 0x40, 0xa8, 0x53); + VLOAD_8(v3, 0x30, 0xef, 0xb4, 0x12, 0x6d, 0x3b, 0x2c, 0x5e, 0xf0, 0x25, 0xd7, + 0x70, 0xc2, 0x62, 0xe0, 0x99); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x21, 0x42, 0x7f, 0x96, 0x50, 0xc7, 0x3f, 0xae, 0x74, 0x29, + 0x29, 0xc4, 0x14, 0xe2, 0xb0, 0x6e); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x1c20, 0x11e4, 0xde38, 0x642f, 0x3eb5, 0xa0af, 0x48e1, 0x5fc4, + 0x3d2a, 0x67d5, 0x3f07, 0x2889, 0x8812, 0x0bd9, 0x56f4, 0xe068); + VLOAD_16(v4, 0x02cc, 0xd99c, 0xdba2, 0xf282, 0x0f99, 0xa219, 0x2dcc, 0x17cc, + 0xe8fb, 0x1e83, 0xed20, 0xbfee, 0xee87, 0x6b0f, 0xf6cf, 0x4cd1); + VLOAD_16(v6, 0xe3f0, 0x42db, 0x2fde, 0x1983, 0x910c, 0x853b, 0x82aa, 0x9ac2, + 0x4631, 0x1f8b, 0x68c3, 0x6fbc, 0x3b5c, 0xf98b, 0x2db1, 0x8e75); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0x1c20, 0x6dcb, 0xde38, 0x6761, 0x3eb5, 0xf452, 0x48e1, + 0x86f2, 0x3d2a, 0x378a, 0x3f07, 0x561a, 0x8812, 0x5e42, 0x56f4, + 0xa35d); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x0401c584, 0x69049955, 0x4a71aa0c, 0xc651666f, 0x273fcd5d, + 0x23ca1d7d, 0x599c994e, 0xb2d8adc5, 0x4710afae, 0x69c61cad, + 0x96ee5026, 0x2c197996, 0xd95da451, 0x3a654fb9, 0xbe990e4b, + 0xc41fd55a); + VLOAD_32(v8, 0x39d5b56a, 0xc578a540, 0x51283b5c, 0x07b4ba9d, 0xe5aba5e4, + 0x28720dc8, 0x600fb42b, 0xf2937fa7, 0x4032d36f, 0xc676e3b3, + 0xf1cd5f96, 0x1c14bcbf, 0x7dea81ed, 0x40270562, 0x9577b3be, + 0xea615f0a); + VLOAD_32(v12, 0xa055bbb6, 0x71f9a668, 0x0be640c9, 0x2336ca55, 0xca121638, + 0xbf234fb5, 0xe7c83142, 0xb7048f12, 0x8eb340e3, 0xef253e93, + 0xffef4a03, 0xdf346833, 0xd0922181, 0xf159ee1d, 0xf86a7c06, + 0xfcb24a2d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0x0401c584, 0xf2cbc4a8, 0x4a71aa0c, 0xf69b4268, 0x273fcd5d, + 0xa233b25d, 0x599c994e, 0xe2b1a595, 
0x4710afae, 0x95d6b28a, + 0x96ee5026, 0x6338471d, 0xd95da451, 0xabe00fef, 0xbe990e4b, + 0x913705b1); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x9cffef345b95f00b, 0x85d366e07e4bbc6b, 0xadfda1d2464c6433, + 0x610bf2c1435b3cf6, 0x8a0c6e4bc950e81f, 0x4296e7147ef94d7a, + 0x27d7ec90ba159756, 0x2a6c87932c3aef86, 0xbfd90c33e58a8fe3, + 0x1114f7672cf625c1, 0x1a7b72dd8ac39fab, 0xdb80f952e5fd2e5b, + 0x6b01c18a3daf288b, 0x69b4b0e4335f26d5, 0x0c059f365ec6d3d5, + 0xc22568276f1dcdd0); + VLOAD_64(v16, 0x6dc8e88769e54465, 0xce8cda83d16c3859, 0x1465ee5b6eb0d2b8, + 0x4827a9b40add2507, 0xd24c4005695a64d6, 0xb97c8e41e912f84a, + 0xc8c22e3b3b2e2fa1, 0x26712aa325bd00b6, 0xdf7ad19151df27b5, + 0x68ba6d050ffcba1e, 0x94448979a2b854e6, 0x84bf5d544f97f739, + 0x6d4bfa429e9d6ef0, 0xdb6c54b9a91ab935, 0x1a0051ca72162c5e, + 0xe04b73fdf1b61f9c); + VLOAD_64(v24, 0x32a4c1edbbfe5591, 0xf6baf4e747f4a120, 0x3a29727ae38b9b92, + 0xf173f78d09c997e4, 0xaab9d34e4aeaa57a, 0xa8fe3bf12b7c95e8, + 0xc4bd99b066821092, 0x9c2f1daf5fe2db9d, 0xa8b041a876aabcae, + 0xb9a2e6f9ded9a60a, 0x8bdf55954f50101d, 0x704f0e648c11d63f, + 0x0c8ca4d0a6d1a982, 0xa74d01c12ae6aea5, 0x3f2cd5d2e2f5b538, + 0x79803b24efa2caa3); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x9cffef345b95f00b, 0xad447a1b99a48a53, 0xadfda1d2464c6433, + 0x813582af4d75d09e, 0x8a0c6e4bc950e81f, 0x36d90fe4df2f2b2c, + 0x27d7ec90ba159756, 0x955c2ac405b724e1, 0xbfd90c33e58a8fe3, + 0x10277404741c4ca8, 0x1a7b72dd8ac39fab, 0x58439c4175d7f582, + 0x6b01c18a3daf288b, 0xabfe86591f4ba5be, 0x0c059f365ec6d3d5, + 0xfb655e2263986563); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v2, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + asm volatile("vmadd.vx v1, %[A], v2" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0xdb, 0x8a, 0xe0, 0xc9, 0xb0, 0x8f, 0xf7, 
0x0b, 0x32, 0x06, + 0x74, 0xaf, 0x55, 0x28, 0x22, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v4, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + asm volatile("vmadd.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x145d, 0xb5af, 0x54f9, 0x342e, 0x78a8, 0x4cb6, 0xa9ce, + 0x8131, 0x7b60, 0x9c21, 0xd43f, 0x9759, 0x0e53, 0x109f, 0x71b4, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v8, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + asm volatile("vmadd.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x8e0d1d47, 0xf29d4830, 0xb5213626, 0xb21bb5a3, 0xbc2f367d, + 0x18eb9d88, 0x91c53550, 0x69a6ceb2, 0xc09822e9, 0x66c98b96, + 0xf6b125ab, 0xef3fae1e, 0x4c40925e, 0x6b652c20, 0x998385c4, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v16, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 
0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + asm volatile("vmadd.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x093861b79ac45352, 0xfd3c909decf66b5b, 0x04eb13132ce4267b, + 0xb258e6b065bbf956, 0x62775181e33422f3, 0xdc0ae0e371686968, + 0xf8db06270cad2c71, 0x6c3cc52cd1fb49c2, 0x41c19c0ac1b5a2fa, + 0x8867d35049c7b01d, 0x6d71fe0f35a1feea, 0xace16ac43ec0279f, + 0x82faf4a574c9dc1d, 0xa875c9d17e310a96, 0x1f75616001b61192, + 0x16ce205f44fb8635); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, + 0x01, 0xe7, 0x51, 0x53, 0x29); + VLOAD_8(v2, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, + 0xaa, 0xd2, 0x93, 0x83, 0xa8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v1, %[A], v2, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0x60, 0x8a, 0xa0, 0xc9, 0x35, 0x8f, 0xa3, 0x0b, 0x5f, 0x06, + 0x07, 0xaf, 0xe7, 0x28, 0x53, 0x75); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0x992e, 0x9a07, 0x90c3, 0xf1ce, 0xd53c, 0x8f07, 0x2d2f, 0x5ab1, + 0x0a79, 0x0523, 0x6f34, 0xe5fd, 0xc95a, 0xca1c, 0x36bf, 0x16a1); + VLOAD_16(v4, 0x0a9f, 0x7ee0, 0x494e, 0xb6d0, 0x394c, 0xc8e7, 0xc117, 0x8108, + 0xb1af, 0x9f16, 0x22ab, 0xa244, 0xf1c9, 0xe363, 0x9bed, 0xa06f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0x992e, 0xb5af, 0x90c3, 0x342e, 0xd53c, 0x4cb6, 0x2d2f, + 0x8131, 0x0a79, 0x9c21, 0x6f34, 0x9759, 0xc95a, 0x109f, 0x36bf, + 0xcd08); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x709e784e, 0x8e13e48a, 0xad5df7fd, 0x738c8997, 0x0a0030d0, + 0x7569b952, 0x507fd5c7, 0x5d09af12, 0x0bf1c209, 0x7be6ed49, + 0x842ba667, 0x53360ec0, 0xd85d7415, 0xf20de61f, 0x153e7e16, + 0xec5512e4); + VLOAD_32(v8, 0xb2436fad, 0x6b162382, 0xd94eebe7, 0x9c43d906, 0xb80f178d, + 0x5cf91d42, 0x7764b8a3, 0x6269f72c, 0xb0dff3a6, 0x838d6893, + 
0xa98a861e, 0x758b63de, 0xde488617, 0x371696ab, 0xc3ba8192, + 0x7ca33236); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0x709e784e, 0xf29d4830, 0xad5df7fd, 0xb21bb5a3, 0x0a0030d0, + 0x18eb9d88, 0x507fd5c7, 0x69a6ceb2, 0x0bf1c209, 0x66c98b96, + 0x842ba667, 0xef3fae1e, 0xd85d7415, 0x6b652c20, 0x153e7e16, + 0x75d88d82); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x2a47beb4fd7729c5, 0x401c187818b15d1e, 0xbbaf5fe50c41f22a, + 0x31eaddea171055a9, 0x609cbc4a78316c29, 0xd7bb8f31d8b59d88, + 0x97860fd5fba018c0, 0x724cecf178bd2125, 0x866d16f96d3d8b67, + 0x56153b0315164a5a, 0x6962bde49e3edf3f, 0x9b3f792bfbf5f343, + 0x64cf433b239e7764, 0x583c3a4ae481fef0, 0x217e2df75fcf0d8d, + 0x935ac02069fe54ce); + VLOAD_64(v16, 0x0dc8fa1b817237e5, 0xc817934370de904d, 0xb015bdbf0f39ec01, + 0x3c7e70a75643cce5, 0x80c45834a5026c02, 0xcdf1fcd83b8133a0, + 0x9d31b9b802ae2db1, 0xba7e57975c5febf5, 0x8732f75adf268ddb, + 0x5ff488a4187bd3f3, 0x6a259fe666091333, 0x5afc4de057de51c4, + 0x8a479b7e3558e399, 0xbc21e79022996c26, 0xe2c7432cd7e3e81d, + 0xdab377ddbdfb2df7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vmadd.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x2a47beb4fd7729c5, 0xfd3c909decf66b5b, 0xbbaf5fe50c41f22a, + 0xb258e6b065bbf956, 0x609cbc4a78316c29, 0xdc0ae0e371686968, + 0x97860fd5fba018c0, 0x6c3cc52cd1fb49c2, 0x866d16f96d3d8b67, + 0x8867d35049c7b01d, 0x6962bde49e3edf3f, 0xace16ac43ec0279f, + 0x64cf433b239e7764, 0xa875c9d17e310a96, 0x217e2df75fcf0d8d, + 0x16ce205f44fb8635); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c new file mode 100644 index 000000000..9e280b9f2 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmand.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and 
University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* vmand.mm v1 = v2 & v3 on two mask bytes: 0xCD & 0x84 = 0x84, 0xEF & 0x21 = 0x21 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x84, 0x21); +} + +void TEST_CASE2() { /* AND with an all-ones operand: the result must equal v2 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0xCD, 0xEF); +} + +void TEST_CASE3() { /* AND with an all-zeros operand: the result must be zero */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0x00, 0x00); +} + +void TEST_CASE4() { /* AND with nibble selectors: 0xCD & 0x0F = 0x0D, 0xEF & 0xF0 = 0xE0 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0x0D, 0xE0); +} + +void TEST_CASE5() { /* vl = 13 with v1 preloaded to 0xFF: expected 0xE1 = low five bits of 0xEF & 0x21, upper three bits still 1 from the preload */ + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmand.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x84, 0xE1); +} + +void TEST_CASE6() { /* NOTE(review): the vmand.mm result in v1 is never compared here; only v2 after VCLEAR(v2) is checked against zeros - confirm this is intended */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF, 0xCD, 0xEF, 0xCD, 0xEF, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21, 0x84, 0x21, 0x84, 0x21, 0x84, 0x21); + asm volatile("vmand.mm v1, v2, v3"); + VSET(13, e8, m1); + VCLEAR(v2); + VCMP_U8(6, v2, 0, 0, 0, 0, 0, 0, 0, 0); +} + +int main(void) { /* Runs TEST_CASE1..6 in order between INIT_CHECK() and EXIT_CHECK() */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c new file mode 100644 index 000000000..4952d9760 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmandnot.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* vmandnot.mm v1 = v2 & ~v3: 0xCD & ~0x84 = 0x49, 0xEF & ~0x21 = 0xCE. NOTE(review): RVV 1.0 names this instruction vmandn.mm - confirm the toolchain still accepts the old mnemonic */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x49, 0xCE); +} + +void TEST_CASE2() { /* second operand all ones: v2 & ~0xFF must be zero */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0x00, 0x00); +} + +void TEST_CASE3() { /* second operand all zeros: v2 & ~0x00 must equal v2 */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0xCD, 0xEF); +} + +void TEST_CASE4() { /* nibble selectors: 0xCD & ~0x0F = 0xC0, 0xEF & ~0xF0 = 0x0F */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0xC0, 0x0F); +} + +void TEST_CASE5() { /* vl = 13 with v1 preloaded to 0xFF: expected 0xEE = low five bits of 0xEF & ~0x21, upper three bits still 1 from the preload */ + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x49, 0xEE); +} + +int main(void) { /* Runs TEST_CASE1..5 in order between INIT_CHECK() and EXIT_CHECK() */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c new file mode 100644 index 000000000..6348fc5f1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmax.c @@ -0,0 +1,181 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* vmax.vv, unmasked: signed element-wise maximum at SEW = 16, 32 and 64 */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + asm volatile("vmax.vv v2, v4, v6"); + VCMP_I16(1, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + 7000, 2560, 19901, 12345, 7000, 2560, 19901); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vmax.vv v4, v8, v12"); + VCMP_I32(2, v4, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + 7000, 2560, 19901, 12345, 7000, 2560, 19901); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vmax.vv v8, v16, v24"); + VCMP_I64(3, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + 7000, 2560, 19901, 12345, 7000, 2560, 19901); +} + +void TEST_CASE2(void) { /* vmax.vv, masked: v0 = 0xCC,0xCC activates elements 2 and 3 of every group of four; the other elements must keep the preloaded 0xbeef / 0xdeadbeef filler */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vmax.vv v2, v4, v6, v0.t"); + VCMP_I16(4, v2, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, + 0xbeef, 0xbeef, 2560, 19901,
0xbeef, 0xbeef, 2560, 19901); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vmax.vv v4, v8, v12, v0.t"); + VCMP_I32(5, v4, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, + 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, + 0xdeadbeef, 2560, 19901); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vmax.vv v8, v16, v24, v0.t"); + VCMP_I64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901); +} + +void TEST_CASE3(void) { /* vmax.vx, unmasked: signed maximum against the scalar 40 at SEW = 8, 16, 32 and 64 */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + asm volatile("vmax.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(7, v1, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40, 99, 123, 40, 40, + 99); + + VSET(16, e16, m2); +
VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + asm volatile("vmax.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(8, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + 12345, 40, 40, 199); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + asm volatile("vmax.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(9, v4, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + 12345, 40, 40, 199); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + asm volatile("vmax.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(10, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + 12345, 40, 40, 199); +} + +void TEST_CASE4(void) { /* vmax.vx, masked with v0 = 0xCC,0xCC: only elements 2 and 3 of every group of four are updated; the rest keep the preloaded filler */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef, 0xef, 0xef, 0xef, 0xef); + asm volatile("vmax.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(11, v1, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99, 0xef, 0xef, 40, 99, + 0xef, 0xef, 40, 99); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vmax.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(12, v2, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, + 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vmax.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(13, v4, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, + 199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, + 199); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vmax.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 40, 199); +} + +int main(void) { /* Runs TEST_CASE1..4 in order between INIT_CHECK() and EXIT_CHECK() */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c new file mode 100644 index 000000000..d71e3c8f1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmaxu.c @@ -0,0 +1,181 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* vmaxu.vv, unmasked: unsigned element-wise maximum at SEW = 16, 32 and 64 */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + asm volatile("vmaxu.vv v2, v4, v6"); + VCMP_U16(1, v2, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + 7000, 2560, 19901, 12345, 7000, 2560, 19901); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vmaxu.vv v4, v8, v12"); + VCMP_U32(2, v4, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + 7000, 2560, 19901, 12345, 7000, 2560, 19901); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + asm volatile("vmaxu.vv v8, v16, v24"); + VCMP_U64(3, v8, 12345, 7000, 2560, 19901, 12345, 7000, 2560, 19901, 12345, + 7000, 2560, 19901, 12345, 7000, 2560, 19901); +} + +void TEST_CASE2(void) { /* vmaxu.vv, masked: v0 = 0xCC,0xCC activates elements 2 and 3 of every group of four; the other elements must keep the preloaded 0xbeef / 0xdeadbeef filler */ + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vmaxu.vv v2, v4, v6, v0.t"); + VCMP_U16(4, v2, 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560, 19901, + 0xbeef, 0xbeef, 2560, 19901, 0xbeef, 0xbeef, 2560,
19901); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vmaxu.vv v4, v8, v12, v0.t"); + VCMP_U32(5, v4, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, 0xdeadbeef, + 2560, 19901, 0xdeadbeef, 0xdeadbeef, 2560, 19901, 0xdeadbeef, + 0xdeadbeef, 2560, 19901); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vmaxu.vv v8, v16, v24, v0.t"); + VCMP_U64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 2560, 19901); +} + +void TEST_CASE3(void) { /* vmaxu.vx, unmasked: unsigned maximum against the scalar 40 at SEW = 8, 16, 32 and 64 */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + asm volatile("vmaxu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v1, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40, 40, 199, 123, 40, + 40, 199); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8,
25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + asm volatile("vmaxu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(8, v2, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + 12345, 40, 40, 199); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + asm volatile("vmaxu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v4, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + 12345, 40, 40, 199); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + asm volatile("vmaxu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(10, v8, 12345, 40, 40, 199, 12345, 40, 40, 199, 12345, 40, 40, 199, + 12345, 40, 40, 199); +} + +void TEST_CASE4(void) { /* vmaxu.vx, masked with v0 = 0xCC,0xCC: only elements 2 and 3 of every group of four are updated; the rest keep the preloaded filler */ + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef, 0xef, 0xef, 0xef, 0xef); + asm volatile("vmaxu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(11, v1, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199, 0xef, 0xef, 40, 199, + 0xef, 0xef, 40, 199); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vmaxu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(12, v2, 0xbeef, 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199, 0xbeef, + 0xbeef, 40, 199, 0xbeef, 0xbeef, 40, 199); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + asm volatile("vmaxu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(13, v4, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, + 199, 0xdeadbeef, 0xdeadbeef, 40, 199, 0xdeadbeef, 0xdeadbeef, 40, + 199); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef); + asm volatile("vmaxu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 40, 199, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 40, 199); +} + +int main(void) { /* Runs TEST_CASE1..4 in order between INIT_CHECK() and EXIT_CHECK() */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c new file mode 100644 index 000000000..9e0eb91f7 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmerge.c @@ -0,0 +1,113 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* vmerge.vvm at SEW = 8/16/32/64 with v0 = 0xAA,0x55: the result takes the second source vector where the mask bit is 1 and the first source where it is 0 */ + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vvm v3, v1, v2, v0"); + VCMP_U8(1, v3, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vvm v6, v2, v4, v0"); + VCMP_U16(2, v6, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vvm v12, v4, v8, v0"); + VCMP_U32(3, v12, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vvm v24, v8, v16, v0"); + VCMP_U64(4, v24, 1, 7, 3, 5, 5, 3, 7, 1, 8, 2, 6, 4, 4, 6, 2, 8); +} + +void TEST_CASE2() { /* vmerge.vxm: elements whose mask bit is 1 receive the scalar, which shows up as 0xef / 0xbeef / 0xdeadbeef at SEW = 8/16/32 in the expected values */ + const uint64_t scalar = 0x00000000deadbeef; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VCMP_U8(5, v3, 1, 0xef, 3, 0xef, 5, 0xef, 7, 0xef, 0xef, 2, 0xef, 4, 0xef, 6, + 0xef, 8); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vxm v4, v2, %[A], v0" ::[A] "r"(scalar)); + VCMP_U16(6, v4, 1, 0xbeef, 3, 0xbeef, 5, 0xbeef, 7, 0xbeef, 0xbeef, 2, 0xbeef, + 4, 0xbeef, 6, 0xbeef, 8); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1,
2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vxm v8, v4, %[A], v0" ::[A] "r"(scalar)); + VCMP_U32(7, v8, 1, 0xdeadbeef, 3, 0xdeadbeef, 5, 0xdeadbeef, 7, 0xdeadbeef, + 0xdeadbeef, 2, 0xdeadbeef, 4, 0xdeadbeef, 6, 0xdeadbeef, 8); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vxm v16, v8, %[A], v0" ::[A] "r"(scalar)); + VCMP_U64(8, v16, 1, 0x00000000deadbeef, 3, 0x00000000deadbeef, 5, + 0x00000000deadbeef, 7, 0x00000000deadbeef, 0x00000000deadbeef, 2, + 0x00000000deadbeef, 4, 0x00000000deadbeef, 6, 0x00000000deadbeef, 8); +} + +void TEST_CASE3() { /* vmerge.vim with immediate -1: elements whose mask bit is 1 become all-ones at the current SEW */ + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vim v3, v1, -1, v0"); + VCMP_U8(9, v3, 1, 0xff, 3, 0xff, 5, 0xff, 7, 0xff, 0xff, 2, 0xff, 4, 0xff, 6, + 0xff, 8); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vim v4, v2, -1, v0"); + VCMP_U16(10, v4, 1, 0xffff, 3, 0xffff, 5, 0xffff, 7, 0xffff, 0xffff, 2, + 0xffff, 4, 0xffff, 6, 0xffff, 8); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vim v8, v4, -1, v0"); + VCMP_U32(11, v8, 1, 0xffffffff, 3, 0xffffffff, 5, 0xffffffff, 7, 0xffffffff, + 0xffffffff, 2, 0xffffffff, 4, 0xffffffff, 6, 0xffffffff, 8); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0x55); + asm volatile("vmerge.vim v16, v8, -1, v0"); + VCMP_U64(12, v16, 1, 0xffffffffffffffff, 3, 0xffffffffffffffff, 5, + 0xffffffffffffffff, 7, 0xffffffffffffffff, 0xffffffffffffffff, 2, + 0xffffffffffffffff, 4, 0xffffffffffffffff, 6, 0xffffffffffffffff, 8); +} + +int main(void) { /* Runs TEST_CASE1..3 in order between INIT_CHECK() and EXIT_CHECK() */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + + EXIT_CHECK(); +} diff --git
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c new file mode 100644 index 000000000..f99187197 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfeq.c @@ -0,0 +1,503 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfeq.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0x0); + + VSET(16, e32, m4); + // +0, qNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, qNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364,
-0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfeq.vv v4, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x1); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfeq.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x4); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m4); + // 0.2434, 0.7285, 
0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v8, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v12, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v4, 0x0002); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x8000); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 
0.8998114670494881, 0.6283940115157876, +// 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x800a); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfeq.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0x0020); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + //
-0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfeq.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0x7ffe); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, 0.4585094341291300, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfeq.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0x0008); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 0.5806, -0.2649, -0.4783, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 
0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfeq.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0xaaa0); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0x0002); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(12, v8, 0x0008); +}; + +// Check if only the correct destination bits are written +void TEST_CASE5(void) { + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 
0xffffffffffffffff); + // Perform vmfeq.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e16, m1); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v2, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v3, 0x33ca, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfeq.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(13, v1, 0xffffffffffff0001); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfeq.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e32, m1); + // -0.72077256, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v2, 0x70000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // 0.79994357, sNaN, -0.34645590, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v3, 0x80000000, 0xffffffff, 0xbeb162ab, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfeq.vv v1, v2, v3"); + 
VSET(1, e64, m1); + VCMP_U64(14, v1, 0xffffffffffff0004); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfeq.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e64, m1); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v2, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbf3180f63f75db3c, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // 0.8643613633211786, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v3, 0x3feba8d9296c7e74, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfeq.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(15, v1, 0xffffffffffff0001); +}; + +// Write to v0 during a masked operation, WAR dependency should be respected +void TEST_CASE6(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, 0.7241, 0.0027, -0.7114, 0.8701, + // 
0.8701, -0.5786, -0.4229, 0.6968, 0.6968, 0.7217, -0.2842, + // 0.1659, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, 0x39c6, 0xb48c, 0x314f, 0x314f); + // 0.2434, 0.7285, -0.2678, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.2622, -0.5786, -0.4229, 0.5981, 0.5981, 0.7217, -0.2842, + // 0.1328, 0.1328 + VLOAD_16(v6, 0x33ca, 0x39d4, 0xb449, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, 0xb48c, 0x3040, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfeq.vv v0, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(16, v0, 0x2222); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, 0.09933749, 0.39402914, -0.81853813, + // 0.96037650, -0.81018746, -0.44735566, -0.25510681, + // -0.30920035, -0.31596854, 0.19188073, -0.29310879, + // 0.22002794, 0.48599416, -0.80913633, -0.30138883 + VLOAD_32(v12, 0x00000000, 0x3dcb7174, 0x3ec9be30, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, 0x3e614f01, 0x3ef8d43a, 0xbf4f238f, + 0xbe9a4fa3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v0, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(17, v0, 0x2222); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.8792039527057112, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 
0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfec227053ec5198, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, -0.8792039527057112, + // 0.9703747829163081, -0.1308855743137316, -0.3798019472030296, + // -0.8792039527057112, -0.1745056251010144, + // -0.3736408604742532, 0.4947226024634424, + // -0.9079294226891812, -0.9490909352855985, 0.6283940115157876, + // 0.1053912590957002, -0.5927175227484118, -0.3032110323317654 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbfec227053ec5198, + 0x9fee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0xbfec227053ec5198, 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfe2f78abcff0ede, + 0xbfd367cf3ee9af68); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vv v0, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(18, v0, 0x2222); +}; + +// Test sNaN/qNaN behaviour +void TEST_CASE7(void) { + CLEAR_FFLAGS; + // First, give only qNaN (no exception is generated) + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, qNaNh, qNaNh, 0x39cb, qNaNh, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, qNaNh, 0xb48c, qNaNh, qNaNh); + VLOAD_16(v6, 0x33ca, qNaNh, qNaNh, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, qNaNh, qNaNh, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfeq.vv v0, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(19, v0, 0x0330); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, qNaNf, qNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 
0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v12, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v0, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(20, v0, 0x0331); + + VSET(16, e64, m8); + VLOAD_64(v16, qNaNd, qNaNd, 0x3fed8915c5665532, 0xbfec227053ec5198, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0x3fc41b3c98507fe0, + 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfd367cf3ee9af68, 0x3feccb416af162fc, qNaNd, qNaNd, + 0xbfd2cb447b63f610, qNaNd); + VLOAD_64(v24, qNaNd, 0x3fdefda0947f3460, qNaNd, 0x9fee55c27d3d743e, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0xbfec227053ec5198, + 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, qNaNd, 0x3fbafaebeb19acf0, + qNaNd, qNaNd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfeq.vv v0, v16, v24"); + VSET(1, e16, m1); + VCMP_U16(21, v0, 0x0330); + CHECK_FFLAGS(0); + + // Give sNaN (Invalid operation) + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, sNaNf, sNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v12, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfeq.vv v0, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(22, v0, 0x0331); + CHECK_FFLAGS(NV); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + // TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c new file mode 100644 index 000000000..07b4943f2 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfge.c @@ -0,0 +1,134 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values (vector-scalar) +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfge.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0xf2b7); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfge.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x7ffe); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432,
0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfge.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x4f7b); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 25904.0, -0.0818, 4.94e-5 (subnormal), -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfge.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0xaaaa); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfge.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1,
e16, m2); + VCMP_U16(5, v4, 0x000a); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfge.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x0a2a); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c new file mode 100644 index 000000000..2e401cd5a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfgt.c @@ -0,0 +1,134 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// Simple random test with similar values (vector-scalar) +void TEST_CASE1(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfgt.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0xf297); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, 0.80517912, + // 0.80517912, 0.80517912, 0.80517912, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfgt.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x0000); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984,
0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfgt.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x4f73); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 25904.0, -0.0818, 4.94e-5 (subnormal), -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfgt.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0x000a); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfgt.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x0008); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + //
0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfgt.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x0a22); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c new file mode 100644 index 000000000..d8bbce14a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfle.c @@ -0,0 +1,273 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfle.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0x6325); + + VSET(16, e32, m4); + // +0, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, sNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfle.vv v4, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x0665); + + VSET(16, e64, m8); + 
VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfle.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x31bc); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfle.vv v2, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0x2222); + + VSET(16, e32, m4); 
+ // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfle.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x8220); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + 
// 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfle.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0xa0aa); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfle.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0x0d68); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfle.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0xffff); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // 
-0.7804423569145378, 0.4585094341291300, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfle.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0xb08c); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 0.5806, -0.2649, -0.4783, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfle.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0xaaa0); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + 
VCLEAR(v4); + asm volatile("vmfle.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0xaaa2); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfle.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m8); + VCMP_U16(12, v8, 0xa088); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c new file mode 100644 index 000000000..d168798ec --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmflt.c @@ -0,0 +1,279 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmflt.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0x6325); + + VSET(16, e32, m4); + // +0, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, sNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmflt.vv v4, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(2, v4, 0x0664); + + VSET(16, e64, m8); + // 
0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmflt.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0x31b8); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 
0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmflt.vv v2, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0x2220); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmflt.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x0220); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 
0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmflt.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x20a0); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmflt.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0x0d48); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 
0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmflt.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0x8001); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, 0.4585094341291300, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmflt.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0xb084); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 0.5806, -0.2649, -0.4783, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmflt.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0x0000); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 
0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmflt.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0xaaa0); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmflt.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(12, v8, 0xa080); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c new file mode 100644 index 000000000..19adf60ba --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmfne.c @@ -0,0 +1,503 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot +// Matteo Perotti + +#include "float_macros.h" +#include "vector_macros.h" + +// This instruction writes a mask to a register, with a layout of elements as +// described in section "Mask Register Layout" +void TEST_CASE1(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfne.vv v2, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(1, v2, 0xffff); + + VSET(16, e32, m4); + // +0, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // -0, sNaN, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v12, 0x80000000, 0xffffffff, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfne.vv v4, v8, v12"); + 
VSET(1, e16, m2); + VCMP_U16(2, v4, 0xfffe); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // -0.3562510538138417, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0xbfd6ccd13852f170, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfne.vv v8, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(3, v8, 0xfffb); +}; + +// Simple random test with similar values + 1 subnormal (masked) +void TEST_CASE2(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.7285, 
0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v6, 0x39db, 0x39d4, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3507, 0xbb98); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vv v2, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(4, v2, 0xaaa8); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, -0.64782482, 0.39402914, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, 0.85755372 + VLOAD_32(v12, 0x00000000, 0xbf25d7d9, 0x3ec9be30, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0x3f5d88a4); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vv v4, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(5, v4, 0x2aaa); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 
0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfee55c27d3d743e, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vv v8, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(6, v8, 0x2aa0); +}; + +// Simple random test with similar values (vector-scalar) +void TEST_CASE3(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.0651, 0.5806, 0.2563, -0.4783, 0.7393, -0.2649, -0.4590, + // 0.5469, -0.9082, 0.6235, -0.8276, -0.7939, -0.0236, -0.1166, + // 0.4026, 0.0022 + VLOAD_16(v4, 0xac2a, 0x38a5, 0x341a, 0xb7a7, 0x39ea, 0xb43d, 0xb758, 0x3860, + 0xbb44, 0x38fd, 0xba9f, 0xba5a, 0xa60b, 0xaf76, 0x3671, 0x1896); + asm volatile("vmfne.vf v2, v4, %[A]" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(7, v2, 0xffdf); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // -0.15601152, -0.92020410, -0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0xbe1fc17c, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 
0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, 0x3f4e2038, + 0xbf4b1daf); + asm volatile("vmfne.vf v4, v8, %[A]" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(8, v4, 0x8001); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, 0.4585094341291300, + // 0.8417440789882031, -0.1215927835809432, 0.9442717441528423, + // -0.3993868853091622, 0.5719771249018739, + // 0.0497853851400327, 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + asm volatile("vmfne.vf v8, v16, %[A]" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(9, v8, 0xfff7); +}; + +// Simple random test with similar values (vector-scalar) (masked) +void TEST_CASE4(void) { + VSET(16, e16, m2); + double dscalar_16; + // -0.2649 + BOX_HALF_IN_DOUBLE(dscalar_16, 0xb43d); + // -0.2649, 0.5806, -0.2649, -0.4783, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, -0.2649, + // -0.2649, -0.2649, -0.2649, -0.2649, + VLOAD_16(v4, 0xb43d, 0x7653, 0xad3d, 0x033d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, + 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d, 0xb43d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vf v2, v4, %[A], v0.t" ::[A] "f"(dscalar_16)); + VSET(1, e16, m2); + VCMP_U16(10, v2, 0x000a); + + VSET(16, e32, m4); + double dscalar_32; + // 0.80517912 + BOX_FLOAT_IN_DOUBLE(dscalar_32, 0x3f4e2038); + // 0.80517912, 0.80517912, 
-0.29387674, 0.98594254, + // 0.88163614, -0.44641387, 0.88191622, 0.15161350, + // -0.79952192, -0.03668820, -0.38464722, -0.54745716, + // 0.09956384, 0.21655059, -0.37557366, -0.79342169 + VLOAD_32(v8, 0x3f4e2038, 0x3f4e2038, 0xbe967703, 0x3f7c66bb, 0x3f61b2e8, + 0xbee4905c, 0x3f61c543, 0x3e1b4092, 0xbf4cad78, 0xbd16465d, + 0xbec4f07b, 0xbf0c2627, 0x3dcbe820, 0x3e5dbf70, 0xbec04b31, + 0xbf4b1daf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vf v4, v8, %[A], v0.t" ::[A] "f"(dscalar_32)); + VSET(1, e16, m2); + VCMP_U16(11, v4, 0xaaa8); + + VSET(16, e64, m8); + double dscalar_64; + // -0.3394093097660049 + BOX_DOUBLE_IN_DOUBLE(dscalar_64, 0xbfd5b8e1d359c984); + // 0.8852775142880511, -0.1502080091211320, + // -0.7804423569145378, -0.3394093097660049, + // 0.8417440789882031, -0.1215927835809432, + // 0.9442717441528423, -0.3993868853091622, + // 0.5719771249018739, 0.0497853851400327, + // 0.6627817945481365, 0.2150621318612425, + // -0.8506676370622683, -0.4531982633526939, + // 0.5943189287417812, -0.5034380636605356 + VLOAD_64(v16, 0x3fec543182780b14, 0xbfc33a041b62e250, 0xbfe8f9623feb8e20, + 0xbfd5b8e1d359c984, 0x3feaef91475b6422, 0xbfbf20b464e8e5d0, + 0x3fee377960758bfa, 0xbfd98f8e02b6aa78, 0x3fe24da2f8b06fde, + 0x3fa97d7851fd8b80, 0x3fe535822a7efd70, 0x3fcb8727eb79dda0, + 0xbfeb38ab561e5658, 0xbfdd013349ed0b50, 0x3fe304a9214adedc, + 0xbfe01c2a245f7960); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vf v8, v16, %[A], v0.t" ::[A] "f"(dscalar_64)); + VSET(1, e16, m2); + VCMP_U16(12, v8, 0xaaa2); +}; + +// Check if only the correct destination bits are written +void TEST_CASE5(void) { + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfne.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e16, m1); + // 0.2434, 0.7285, 0.7241, -0.2678, 0.0027, -0.7114, 0.2622, + // 0.8701, -0.5786, -0.4229, 0.5981, 0.6968, 
0.7217, -0.2842, + // 0.1328, 0.1659 + VLOAD_16(v2, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3432, 0x3af6, + 0xb8a1, 0xb6c4, 0x38c9, 0x3993, 0x39c6, 0xb48c, 0x3040, 0x314f); + // 0.7319, 0.0590, 0.7593, -0.6606, -0.4758, 0.8530, 0.0453, + // 0.0987, 0.1777, 0.3047, 0.2330, -0.3467, -0.4153, 0.7080, + // 0.3142, -0.9492 + VLOAD_16(v3, 0x33ca, 0x2b8c, 0x3a13, 0xb949, 0xb79d, 0x3ad3, 0x29cc, 0x2e51, + 0x31b0, 0x34e0, 0x3375, 0xb58c, 0xb6a5, 0x39aa, 0x3041, 0xbb98); + asm volatile("vmfne.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(13, v1, 0xfffffffffffffffe); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfne.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, e32, m1); + // -0.72077256, sNaN, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v2, 0x70000000, 0xffffffff, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5b88a4); + // 0.79994357, sNaN, -0.34645590, -0.81853813, + // 0.24656086, -0.71423489, -0.44735566, -0.25510681, + // -0.94378990, -0.30138883, 0.19188073, -0.29310879, + // -0.22981364, -0.58626360, -0.80913633, -0.00670803 + VLOAD_32(v3, 0x80000000, 0xffffffff, 0xbeb162ab, 0xbf518bb7, 0x3e7c7a73, + 0xbf36d819, 0xbee50bcd, 0xbe829d5c, 0xbf719c37, 0xbe9a4fa3, + 0x3e447c62, 0xbe96125b, 0xbe6b5444, 0xbf16155f, 0xbf4f238f, + 0xbbdbcefe); + asm volatile("vmfne.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(14, v1, 0xfffffffffffffffb); + + // Fill 64-bits with 1 + VSET(1, e64, m1); + VLOAD_64(v1, 0xffffffffffffffff); + // Perform vmfne.vv on 16 different elements, and then check that the last (64 + // - 16 = 48) bits were not overwritten with zeroes + VSET(16, 
e64, m1); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.9479687162489723, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, 0.4329957213663693 + VLOAD_64(v2, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbf3180f63f75db3c, + 0xbfee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0x3fdbb633afa4e520); + // 0.8643613633211786, -0.0135629748736219, 0.6176167733891369, + // 0.9703747829163081, -0.0909539316920625, -0.1057326828885887, + // -0.8792039527057112, -0.1745056251010144, 0.3110320594479206, + // 0.3238986651420683, -0.9079294226891812, -0.9490909352855985, + // 0.6962970677624296, 0.7585780695949504, -0.5927175227484118, + // -0.7793965434104730 + VLOAD_64(v3, 0x3feba8d9296c7e74, 0xbf8bc6e7ac263f80, 0x3fed8915c5665532, + 0x3fef0d4f6aafa2f6, 0xbfb748c1c20f5de0, 0xbfbb114c0f1ff4b0, + 0xbfec227053ec5198, 0xbfc6563348637140, 0x3fd3e7f302d586b4, + 0x3fd4bac177803510, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe64810c9cae3fe, 0x3fe84645840bf0a2, 0xbfe2f78abcff0ede, + 0xbfe8f0d105120796); + asm volatile("vmfne.vv v1, v2, v3"); + VSET(1, e64, m1); + VCMP_U64(15, v1, 0xfffffffffffffffe); +}; + +// Write to v0 during a masked operation, WAR dependency should be respected +void TEST_CASE6(void) { + VSET(16, e16, m2); + // 0.2434, 0.7285, 0.7241, 0.7241, 0.0027, -0.7114, 0.8701, + // 0.8701, -0.5786, -0.4229, 0.6968, 0.6968, 0.7217, -0.2842, + // 0.1659, 0.1659 + VLOAD_16(v4, 0x33ca, 0x39d4, 0x39cb, 0xb449, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, 0x39c6, 0xb48c, 0x314f, 0x314f); + // 0.2434, 0.7285, -0.2678, -0.2678, 0.0027, -0.7114, 
0.2622, + // 0.2622, -0.5786, -0.4229, 0.5981, 0.5981, 0.7217, -0.2842, + // 0.1328, 0.1328 + VLOAD_16(v6, 0x33ca, 0x39d4, 0xb449, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, 0xb48c, 0x3040, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vv v0, v4, v6, v0.t"); + VSET(1, e16, m2); + VCMP_U16(16, v0, 0x8888); + + VSET(16, e32, m4); + // 0x00000000, 0.09933749, -0.34645590, -0.06222415, + // 0.96037650, -0.81018746, -0.69337404, 0.70466602, + // -0.30920035, -0.31596854, -0.92116749, 0.51336122, + // 0.22002794, 0.48599416, 0.69166088, 0.85755372 + VLOAD_32(v8, 0x00000000, 0x3dcb7174, 0xbeb162ab, 0xbd7edebf, 0x3f75db3c, + 0xbf4f6872, 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, + 0xbf6bd1a2, 0x3f036ba4, 0x3e614f01, 0x3ef8d43a, 0x3f3110b0, + 0x3f5d88a4); + // 0x00000000, 0.09933749, 0.39402914, -0.81853813, + // 0.96037650, -0.81018746, -0.44735566, -0.25510681, + // -0.30920035, -0.31596854, 0.19188073, -0.29310879, + // 0.22002794, 0.48599416, -0.80913633, -0.30138883 + VLOAD_32(v12, 0x00000000, 0x3dcb7174, 0x3ec9be30, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, 0x3e614f01, 0x3ef8d43a, 0xbf4f238f, + 0xbe9a4fa3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vv v0, v8, v12, v0.t"); + VSET(1, e16, m2); + VCMP_U16(17, v0, 0x8888); + + VSET(16, e64, m8); + // 0.8643613633211786, 0.4842301798024149, 0.9229840140784857, + // -0.8792039527057112, -0.1308855743137316, + // -0.3798019472030296, 0.1570811980936915, + // -0.7665403705017886, -0.3736408604742532, 0.4947226024634424, + // -0.3032110323317654, 0.8998114670494881, 0.6283940115157876, + // 0.1053912590957002, -0.2936564640984622, -0.7793965434104730 + VLOAD_64(v16, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0x3fed8915c5665532, + 0xbfec227053ec5198, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0x3fc41b3c98507fe0, 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 
0xbfd367cf3ee9af68, 0x3feccb416af162fc, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfd2cb447b63f610, + 0xbfe8f0d105120796); + // 0.8643613633211786, 0.4842301798024149, -0.8792039527057112, + // 0.9703747829163081, -0.1308855743137316, -0.3798019472030296, + // -0.8792039527057112, -0.1745056251010144, + // -0.3736408604742532, 0.4947226024634424, + // -0.9079294226891812, -0.9490909352855985, 0.6283940115157876, + // 0.1053912590957002, -0.5927175227484118, -0.3032110323317654 + VLOAD_64(v24, 0x3feba8d9296c7e74, 0x3fdefda0947f3460, 0xbfec227053ec5198, + 0x9fee55c27d3d743e, 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, + 0xbfec227053ec5198, 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, + 0x3fdfa988fd8b0a24, 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, + 0x3fe41bcdc20ecd40, 0x3fbafaebeb19acf0, 0xbfe2f78abcff0ede, + 0xbfd367cf3ee9af68); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vv v0, v16, v24, v0.t"); + VSET(1, e16, m2); + VCMP_U16(18, v0, 0x8888); +}; + +// Test sNaN/qNaN behaviour +void TEST_CASE7(void) { + CLEAR_FFLAGS; + // First, give only qNaN (no exception is generated) + VSET(16, e16, m2); + CHECK_FFLAGS(0); + VLOAD_16(v4, qNaNh, qNaNh, 0x39cb, qNaNh, 0x1975, 0xb9b1, 0x3af6, 0x3af6, + 0xb8a1, 0xb6c4, 0x3993, 0x3993, qNaNh, 0xb48c, qNaNh, qNaNh); + VLOAD_16(v6, 0x33ca, qNaNh, qNaNh, 0x39cb, 0x1975, 0xb9b1, 0x3432, 0x3432, + 0xb8a1, 0xb6c4, 0x38c9, 0x38c9, 0x39c6, qNaNh, qNaNh, 0x3040); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmfne.vv v0, v4, v6"); + VSET(1, e16, m2); + VCMP_U16(19, v0, 0xfccf); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, qNaNf, qNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v16, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm 
volatile("vmfne.vv v0, v8, v16"); + VSET(1, e16, m2); + VCMP_U16(20, v0, 0xfcce); + + VSET(16, e64, m8); + VLOAD_64(v16, qNaNd, qNaNd, 0x3fed8915c5665532, 0xbfec227053ec5198, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0x3fc41b3c98507fe0, + 0xbfe8877fabcbce12, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfd367cf3ee9af68, 0x3feccb416af162fc, qNaNd, qNaNd, + 0xbfd2cb447b63f610, qNaNd); + VLOAD_64(v24, qNaNd, 0x3fdefda0947f3460, qNaNd, 0x9fee55c27d3d743e, + 0xbfc0c0dbc6990b38, 0xbfd84eacd38c6ca4, 0xbfec227053ec5198, + 0xbfc6563348637140, 0xbfd7e9bb5b0beaf8, 0x3fdfa988fd8b0a24, + 0xbfed0dc20130d694, 0xbfee5ef3f3ff6a12, qNaNd, 0x3fbafaebeb19acf0, + qNaNd, qNaNd); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmfne.vv v0, v16, v24"); + VSET(1, e16, m2); + VCMP_U16(21, v0, 0xfccf); + CHECK_FFLAGS(0); + + // Give sNaN (Invalid operation) + VSET(16, e32, m4); + VLOAD_32(v8, 0x3f75db3c, sNaNf, sNaNf, qNaNf, 0x3f75db3c, 0xbf4f6872, + 0xbf3180f6, 0x3f3464fe, 0xbe9e4f82, 0xbea1c6a1, 0xbf6bd1a2, + 0x3f036ba4, qNaNf, qNaNf, 0x3f3110b0, qNaNf); + VLOAD_32(v12, 0x3f75db3c, 0x3dcb7174, qNaNf, 0xbf518bb7, 0x3f75db3c, + 0xbf4f6872, 0xbee50bcd, 0xbe829d5c, 0xbe9e4f82, 0xbea1c6a1, + 0x3e447c62, 0xbe96125b, qNaNf, 0x3ef8d43a, qNaNf, qNaNf); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmfne.vv v0, v8, v12"); + VSET(1, e16, m2); + VCMP_U16(22, v0, 0xfcce); + CHECK_FFLAGS(NV); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + // TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c new file mode 100644 index 000000000..fe3e183c6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmin.c @@ -0,0 +1,181 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) { // vmin.vv, unmasked, checked at e16, e32 and e64
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  asm volatile("vmin.vv v2, v4, v6");
+  VCMP_I16(1, v2, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400,
+           -19900, 50, -80, 400, -19900);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vmin.vv v4, v8, v12");
+  VCMP_I32(2, v4, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400,
+           -19900, 50, -80, 400, -19900);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vmin.vv v8, v16, v24");
+  VCMP_I64(3, v8, 50, -80, 400, -19900, 50, -80, 400, -19900, 50, -80, 400,
+           -19900, 50, -80, 400, -19900);
+};
+
+void TEST_CASE2(void) { // vmin.vv under v0.t; destination preloaded with a marker
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC); // 0b11001100: bits 2,3 of every group of 4 are set
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vmin.vv v2, v4, v6, v0.t");
+  VCMP_I16(4, v2, 0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900,
+           0xbeef, 0xbeef, 400, -19900, 0xbeef, 0xbeef, 400, -19900);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vmin.vv v4, v8, v12, v0.t");
+  VCMP_I32(5, v4, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef, 0xdeadbeef,
+           400, -19900, 0xdeadbeef, 0xdeadbeef, 400, -19900, 0xdeadbeef,
+           0xdeadbeef, 400, -19900);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80,
+           2560, -19900, 12345, -80, 2560, -19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vmin.vv v8, v16, v24, v0.t");
+  VCMP_I64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, -19900);
+};
+
+void TEST_CASE3(void) { // vmin.vx against a scalar register, unmasked, e8..e64
+  const uint64_t scalar = 40;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8,
+          -25, 99);
+  asm volatile("vmin.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_I8(7, v1, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25,
+          40);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  asm volatile("vmin.vx v2, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_I16(8, v2, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8,
+           -25, 40);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  asm volatile("vmin.vx v4, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_I32(9, v4, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8,
+           -25, 40);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  asm volatile("vmin.vx v8, v16, %[A]" ::[A] "r"(scalar));
+  VCMP_I64(10, v8, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8, -25, 40, 40, -8,
+           -25, 40);
+};
+
+void TEST_CASE4(void) { // vmin.vx under v0.t; destination preloaded with a marker
+  const uint64_t scalar = 40;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8,
+          -25, 99);
+  VLOAD_8(v0, 0xCC, 0xCC); // 0b11001100: bits 2,3 of every group of 4 are set
+  VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+          0xef, 0xef, 0xef, 0xef, 0xef);
+  asm volatile("vmin.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I8(11, v1, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40, 0xef, 0xef, -25, 40,
+          0xef, 0xef, -25, 40);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vmin.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I16(12, v2, 0xbeef, 0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40, 0xbeef,
+           0xbeef, -25, 40, 0xbeef, 0xbeef, -25, 40);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vmin.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I32(13, v4, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25,
+           40, 0xdeadbeef, 0xdeadbeef, -25, 40, 0xdeadbeef, 0xdeadbeef, -25,
+           40);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199,
+           12345, -8, -25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vmin.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_I64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, -25, 40, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, -25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           -25, 40);
+};
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c
new file mode 100644
index 000000000..5472f5cb0
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vminu.c
@@ -0,0 +1,176 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) { // vminu.vv, unmasked, checked at e16, e32 and e64
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  asm volatile("vminu.vv v2, v4, v6");
+  VCMP_U16(1, v2, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900,
+           50, 80, 400, 19900);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vminu.vv v4, v8, v12");
+  VCMP_U32(2, v4, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900,
+           50, 80, 400, 19900);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  asm volatile("vminu.vv v8, v16, v24");
+  VCMP_U64(3, v8, 50, 80, 400, 19900, 50, 80, 400, 19900, 50, 80, 400, 19900,
+           50, 80, 400, 19900);
+};
+
+void TEST_CASE2(void) { // vminu.vv under v0.t; destination preloaded with a marker
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901,
+           50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC); // 0b11001100: bits 2,3 of every group of 4 are set
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vminu.vv v2, v4, v6, v0.t");
+  VCMP_U16(4, v2, 0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900,
+           0xbeef, 0xbeef, 400, 19900, 0xbeef, 0xbeef, 400, 19900);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vminu.vv v4, v8, v12, v0.t");
+  VCMP_U32(5, v4, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef, 0xdeadbeef,
+           400, 19900, 0xdeadbeef, 0xdeadbeef, 400, 19900, 0xdeadbeef,
+           0xdeadbeef, 400, 19900);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560,
+           19900, 12345, 80, 2560, 19900);
+  VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400,
+           19901, 50, 7000, 400, 19901);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vminu.vv v8, v16, v24, v0.t");
+  VCMP_U64(6, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 400, 19900);
+};
+
+void TEST_CASE3(void) { // vminu.vx against a scalar register, unmasked, e8..e64
+  const uint64_t scalar = 40;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25,
+          199);
+  asm volatile("vminu.vx v1, v2, %[A]" ::[A] "r"(scalar));
+  VCMP_U8(7, v1, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  asm volatile("vminu.vx v2, v4, %[A]" ::[A] "r"(scalar));
+  VCMP_U16(8, v2, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  asm volatile("vminu.vx v4, v8, %[A]" ::[A] "r"(scalar));
+  VCMP_U32(9, v4, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  asm volatile("vminu.vx v8, v16, %[A]" ::[A] "r"(scalar));
+  VCMP_U64(10, v8, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40, 40, 8, 25, 40);
+};
+
+void TEST_CASE4(void) { // vminu.vx under v0.t; destination preloaded with a marker
+  const uint64_t scalar = 40;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25,
+          199);
+  VLOAD_8(v0, 0xCC, 0xCC); // 0b11001100: bits 2,3 of every group of 4 are set
+  VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef,
+          0xef, 0xef, 0xef, 0xef, 0xef);
+  asm volatile("vminu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U8(11, v1, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40, 0xef, 0xef, 25, 40,
+          0xef, 0xef, 25, 40);
+
+  VSET(16, e16, m2);
+  VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef,
+           0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef);
+  asm volatile("vminu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U16(12, v2, 0xbeef, 0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40, 0xbeef,
+           0xbeef, 25, 40, 0xbeef, 0xbeef, 25, 40);
+
+  VSET(16, e32, m4);
+  VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+           0xdeadbeef);
+  asm volatile("vminu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U32(13, v4, 0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25,
+           40, 0xdeadbeef, 0xdeadbeef, 25, 40, 0xdeadbeef, 0xdeadbeef, 25, 40);
+
+  VSET(16, e64, m8);
+  VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345,
+           8, 25, 199);
+  VLOAD_8(v0, 0xCC, 0xCC);
+  VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef);
+  asm volatile("vminu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
+  VCMP_U64(14, v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40,
+           0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 25, 40, 0xdeadbeefdeadbeef,
+           0xdeadbeefdeadbeef, 25, 40, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef,
+           25, 40);
+};
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c
new file mode 100644
index 000000000..61db49ecc
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnand.c
@@ -0,0 +1,68 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1() { // vmnand.mm on two arbitrary 16-bit mask patterns
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  asm volatile("vmnand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(1, v1, 0x7B, 0xDE); // ~(0xCD & 0x84), ~(0xEF & 0x21)
+}
+
+void TEST_CASE2() { // second operand all ones: result is ~v2
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0xFF, 0xFF);
+  asm volatile("vmnand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(2, v1, 0x32, 0x10); // ~0xCD, ~0xEF
+}
+
+void TEST_CASE3() { // second operand all zeros: result is all ones
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x00, 0x00);
+  asm volatile("vmnand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(3, v1, 0xFF, 0xFF);
+}
+
+void TEST_CASE4() { // second operand selects one nibble per byte
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x0F, 0xF0);
+  asm volatile("vmnand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(4, v1, 0xF2, 0x1F); // ~(0xCD & 0x0F), ~(0xEF & 0xF0)
+}
+
+void TEST_CASE5() { // shortened vector length, destination preloaded with ones
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 0xFF, 0xFF);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  VSET(13, e8, m1);
+  asm volatile("vmnand.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(5, v1, 0x7B, 0xFE); // 0xDE would have bit 13 clear; preload kept
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c
new file mode 100644
index 000000000..15322ac72
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmnor.c
@@ -0,0 +1,68 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1() { // vmnor.mm on two arbitrary 16-bit mask patterns
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  asm volatile("vmnor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(1, v1, 0x32, 0x10); // ~(0xCD | 0x84), ~(0xEF | 0x21)
+}
+
+void TEST_CASE2() { // second operand all ones: result is all zeros
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0xFF, 0xFF);
+  asm volatile("vmnor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(2, v1, 0x00, 0x00);
+}
+
+void TEST_CASE3() { // second operand all zeros: result is ~v2
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x00, 0x00);
+  asm volatile("vmnor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(3, v1, 0x32, 0x10); // ~0xCD, ~0xEF
+}
+
+void TEST_CASE4() { // second operand sets one nibble per byte
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x0F, 0xF0);
+  asm volatile("vmnor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(4, v1, 0x30, 0x00); // ~(0xCD | 0x0F), ~(0xEF | 0xF0)
+}
+
+void TEST_CASE5() { // shortened vector length, destination preloaded with ones
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 0xFF, 0xFF);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  VSET(13, e8, m1);
+  asm volatile("vmnor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(5, v1, 0x32, 0xF0); // 0x10 in bits 8..12, preloaded ones in 13..15
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c
new file mode 100644
index 000000000..8a1693082
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmor.c
@@ -0,0 +1,68 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1() { // vmor.mm on two arbitrary 16-bit mask patterns
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  asm volatile("vmor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(1, v1, 0xCD, 0xEF); // v3's set bits are all already set in v2
+}
+
+void TEST_CASE2() { // second operand all ones: result is all ones
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0xFF, 0xFF);
+  asm volatile("vmor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(2, v1, 0xFF, 0xFF);
+}
+
+void TEST_CASE3() { // second operand all zeros: result is v2
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x00, 0x00);
+  asm volatile("vmor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(3, v1, 0xCD, 0xEF);
+}
+
+void TEST_CASE4() { // second operand sets one nibble per byte
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x0F, 0xF0);
+  asm volatile("vmor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(4, v1, 0xCF, 0xFF); // 0xCD | 0x0F, 0xEF | 0xF0
+}
+
+void TEST_CASE5() { // shortened vector length, destination preloaded with ones
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 0xFF, 0xFF);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  VSET(13, e8, m1);
+  asm volatile("vmor.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(5, v1, 0xCD, 0xEF); // NOTE(review): 0xEF has bits 13..15 set anyway
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c
new file mode 100644
index 000000000..8e9497283
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmornot.c
@@ -0,0 +1,68 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1() { // vmornot.mm on two arbitrary 16-bit mask patterns
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  asm volatile("vmornot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(1, v1, 0xFF, 0xFF); // 0xCD | ~0x84, 0xEF | ~0x21
+}
+
+void TEST_CASE2() { // second operand all ones: result is v2
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0xFF, 0xFF);
+  asm volatile("vmornot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(2, v1, 0xCD, 0xEF);
+}
+
+void TEST_CASE3() { // second operand all zeros: result is all ones
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x00, 0x00);
+  asm volatile("vmornot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(3, v1, 0xFF, 0xFF);
+}
+
+void TEST_CASE4() { // second operand clears one nibble per byte
+  VSET(16, e8, m1);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x0F, 0xF0);
+  asm volatile("vmornot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(4, v1, 0xFD, 0xEF); // 0xCD | ~0x0F, 0xEF | ~0xF0
+}
+
+void TEST_CASE5() { // shortened vector length, destination preloaded with ones
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 0xFF, 0xFF);
+  VLOAD_8(v2, 0xCD, 0xEF);
+  VLOAD_8(v3, 0x84, 0x21);
+  VSET(13, e8, m1);
+  asm volatile("vmornot.mm v1, v2, v3");
+  VSET(2, e8, m1);
+  VCMP_U8(5, v1, 0xFF, 0xFF); // NOTE(review): all-ones result hides bits 13..15
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c
new file mode 100644
index 000000000..00a4e6a62
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbc.c
@@ -0,0 +1,160 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1(void) { // vmsbc.vvm: borrow-out of v1 - v2 - v0 bit, e8..e64
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16,
+          0xef, 10, 0xff);
+  VLOAD_8(v2, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef,
+          12, 0x80);
+  VLOAD_8(v0, 0x99, 0x99); // borrow-in bits 0,3,4,7 of each byte
+  asm volatile("vmsbc.vvm v3, v1, v2, v0");
+  VSET(2, e8, m1);
+  VCMP_U8(1, v3, 0x44, 0x44); // only 10 - 12 borrows: bit 2 of each nibble
+
+  VSET(8, e16, m1);
+  VLOAD_16(v1, 16, 0xbeef, 10, 0xffff, 16, 0xbeef, 10, 0xffff);
+  VLOAD_16(v2, 4, 0xbeef, 12, 0x8000, 4, 0xbeef, 12, 0x8000);
+  VLOAD_8(v0, 0x99);
+  VCLEAR(v3);
+  asm volatile("vmsbc.vvm v3, v1, v2, v0");
+  VSET(1, e8, m1);
+  VCMP_U8(2, v3, 0x44);
+
+  VSET(4, e32, m1);
+  VLOAD_32(v1, 16, 0xdeadbeef, 10, 0xffffffff);
+  VLOAD_32(v2, 4, 0xdeadbeef, 12, 0x80000000);
+  VLOAD_8(v0, 0x09);
+  VCLEAR(v3);
+  asm volatile("vmsbc.vvm v3, v1, v2, v0");
+  VSET(1, e8, m1);
+  VCMP_U8(3, v3, 0x04);
+
+  VSET(2, e64, m1);
+  VLOAD_64(v1, 16, 0xdeadbeefdeadbeef);
+  VLOAD_64(v2, 4, 0xdeadbeefdeadbeef);
+  VLOAD_8(v0, 0x3);
+  VCLEAR(v3);
+  asm volatile("vmsbc.vvm v3, v1, v2, v0");
+  VSET(1, e8, m1);
+  VCMP_U8(4, v3, 0x02); // equal operands with borrow-in 1 borrow: bit 1
+};
+
+void TEST_CASE2(void) { // vmsbc.vv: borrow-out of v1 - v2, no borrow-in
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16, 0xef, 10, 0xff, 16,
+          0xef, 10, 0xff);
+  VLOAD_8(v2, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef, 12, 0x80, 4, 0xef,
+          12, 0x80);
+  asm volatile("vmsbc.vv v3, v1, v2");
+  VSET(2, e8, m1);
+  VCMP_U8(5, v3, 0x44, 0x44);
+
+  VSET(8, e16, m1);
+  VLOAD_16(v1, 16, 0xbeef, 10, 0xffff, 16, 0xbeef, 10, 0xffff);
+  VLOAD_16(v2, 4, 0xbeef, 12, 0x8000, 4, 0xbeef, 12, 0x8000);
+  VCLEAR(v3);
+  asm volatile("vmsbc.vv v3, v1, v2");
+  VSET(1, e8, m1);
+  VCMP_U8(6, v3, 0x44);
+
+  VSET(4, e32, m1);
+  VLOAD_32(v1, 16, 0xdeadbeef, 10, 0xffffffff);
+  VLOAD_32(v2, 4, 0xdeadbeef, 12, 0x80000000);
+  VCLEAR(v3);
+  asm volatile("vmsbc.vv v3, v1, v2");
+  VSET(1, e8, m1);
+  VCMP_U8(7, v3, 0x04);
+
+  VSET(2, e64, m1);
+  VLOAD_64(v1, 16, 0xdeadbeefdeadbeef);
+  VLOAD_64(v2, 4, 0xdeadbeefdeadbeef);
+  VCLEAR(v3);
+  asm volatile("vmsbc.vv v3, v1, v2");
+  VSET(1, e8, m1);
+  VCMP_U8(8, v3, 0x00); // equal operands without borrow-in: no borrow
+};
+
+void TEST_CASE3(void) { // vmsbc.vxm: borrow-out of v1 - scalar - v0 bit
+  const uint64_t scalar = 20;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25);
+  VLOAD_8(v0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0);
+  asm volatile("vmsbc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar));
+  VSET(2, e8, m1);
+  VCMP_U8(9, v3, 0x22, 0x22); // only 10 - 20 borrows: bit 1 of each nibble
+
+  VSET(8, e16, m1);
+  VLOAD_16(v1, 20, 10, 30, 25, 20, 10, 30, 25);
+  VLOAD_16(v0, 8, 0, 0, 0, 8, 0, 0, 0);
+  VCLEAR(v2);
+  asm volatile("vmsbc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar));
+  VSET(1, e8, m1);
+  VCMP_U8(10, v2, 0x22);
+
+  VSET(4, e32, m1);
+  VLOAD_32(v1, 20, 10, 30, 25);
+  VLOAD_32(v0, 8, 0, 0, 0); // one initializer per element, as in the e16/e64 cases
+  VCLEAR(v2);
+  asm volatile("vmsbc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar));
+  VSET(1, e8, m1);
+  VCMP_U8(11, v2, 0x02);
+
+  VSET(2, e64, m1);
+  VLOAD_64(v1, 20, 10);
+  VLOAD_64(v0, 8, 0);
+  VCLEAR(v2);
+  asm volatile("vmsbc.vxm v2, v1, %[A], v0" ::[A] "r"(scalar));
+  VSET(1, e8, m1);
+  VCMP_U8(12, v2, 0x02);
+};
+
+void TEST_CASE4(void) { // vmsbc.vx: borrow-out of v1 - scalar, no borrow-in
+  const uint64_t scalar = 20;
+
+  VSET(16, e8, m1);
+  VLOAD_8(v1, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25, 20, 10, 30, 25);
+  asm volatile("vmsbc.vx v3, v1, %[A]" ::[A] "r"(scalar));
+  VSET(2, e8, m1);
+  VCMP_U8(13, v3, 0x22, 0x22);
+
+  VSET(8, e16, m1);
+  VLOAD_16(v1, 20, 10, 30, 25, 20, 10, 30, 25);
+  VCLEAR(v2);
+  asm volatile("vmsbc.vx v2, v1, %[A]" ::[A] "r"(scalar));
+  VSET(1, e8, m1); // one byte is checked, matching TEST_CASE1-3
+  VCMP_U8(14, v2, 0x22);
+
+  VSET(4, e32, m1);
+  VLOAD_32(v1, 20, 10, 30, 25);
+  VCLEAR(v2);
+  asm volatile("vmsbc.vx v2, v1, %[A]" ::[A] "r"(scalar));
+  VSET(1, e8, m1);
+  VCMP_U8(15, v2, 0x02);
+
+  VSET(2, e64, m1);
+  VLOAD_64(v1, 20, 10);
+  VCLEAR(v2);
+  asm volatile("vmsbc.vx v2, v1, %[A]" ::[A] "r"(scalar));
+  VSET(1, e8, m1); // one byte is checked, matching TEST_CASE1-3
+  VCMP_U8(16, v2, 0x02);
+};
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c
new file mode 100644
index 000000000..2f8346db3
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsbf.c
@@ -0,0 +1,33 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+//
+// Author: Matheus Cavalcante
+//         Basile Bougenot
+
+#include "vector_macros.h"
+
+void TEST_CASE1() { // vmsbf.m, unmasked: set the bits below the first set bit
+  VSET(8, e8, m1);
+  VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0);
+  __asm__ volatile("vmsbf.m v2, v3"); // NOTE(review): v2 is not cleared first
+  VCMP_U8(1, v2, 7, 0, 0, 0, 0, 0, 0, 0); // first set bit is bit 3 -> 0b111
+}
+
+void TEST_CASE2() { // vmsbf.m under v0.t with only bits 0 and 1 of v0 set
+  VSET(8, e8, m1);
+  VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0);
+  VLOAD_8(v0, 3, 0, 0, 0, 0, 0, 0, 0);
+  VCLEAR(v2);
+  __asm__ volatile("vmsbf.m v2, v3, v0.t");
+  VCMP_U8(2, v2, 3, 0, 0, 0, 0, 0, 0, 0); // bit 2 stays at its cleared value
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+  enable_fp(); // NOTE(review): no FP instruction is issued in this file
+  TEST_CASE1();
+  TEST_CASE2();
+  EXIT_CHECK();
+}
diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c
new file mode 100644
index 000000000..824221d88
--- /dev/null
+++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmseq.c
@@ -0,0 +1,306 @@
+// Copyright 2021 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmseq.vv v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0xcc, 0xcc); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v6, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmseq.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0xcc, 0xcc); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmseq.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0xcc, 0xcc); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0, 
0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmseq.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0xcc, 0xcc); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v1); + asm volatile("vmseq.vv v1, v2, v3, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v8, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v2); + asm volatile("vmseq.vv v2, v4, v8, v0.t"); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v4); + asm volatile("vmseq.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(7, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 
0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v8); + asm volatile("vmseq.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmseq.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0x11, 0x11); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmseq.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0x11, 0x11); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmseq.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0x11, 0x11); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm 
volatile("vmseq.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v8, 0x00, 0x00); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmseq.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0x10, 0x10); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmseq.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0x10, 0x10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmseq.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0x10, 0x10); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmseq.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(16, v8, 0x00, 0x00); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmseq.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v1, 0x99, 0x99); + + VSET(16, 
e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmseq.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(18, v2, 0x11, 0x11); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmseq.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(19, v4, 0x11, 0x11); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmseq.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(20, v8, 0x11, 0x11); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmseq.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v1, 0x10, 0x10); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmseq.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(22, v2, 0x10, 0x10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmseq.vi v4, v8, 15, 
v0.t"); + VSET(2, e8, m1); + VCMP_U8(23, v4, 0x10, 0x10); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmseq.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(24, v8, 0x10, 0x10); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c new file mode 100644 index 000000000..27107bc4e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgt.c @@ -0,0 +1,168 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsgt.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x99, 0x99); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsgt.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0x99, 0x99); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsgt.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0x99, 0x99); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsgt.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0x99, 0x99); +}; + +void TEST_CASE2(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsgt.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsgt.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsgt.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + 
VSET(2, e8, m1); + VCMP_U8(7, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsgt.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsgt.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0x99, 0x99); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsgt.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0x99, 0x99); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsgt.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0x99, 0x99); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsgt.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(12, v8, 0x99, 0x99); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v1); + asm volatile("vmsgt.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v2); + asm volatile("vmsgt.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v4); + asm volatile("vmsgt.vi v4, v8, 15, 
v0.t"); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0x88, 0x88); + VCLEAR(v8); + asm volatile("vmsgt.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(16, v8, 0x88, 0x88); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c new file mode 100644 index 000000000..d235f2ff1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsgtu.c @@ -0,0 +1,168 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsgtu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x99, 0x99); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsgtu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0x99, 0x99); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsgtu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0x99, 0x99); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsgtu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0x99, 0x99); +}; + +void 
TEST_CASE2(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsgtu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x88, 0x88); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsgtu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsgtu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsgtu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsgtu.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0xDD, 0xDD); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsgtu.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0xDD, 0xDD); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsgtu.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0xDD, 0xDD); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsgtu.vi v8, v16, 15"); + VSET(2, 
e8, m8); + VCMP_U8(12, v8, 0xDD, 0xDD); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsgtu.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0xCC, 0xCC); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsgtu.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0xCC, 0xCC); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsgtu.vi v4, v8, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0xCC, 0xCC); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsgtu.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(16, v8, 0xCC, 0xCC); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c new file mode 100644 index 000000000..9ea465b69 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsif.c @@ -0,0 +1,33 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(8, e8, m1); + VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0); + __asm__ volatile("vmsif.m v2, v3"); + VCMP_U8(1, v2, 15, 0, 0, 0, 0, 0, 0, 0); +} + +void TEST_CASE2() { + VSET(8, e8, m1); + VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0); + VLOAD_8(v0, 11, 0, 0, 0, 0, 0, 0, 0); + VCLEAR(v2); + __asm__ volatile("vmsif.m v2, v3, v0.t"); + VCMP_U8(2, v2, 11, 0, 0, 0, 0, 0, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c new file mode 100644 index 000000000..fc57ec628 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsle.c @@ -0,0 +1,237 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 50, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmsle.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAB, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 50, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmsle.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAB, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 50, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmsle.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(3, v8, 0xAB, 0xAA); +}; + +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsle.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsle.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v4, 0x88, 
0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsle.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsle.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsle.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsle.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsle.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsle.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsle.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + 
VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsle.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsle.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmsle.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(15, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmsle.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(16, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmsle.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmsle.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(18, v8, 0x66, 0x66); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsle.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(19, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm 
volatile("vmsle.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(20, v2, 0x44, 0x44); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsle.vi v4, v8, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsle.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(22, v8, 0x44, 0x44); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c new file mode 100644 index 000000000..af4f083e4 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsleu.c @@ -0,0 +1,237 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 50, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmsleu.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAB, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 50, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmsleu.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAB, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 50, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmsleu.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(3, v8, 0xAB, 0xAA); +}; + +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsleu.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsleu.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v4, 0x88, 0x88); + + VSET(16, e64, m8); + 
VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsleu.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsleu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsleu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsleu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsleu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsleu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsleu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, m4); + 
VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsleu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsleu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsleu.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(15, v1, 0x22, 0x22); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsleu.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(16, v2, 0x22, 0x22); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsleu.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v4, 0x22, 0x22); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsleu.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(18, v8, 0x22, 0x22); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsleu.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(19, v1, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsleu.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(20, v2, 0x00, 0x00); + + VSET(16, e32, m4); + VLOAD_32(v8, 
12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsleu.vi v4, v8, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v4, 0x00, 0x00); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsleu.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(22, v8, 0x00, 0x00); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c new file mode 100644 index 000000000..749d90f75 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmslt.c @@ -0,0 +1,163 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmslt.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAA, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmslt.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAA, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmslt.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(3, v8, 0xAA, 0xAA); +}; + +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmslt.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmslt.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, 
v4, 0x88, 0x88); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -80, 2560, -19900, 12345, -80, 2560, -19900, 12345, -80, + 2560, -19900, 12345, -80, 2560, -19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmslt.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VCLEAR(v1); + asm volatile("vmslt.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v2); + asm volatile("vmslt.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v4); + asm volatile("vmslt.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VCLEAR(v8); + asm volatile("vmslt.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, -25, 99, 123, -8, + -25, 99); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmslt.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmslt.vx v2, v4, %[A], v0.t" ::[A] 
"r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmslt.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, -8, -25, 199, 12345, -8, -25, 199, 12345, -8, -25, 199, + 12345, -8, -25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmslt.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c new file mode 100644 index 000000000..6c9c7b314 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsltu.c @@ -0,0 +1,163 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VCLEAR(v2); + asm volatile("vmsltu.vv v2, v4, v6"); + VSET(2, e8, m1); + VCMP_U8(1, v2, 0xAA, 0xAA); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v4); + asm volatile("vmsltu.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(2, v4, 0xAA, 0xAA); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VCLEAR(v8); + asm volatile("vmsltu.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(3, v8, 0xAA, 0xAA); +}; + +void TEST_CASE2(void) { + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_16(v6, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, 19901, + 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsltu.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(4, v2, 0x88, 0x88); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_32(v12, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsltu.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v4, 0x88, 0x88); + + VSET(16, e64, 
m8); + VLOAD_64(v16, 12345, 80, 2560, 19900, 12345, 80, 2560, 19900, 12345, 80, 2560, + 19900, 12345, 80, 2560, 19900); + VLOAD_64(v24, 50, 7000, 400, 19901, 50, 7000, 400, 19901, 50, 7000, 400, + 19901, 50, 7000, 400, 19901); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsltu.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(6, v8, 0x88, 0x88); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VCLEAR(v1); + asm volatile("vmsltu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(7, v1, 0x66, 0x66); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v2); + asm volatile("vmsltu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(8, v2, 0x66, 0x66); + + VSET(16, e32, m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v4); + asm volatile("vmsltu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v4, 0x66, 0x66); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VCLEAR(v8); + asm volatile("vmsltu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(10, v8, 0x66, 0x66); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 40; + + VSET(16, e8, m1); + VLOAD_8(v2, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, 199, 123, 8, 25, + 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v1); + asm volatile("vmsltu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v1, 0x44, 0x44); + + VSET(16, e16, m2); + VLOAD_16(v4, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v2); + asm volatile("vmsltu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(12, v2, 0x44, 0x44); + + VSET(16, e32, 
m4); + VLOAD_32(v8, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v4); + asm volatile("vmsltu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v4, 0x44, 0x44); + + VSET(16, e64, m8); + VLOAD_64(v16, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, 8, 25, 199, 12345, + 8, 25, 199); + VLOAD_8(v0, 0xCC, 0xCC); + VCLEAR(v8); + asm volatile("vmsltu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(14, v8, 0x44, 0x44); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c new file mode 100644 index 000000000..ddf5c0025 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsne.c @@ -0,0 +1,306 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmsne.vv v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x33, 0x33); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v8, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmsne.vv v2, v4, v8"); + VSET(2, e8, m1); + VCMP_U8(2, v2, 0x33, 0x33); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmsne.vv v4, v8, v12"); + VSET(2, e8, m1); + VCMP_U8(3, v4, 0x33, 0x33); // e32 compare writes its mask to v4 + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0,
0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmsne.vv v8, v16, v24"); + VSET(2, e8, m8); + VCMP_U8(4, v8, 0x33, 0x33); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, 0x0f, 0xf2, 0x01, 0xf0, + 0x0f, 0xf2, 0x01, 0xf0, 0x0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v1); + asm volatile("vmsne.vv v1, v2, v3, v0.t"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x22, 0x22); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_16(v6, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, + 0xf2ff, 0x0100, 0xf0f0, 0x0f0f, 0xf2ff, 0x0100, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v2); + asm volatile("vmsne.vv v2, v4, v6, v0.t"); + VSET(2, e8, m1); + VCMP_U8(6, v2, 0x22, 0x22); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_32(v12, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, + 0x01000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xfff2ffff, 0x01000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v4); + asm volatile("vmsne.vv v4, v8, v12, v0.t"); + VSET(2, e8, m1); + VCMP_U8(7, v4, 0x22, 0x22); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 
0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_64(v24, 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, 0x0100000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xfff2ffffffffffff, + 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xfff2ffffffffffff, 0x0100000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0xaa, 0xaa); + VCLEAR(v8); + asm volatile("vmsne.vv v8, v16, v24, v0.t"); + VSET(2, e8, m8); + VCMP_U8(8, v8, 0x22, 0x22); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmsne.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(9, v1, 0xee, 0xee); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmsne.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(10, v2, 0xee, 0xee); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmsne.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(11, v4, 0xee, 0xee); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm 
volatile("vmsne.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(12, v8, 0xff, 0xff); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 0x00000000ffffffff; + + VSET(16, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, 0x0f, 0xff, 0x00, 0xf0, + 0x0f, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmsne.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(13, v1, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f, + 0xffff, 0x0000, 0xf0f0, 0x0f0f, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmsne.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(14, v2, 0x00, 0x00); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0xffffffff, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmsne.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m1); + VCMP_U8(15, v4, 0x00, 0x00); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0xffffffffffffffff, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0xffffffffffffffff, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmsne.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VSET(2, e8, m8); + VCMP_U8(16, v8, 0x10, 0x10); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VCLEAR(v1); + asm volatile("vmsne.vi v1, v2, 15"); + VSET(2, e8, m1); + VCMP_U8(17, v1, 0x66, 0x66); + + VSET(16, 
e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VCLEAR(v2); + asm volatile("vmsne.vi v2, v4, 15"); + VSET(2, e8, m1); + VCMP_U8(18, v2, 0xee, 0xee); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VCLEAR(v4); + asm volatile("vmsne.vi v4, v8, 15"); + VSET(2, e8, m1); + VCMP_U8(19, v4, 0xee, 0xee); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VCLEAR(v8); + asm volatile("vmsne.vi v8, v16, 15"); + VSET(2, e8, m8); + VCMP_U8(20, v8, 0xee, 0xee); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, 0x0f, 0x0f, 0x00, 0xf0, + 0x0f, 0x0f, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v1); + asm volatile("vmsne.vi v1, v2, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(21, v1, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f, + 0x000f, 0x0000, 0xf0f0, 0x0f0f, 0x000f, 0x0000, 0xf0f0, 0x0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v2); + asm volatile("vmsne.vi v2, v4, 15, v0.t"); + VSET(2, e8, m1); + VCMP_U8(22, v2, 0x00, 0x00); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x0000000f, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, + 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, + 0xf0f0f0f0, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0xf0f0f0f0, + 0x0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v4); + asm volatile("vmsne.vi v4, v8, 15, 
v0.t"); + VSET(2, e8, m1); + VCMP_U8(23, v4, 0x00, 0x00); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, 0x0000000000000000, + 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, 0x000000000000000f, + 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, 0x0f0f0f0f0f0f0f0f, + 0x000000000000000f, 0x0000000000000000, 0xf0f0f0f0f0f0f0f0, + 0x0f0f0f0f0f0f0f0f); + VLOAD_8(v0, 0x10, 0x10); + VCLEAR(v8); + asm volatile("vmsne.vi v8, v16, 15, v0.t"); + VSET(2, e8, m8); + VCMP_U8(24, v8, 0x00, 0x00); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c new file mode 100644 index 000000000..b5dc5aae1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmsof.c @@ -0,0 +1,33 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v3, 8, 0, 0, 0, 0, 0, 0, 0); + __asm__ volatile("vmsof.m v2, v3"); + VCMP_U8(1, v2, 8, 0, 0, 0, 0, 0, 0, 0); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v3, 0, 0, 0, 1, 0, 0, 0, 0); + VLOAD_8(v0, 3, 0, 0, 0, 0, 0, 0, 0); + VCLEAR(v2); + __asm__ volatile("vmsof.m v2, v3, v0.t"); + VCMP_U8(2, v2, 0, 0, 0, 0, 0, 0, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c new file mode 100644 index 000000000..0ed9ce5cd --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmul.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, + 0x57, 0xbc, 0x1f, 0xd5, 0x13); + VLOAD_8(v3, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, + 0xb3, 0x6a, 0xae, 0x5a, 0xe1); + asm volatile("vmul.vv v1, v2, v3"); + VCMP_I8(1, v1, 0xf2, 0xfb, 0x61, 0x8c, 0x6b, 0xe3, 0xd9, 0xae, 0xa9, 0xc0, + 0x70, 0xd5, 0xd8, 0x12, 0xe2, 0xb3); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, + 0xccbd, 0x9ad4, 0xce46, 0xfbba, 0x10cc, 0xc463, 0x5298, 0x7b3e); + VLOAD_16(v6, 0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 0x5386, 0x16c9, 0x45fb, + 0x1439, 0x436f, 0x6f56, 0x90f7, 0x77c0, 0x0751, 0x64c3, 0x36e8); + asm volatile("vmul.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x5cec, 0x18c7, 0x3440, 0x1991, 0xd904, 0xf144, 0xcb0a, + 0xf903, 0x5a15, 0x9dec, 0xa584, 0x8076, 0x6d00, 0xd853, 0x49c8, + 0xc430); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, + 0xd5f3a23e, 0x9a295390, 0xb2367b2e, 0xfee5b6a2, 0x07cb59c1, + 0x6bf5cf9a, 0x7d75506b, 0x013c1e90, 0x600f9ca8, 0x6d4d0522, + 0x0a225ff0); + VLOAD_32(v12, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, + 0x957c2534, 0x70f8e127, 0xa79ebcec, 0x0a542044, 0x20e6ac3e, + 0xd61caed7, 0x6f4e7820, 0x27c56901, 0x0aaf1d61, 0xa95c6f5c, + 0x5b7aedf3); + asm volatile("vmul.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x2c862540, 0x85aefa14, 0xa5ab1776, 0x3be33adc, 0x5487b397, + 0x57f7ea98, 0xc23d4af0, 0x55135668, 0xad00c308, 0x46f368be, + 0x2f640656, 0x91f63560, 0x1e952e90, 0xd18163a8, 0xf71f9638, + 0x79d240d0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, + 0x7be639dfdb02db6f, 0x61bb44569da93eff, 0xcd7f973ce822182b, + 0x5434a22e7432397c, 0xcaadfd89d8dd1ad5, 0x5474c56d9089d672, + 
0x700e415c07b99bf9, 0xb89d409d4323a9c8, 0x68ccc7411db0ab09, + 0xdf4fe3fa4e113e98, 0xa98a2e2575b04c41, 0x26ddf248ccb4a7aa, + 0xdda1822d4972ee47); + VLOAD_64(v24, 0xcd2888c8bb07b736, 0xf20013627ac47d4f, 0xc05dbcd989ef329c, + 0xc272db2ebcf7cfad, 0x8869302db041176d, 0xd3d90968a9ee01c2, + 0x9cdb9f91a3381f51, 0x99ad38b70907ee8d, 0xf7d629b266c67cf1, + 0x706f9b996cdd60f2, 0x4caa2335622bd6a0, 0x94171c9dfbbb186f, + 0x7b6e42290f54ecc6, 0xa545b8670143bfbc, 0x9f430bf94b2805c9, + 0xb45030fc2b4cef12); + asm volatile("vmul.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x90d27e278d0d0c8c, 0x5ea9d3e60b6623a6, 0x6823b3e240d3adf4, + 0xc0dcea378c760b03, 0x17692726a477bb93, 0x784c7f2ee6e87b96, + 0xd1aae9975ffa343c, 0xfdcd46ca398ccd51, 0x405f01791dce1952, + 0x16063fbe99e7d162, 0xc9d244cddacf4d00, 0x22024848323600e7, + 0xb6dfea3bb8ea8990, 0x566db9e82c5f7ebc, 0x5fc0f2db41adf67a, + 0xdaab68ca209d09fe); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xb3, 0x5d, 0x3d, 0xa4, 0xbf, 0xc7, 0x6b, 0x95, 0xf9, 0x64, 0x52, + 0x57, 0xbc, 0x1f, 0xd5, 0x13); + VLOAD_8(v3, 0x46, 0x37, 0xf5, 0x2b, 0x55, 0x05, 0xcb, 0x76, 0x31, 0x30, 0x78, + 0xb3, 0x6a, 0xae, 0x5a, 0xe1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmul.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0xfb, 0, 0x8c, 0, 0xe3, 0, 0xae, 0, 0xc0, 0, 0xd5, 0, 0x12, + 0, 0xb3); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8132, 0x94b5, 0x245c, 0xd15d, 0xbfca, 0x18b6, 0xd5ba, 0x9299, + 0xccbd, 0x9ad4, 0xce46, 0xfbba, 0x10cc, 0xc463, 0x5298, 0x7b3e); + VLOAD_16(v6, 0xede6, 0x010b, 0xa570, 0x21c5, 0xfe5a, 0x5386, 0x16c9, 0x45fb, + 0x1439, 0x436f, 0x6f56, 0x90f7, 0x77c0, 0x0751, 0x64c3, 0x36e8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmul.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x18c7, 0, 0x1991, 0, 0xf144, 0, 0xf903, 0, 0x9dec, 0, + 0x8076, 0, 0xd853, 0, 0xc430); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c2bdc58, 0xe3995899, 0xbbbc0eda, 0x0729ff92, 0xa757a2c5, + 0xd5f3a23e, 0x9a295390, 0xb2367b2e, 0xfee5b6a2, 
0x07cb59c1, + 0x6bf5cf9a, 0x7d75506b, 0x013c1e90, 0x600f9ca8, 0x6d4d0522, + 0x0a225ff0); + VLOAD_32(v12, 0xc51e02f8, 0xae06b334, 0x397b1ec7, 0xc46f34fe, 0x4f9db2ab, + 0x957c2534, 0x70f8e127, 0xa79ebcec, 0x0a542044, 0x20e6ac3e, + 0xd61caed7, 0x6f4e7820, 0x27c56901, 0x0aaf1d61, 0xa95c6f5c, + 0x5b7aedf3); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmul.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x85aefa14, 0, 0x3be33adc, 0, 0x57f7ea98, 0, 0x55135668, 0, + 0x46f368be, 0, 0x91f63560, 0, 0xd18163a8, 0, 0x79d240d0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x077de9270ce00632, 0x2dadf4e020f3d47a, 0xb54ca84f9fe0573b, + 0x7be639dfdb02db6f, 0x61bb44569da93eff, 0xcd7f973ce822182b, + 0x5434a22e7432397c, 0xcaadfd89d8dd1ad5, 0x5474c56d9089d672, + 0x700e415c07b99bf9, 0xb89d409d4323a9c8, 0x68ccc7411db0ab09, + 0xdf4fe3fa4e113e98, 0xa98a2e2575b04c41, 0x26ddf248ccb4a7aa, + 0xdda1822d4972ee47); + VLOAD_64(v24, 0xcd2888c8bb07b736, 0xf20013627ac47d4f, 0xc05dbcd989ef329c, + 0xc272db2ebcf7cfad, 0x8869302db041176d, 0xd3d90968a9ee01c2, + 0x9cdb9f91a3381f51, 0x99ad38b70907ee8d, 0xf7d629b266c67cf1, + 0x706f9b996cdd60f2, 0x4caa2335622bd6a0, 0x94171c9dfbbb186f, + 0x7b6e42290f54ecc6, 0xa545b8670143bfbc, 0x9f430bf94b2805c9, + 0xb45030fc2b4cef12); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmul.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x5ea9d3e60b6623a6, 0, 0xc0dcea378c760b03, 0, + 0x784c7f2ee6e87b96, 0, 0xfdcd46ca398ccd51, 0, 0x16063fbe99e7d162, 0, + 0x22024848323600e7, 0, 0x566db9e82c5f7ebc, 0, 0xdaab68ca209d09fe); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, + 0x28, 0x6a, 0x91, 0x14, 0x4f); + int64_t scalar = 5; + asm volatile("vmul.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0x80, 0x7a, 0x8e, 0x33, 0x8b, 0xb6, 0xa6, 0x9f, 0x57, 0xb3, + 0x74, 0xc8, 0x12, 0xd5, 0x64, 0x8b); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 
0x3494, + 0xd281, 0x6d38, 0x378d, 0xa91d, 0xd731, 0xa4c7, 0x4d8f, 0x2422); + scalar = -5383; + asm volatile("vmul.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x7530, 0xdd7d, 0xe306, 0xcaea, 0xebf8, 0x1e20, 0x598f, + 0x6bf4, 0xa979, 0x6b78, 0xea25, 0xff35, 0x18a9, 0x2b8f, 0x2617, + 0x3912); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 0x55474d7d, + 0x3dae9eaf, 0xc19a3519, 0x6922f03c, 0x42edfa01, 0x1f60b344, + 0x82f31d5e, 0x0faa2e5c, 0x74e95cfa, 0x9fcdae3b, 0xe6c4e0a0, + 0x45549cbc); + scalar = 6474219; + asm volatile("vmul.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0xaf4a8094, 0x77dff36c, 0x44dc1ca6, 0x16e6a8c5, 0xee2546bf, + 0x78e111a5, 0x1fd15ef3, 0xe8a9a314, 0xfe2147eb, 0x5a8cf36c, + 0x5536c34a, 0xbed6ca74, 0x23eca37e, 0xe2314329, 0x6857d2e0, + 0x13b37c94); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, + 0x4e6e3c114a19f160, 0xfd403cbcc59407a1, 0xef3e81a68ae0e48c, + 0xd93a7b1ab54d024e, 0x5f7460aa9f4c4920, 0x4c91150cd4b54f60, + 0x18f2a6528629633f, 0x201b8bdb3c140400, 0x6be03c1074d46ada, + 0xcd0e6874555602d4, 0xb70264bd366ff52f, 0xc0b1fa64cec9368d, + 0x13e86249a0235941); + scalar = -598189234597999223; + asm volatile("vmul.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x61ead1213f09307c, 0x7d03f4c84c5e86fd, 0x4aa0acc4e01fa112, + 0x77bc957fdeec0c60, 0x762b14c112e60229, 0xbac65562e2366aec, + 0xcc243dd1e80ab1be, 0xa871135122a1c220, 0x3d0db00992575a60, + 0x8b07763affcd8fb7, 0xc8377fc475ea2400, 0xd8827c839711c0aa, + 0x5a5779ec4f334774, 0x243fd844e74ed927, 0x49204b7602871a75, + 0x33ae5ea6d335d0c9); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0xb2, 0xb6, 0xd7, 0x4f, 0xbe, 0xee, 0x53, 0xab, 0x57, 0xe4, + 0x28, 0x6a, 0x91, 0x14, 0x4f); + int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmul.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0x7a, 0, 0x33, 0, 0xb6, 0, 0x9f, 0, 
0xb3, 0, 0xc8, 0, 0xd5, + 0, 0x8b); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xbab0, 0x83a5, 0x06b6, 0x22ba, 0x91b8, 0x7720, 0xc2c7, 0x3494, + 0xd281, 0x6d38, 0x378d, 0xa91d, 0xd731, 0xa4c7, 0x4d8f, 0x2422); + scalar = -5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmul.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xdd7d, 0, 0xcaea, 0, 0x1e20, 0, 0x6bf4, 0, 0x6b78, 0, + 0xff35, 0, 0x2b8f, 0, 0x3912); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bb4a8bc, 0x9799b344, 0xcd8c1672, 0xeb2d7c0f, 0x55474d7d, + 0x3dae9eaf, 0xc19a3519, 0x6922f03c, 0x42edfa01, 0x1f60b344, + 0x82f31d5e, 0x0faa2e5c, 0x74e95cfa, 0x9fcdae3b, 0xe6c4e0a0, + 0x45549cbc); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmul.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x77dff36c, 0, 0x16e6a8c5, 0, 0x78e111a5, 0, 0xe8a9a314, + 0, 0x5a8cf36c, 0, 0xbed6ca74, 0, 0xe2314329, 0, 0x13b37c94); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc238e0a3df21299c, 0xb642655c3ab064d5, 0xd19f84bab77e5602, + 0x4e6e3c114a19f160, 0xfd403cbcc59407a1, 0xef3e81a68ae0e48c, + 0xd93a7b1ab54d024e, 0x5f7460aa9f4c4920, 0x4c91150cd4b54f60, + 0x18f2a6528629633f, 0x201b8bdb3c140400, 0x6be03c1074d46ada, + 0xcd0e6874555602d4, 0xb70264bd366ff52f, 0xc0b1fa64cec9368d, + 0x13e86249a0235941); + scalar = -598189234597999223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmul.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x7d03f4c84c5e86fd, 0, 0x77bc957fdeec0c60, 0, + 0xbac65562e2366aec, 0, 0xa871135122a1c220, 0, 0x8b07763affcd8fb7, 0, + 0xd8827c839711c0aa, 0, 0x243fd844e74ed927, 0, 0x33ae5ea6d335d0c9); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c new file mode 100644 index 000000000..5ee5b94ef --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulh.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, + 0xf6, 0x27, 0x57, 0x4f, 0xef); + VLOAD_8(v3, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, + 0x07, 0x8b, 0x2e, 0x9e, 0x5f); + asm volatile("vmulh.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x01, 0xff, 0x1c, 0x00, 0xf8, 0x00, 0xf0, 0xfa, 0x02, 0xef, + 0xfb, 0xff, 0xee, 0x0f, 0xe1, 0xf9); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, + 0x55df, 0x8a78, 0x413c, 0xeb32, 0x6bc4, 0x3e47, 0x3d79, 0x2c8f); + VLOAD_16(v6, 0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, + 0xb589, 0x709e, 0xc75d, 0xafa5, 0x4fd5, 0x2d77, 0x8dbf, 0x3a0a); + asm volatile("vmulh.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x331f, 0xd74c, 0x11d1, 0xd497, 0x2c1c, 0xfeff, 0xe8b2, + 0x1f2b, 0xe705, 0xcc4b, 0xf191, 0x0687, 0x219b, 0x0b0f, 0xe490, + 0x0a1a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, + 0x211088fe, 0x64bfd390, 0x1d49c8d8, 0x7a12a08a, 0x190ee9ae, + 0x361172f8, 0x52457515, 0x05d4b17b, 0x7bb6d43b, 0x96270cc7, + 0x62d35f88); + VLOAD_32(v12, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, + 0xdcd3e2ea, 0x05cec46d, 0xbe70ebd4, 0xe15e49c5, 0x81be068b, + 0x49fd9ad8, 0x6c2a5abd, 0x26216dd6, 0x9e3188ac, 0x14af13c4, + 0xd98c6d7f); + asm volatile("vmulh.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x0a83425c, 0xf40e8502, 0xf6419389, 0x209df360, 0xf27b2982, + 0xfb750aac, 0x02491ecb, 0xf87fe57b, 0xf164b493, 0xf3a433c8, + 0x0fa089bb, 0x22c2e9f3, 0x00de5543, 0xd0bbf2cc, 0xf772a985, + 0xf128024f); + + VSET(16, e64, m8); + VLOAD_64(v16, 
0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, + 0xe8ced2e9009d363f, 0xa88c741be4e81893, 0x4a7655ec12afe587, + 0x50c5efa017138cb9, 0x88e076b6ef49619d, 0x5745683769adf333, + 0x5b3b01f4b1c4fd42, 0x8a3d55e48864d144, 0x2eac97fae4174cac, + 0xdb8804ccf6f55686, 0xf7bea87bac575241, 0x250ed7ddade1432d, + 0x06ae542295f32453); + VLOAD_64(v24, 0xc5c41e47b5f3de5f, 0xa03833fb95a7e7e2, 0x74f0573dba05b058, + 0x687968e9ba2a98ad, 0x29f4aaf3e5e4f2b6, 0x2c40a650d53f6f08, + 0x491da2c816388b78, 0x2822d8207421ec15, 0x5dd8d394b292512a, + 0x4169844eea56920d, 0x97183b6e1e85fd70, 0x224077bf8899614c, + 0x3a9c0520417d4f32, 0xee47b09a33f49fca, 0x3f9f1140fbd02e0a, + 0x6106ad88eabfc3e2); + asm volatile("vmulh.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x164eafe1cab0639c, 0xf096db86d4d06824, 0x033fc2aecddc0dd7, + 0xf68905ef31703000, 0xf1aaea11162383ae, 0x0cdf24ba4cf3be38, + 0x1711cb1d2f008de9, 0xed52dbcaa3de5ca2, 0x1ffe218cf60b6bf9, + 0x174f95d97aff7bf9, 0x3041b22ecc97909a, 0x063ead2a7756c9da, + 0xf7a6973e6c9e5ce6, 0x0092485623082173, 0x0935add704f8cec8, + 0x02883a7e75391040); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0xc2, 0xf6, 0xad, 0x0f, 0xc6, 0xeb, 0xca, 0xf3, 0xf3, 0xd9, 0xf4, + 0xf6, 0x27, 0x57, 0x4f, 0xef); + VLOAD_8(v3, 0xf9, 0x0c, 0xa8, 0x05, 0x23, 0xff, 0x48, 0x74, 0xd4, 0x6b, 0x5b, + 0x07, 0x8b, 0x2e, 0x9e, 0x5f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulh.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0xff, 0, 0x00, 0, 0x00, 0, 0xfa, 0, 0xef, 0, 0xff, 0, 0x0f, + 0, 0xf9); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x911a, 0x9f44, 0x3d2a, 0xa2a8, 0x5aae, 0x0231, 0x56fa, 0xb8b8, + 0x55df, 0x8a78, 0x413c, 0xeb32, 0x6bc4, 0x3e47, 0x3d79, 0x2c8f); + VLOAD_16(v6, 0x89fd, 0x6bb7, 0x4a94, 0x770c, 0x7c87, 0x8b01, 0xbb6a, 0x900d, + 0xb589, 0x709e, 0xc75d, 0xafa5, 0x4fd5, 0x2d77, 0x8dbf, 0x3a0a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulh.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0xd74c, 0, 0xd497, 0, 0xfeff, 0, 0x1f2b, 0, 0xcc4b, 0, 
+ 0x0687, 0, 0x0b0f, 0, 0x0a1a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc66253f4, 0x710c314c, 0xa6fe579b, 0xa7947b70, 0xbf94259f, + 0x211088fe, 0x64bfd390, 0x1d49c8d8, 0x7a12a08a, 0x190ee9ae, + 0x361172f8, 0x52457515, 0x05d4b17b, 0x7bb6d43b, 0x96270cc7, + 0x62d35f88); + VLOAD_32(v12, 0xd14a266e, 0xe4f43ca5, 0x1c067312, 0xa1909d51, 0x35b8d1aa, + 0xdcd3e2ea, 0x05cec46d, 0xbe70ebd4, 0xe15e49c5, 0x81be068b, + 0x49fd9ad8, 0x6c2a5abd, 0x26216dd6, 0x9e3188ac, 0x14af13c4, + 0xd98c6d7f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulh.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0xf40e8502, 0, 0x209df360, 0, 0xfb750aac, 0, 0xf87fe57b, 0, + 0xf3a433c8, 0, 0x22c2e9f3, 0, 0xd0bbf2cc, 0, 0xf128024f); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x9def74822cdf1a42, 0x29307e854b225449, 0x071cdf51785d150e, + 0xe8ced2e9009d363f, 0xa88c741be4e81893, 0x4a7655ec12afe587, + 0x50c5efa017138cb9, 0x88e076b6ef49619d, 0x5745683769adf333, + 0x5b3b01f4b1c4fd42, 0x8a3d55e48864d144, 0x2eac97fae4174cac, + 0xdb8804ccf6f55686, 0xf7bea87bac575241, 0x250ed7ddade1432d, + 0x06ae542295f32453); + VLOAD_64(v24, 0xc5c41e47b5f3de5f, 0xa03833fb95a7e7e2, 0x74f0573dba05b058, + 0x687968e9ba2a98ad, 0x29f4aaf3e5e4f2b6, 0x2c40a650d53f6f08, + 0x491da2c816388b78, 0x2822d8207421ec15, 0x5dd8d394b292512a, + 0x4169844eea56920d, 0x97183b6e1e85fd70, 0x224077bf8899614c, + 0x3a9c0520417d4f32, 0xee47b09a33f49fca, 0x3f9f1140fbd02e0a, + 0x6106ad88eabfc3e2); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulh.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xf096db86d4d06824, 0, 0xf68905ef31703000, 0, + 0x0cdf24ba4cf3be38, 0, 0xed52dbcaa3de5ca2, 0, 0x174f95d97aff7bf9, 0, + 0x063ead2a7756c9da, 0, 0x0092485623082173, 0, 0x02883a7e75391040); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, + 0x94, 0x95, 0xf6, 0xd4, 0xbd); + int64_t scalar = 5; + asm volatile("vmulh.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0xfd, 0xff, 
0xff, 0x01, 0x01, 0x00, 0xfe, 0xff, 0x00, 0xfd, + 0x02, 0xfd, 0xfd, 0xff, 0xff, 0xfe); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, + 0x3a43, 0x58b5, 0xb9e9, 0xa4e8, 0x4bac, 0x5636, 0x9f4a, 0xbd52); + scalar = -5383; + asm volatile("vmulh.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0xfb1a, 0xf632, 0xffbf, 0x07ef, 0xfe5d, 0x0947, 0x0047, + 0xfb4e, 0xfb36, 0xf8b6, 0x05c1, 0x077b, 0xf9c8, 0xf8eb, 0x07f1, + 0x057a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x7efca225, 0xdbc0a9ca, 0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, + 0x3d878c29, 0x15809928, 0x7b0b7421, 0x48b872f5, 0xafbfeab4, + 0xe79dc9ba, 0xe60a8fc0, 0x1fd7e866, 0xed7df17c, 0x0684a7ee, + 0xb2b01a61); + scalar = 6474219; + asm volatile("vmulh.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0x003100de, 0xfff20329, 0x0004fe25, 0xffe7ec74, 0x00314160, + 0x0017be6b, 0x00084c30, 0x002f7b6f, 0x001c0ff7, 0xffe1082b, + 0xfff6972b, 0xfff5fb91, 0x000c49c4, 0xfff8db9d, 0x000283ec, + 0xffe22a6f); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, + 0x4fcb68f516b589c9, 0x414b0eeb29c35e62, 0x038c6221829f4241, + 0xf2c2c11f26e326b0, 0xed9ad0ce4d50a009, 0xc57105a57d435897, + 0x90c1615935c1abd0, 0xf5b41f9a0a988065, 0xb09790bdcbecee7b, + 0x1d9da4f87df33b54, 0xe347aadb53bdc879, 0x7a39a7269cbae2a7, + 0x422ed2952e246f26); + scalar = -598189234597999223; + asm volatile("vmulh.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0xfdd4850b300f6008, 0x01571f899f226d57, 0x001b0534decdc9a2, + 0xfd6994f8de6e51aa, 0xfde1f73873e6758a, 0xffe28b043b9b8971, + 0x006de7f819baba3d, 0x0098b57f65f599e1, 0x01e62040839e971b, + 0x039b807e6f36fd81, 0x005579d7e0c206af, 0x0293356120e5cee9, + 0xff0a24a69d9af87b, 0x00ee6bb505683322, 0xfc095797a6198143, + 0xfdda946cdb2f169c); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x92, 0xce, 0xdd, 0x64, 0x60, 0x29, 0xa6, 0xd5, 0x07, 0x8c, 0x71, + 0x94, 0x95, 0xf6, 0xd4, 0xbd); + 
int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulh.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0xff, 0, 0x01, 0, 0x00, 0, 0xff, 0, 0xfd, 0, 0xfd, 0, 0xff, + 0, 0xfe); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x3b9b, 0x7758, 0x030f, 0x9f60, 0x13e2, 0x8f0d, 0xfc9d, 0x3922, + 0x3a43, 0x58b5, 0xb9e9, 0xa4e8, 0x4bac, 0x5636, 0x9f4a, 0xbd52); + scalar = -5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulh.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xf632, 0, 0x07ef, 0, 0x0947, 0, 0xfb4e, 0, 0xf8b6, 0, + 0x077b, 0, 0xf8eb, 0, 0x057a); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x7efca225, 0xdbc0a9ca, 0x0cf02cf8, 0xc19bdc84, 0x7fa3ca90, + 0x3d878c29, 0x15809928, 0x7b0b7421, 0x48b872f5, 0xafbfeab4, + 0xe79dc9ba, 0xe60a8fc0, 0x1fd7e866, 0xed7df17c, 0x0684a7ee, + 0xb2b01a61); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulh.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0xfff20329, 0, 0xffe7ec74, 0, 0x0017be6b, 0, 0x002f7b6f, + 0, 0xffe1082b, 0, 0xfff5fb91, 0, 0xfff8db9d, 0, 0xffe22a6f); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x42e9b386e7453715, 0xd6aae3fda4b2f3e8, 0xfcbec1ad7996a0b2, + 0x4fcb68f516b589c9, 0x414b0eeb29c35e62, 0x038c6221829f4241, + 0xf2c2c11f26e326b0, 0xed9ad0ce4d50a009, 0xc57105a57d435897, + 0x90c1615935c1abd0, 0xf5b41f9a0a988065, 0xb09790bdcbecee7b, + 0x1d9da4f87df33b54, 0xe347aadb53bdc879, 0x7a39a7269cbae2a7, + 0x422ed2952e246f26); + scalar = -598189234597999223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulh.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x01571f899f226d57, 0, 0xfd6994f8de6e51aa, 0, + 0xffe28b043b9b8971, 0, 0x0098b57f65f599e1, 0, 0x039b807e6f36fd81, 0, + 0x0293356120e5cee9, 0, 0x00ee6bb505683322, 0, 0xfdda946cdb2f169c); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c new file mode 100644 index 000000000..1bc3cc448 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhsu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, + 0x35, 0xfc, 0x70, 0x33, 0xe4); + VLOAD_8(v3, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, + 0xa8, 0xbb, 0xc6, 0x65, 0x81); + asm volatile("vmulhsu.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x37, 0x0b, 0xe9, 0xec, 0x28, 0x00, 0x20, 0xf8, 0xe2, 0xd1, + 0xbb, 0x22, 0xfd, 0x56, 0x14, 0xf1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, + 0x5d37, 0xd64e, 0xe792, 0xdb64, 0xfaa6, 0xafe6, 0xf4e8, 0xe5ea); + VLOAD_16(v6, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, + 0xdda1, 0xb5d7, 0xafbc, 0xc74f, 0xab45, 0x986f, 0xf0f2, 0xcf3c); + asm volatile("vmulhsu.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x0726, 0x2e28, 0xe5c6, 0xffaf, 0x4604, 0x199f, 0x094f, + 0xac6d, 0x50b3, 0xe262, 0xef3a, 0xe37f, 0xfc6b, 0xd04d, 0xf58f, + 0xeae2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, + 0x87ea397b, 0x862e2800, 0x3cd09f37, 0x9a313d78, 0x596661ee, + 0x31f99717, 0x64e65802, 0xbd567027, 0xf7c459be, 0x57b6d9cd, + 0x94bc3eb4); + VLOAD_32(v12, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, + 0x29ba4191, 0x637f89b7, 0xe39d7818, 0xf48ff2d6, 0xb1dc7c7e, + 0xfa5da298, 0x5c1aae36, 0x83e04069, 0xecf36c08, 0x40d2e3a3, + 0xe7468a97); + asm volatile("vmulhsu.vv v4, v8, v12"); + VCMP_I32(3, v4, 0xf49d2cff, 0xfa9a5a26, 0x2444f976, 0xa25f8c94, 0xef3f26f6, 
+ 0xec6d24a0, 0xd0a728d5, 0x361265a6, 0x9ebdaf85, 0x3e1cc92b, + 0x30e004f5, 0x244d4baf, 0xdda8d640, 0xf8612ea2, 0x1635f870, + 0x9f184dfb); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, + 0x422ad458a7cbf321, 0x01e0f752e5d8bb37, 0xe7762162abff4c4c, + 0x36279dbbf009199d, 0x188dda33d835d9e4, 0xa0f5a67450e87d77, + 0xb43066649033e7ac, 0xb47ff6241cc77c2c, 0xfce0bafc1d36b615, + 0x045b90f3b63e0f7f, 0x514e5121be1f02e5, 0x06c9e97573723e47, + 0x406879d908a80b41); + VLOAD_64(v24, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, + 0xb9ad8b0339e659cd, 0x5af15755f3954b0f, 0x6b2fb3e49bd48e69, + 0x084244757fba5561, 0xf2d5b41ee89411fa, 0x8585111aaee16c07, + 0xcd1a427644b0ad59, 0x2356738fd6b04f3a, 0x89d936a76f0a518a, + 0x5f2df66443ff24b3, 0x6cbfcf273c43ae6b, 0xabb59d9f05a03eef, + 0xb84832df19fc19b6); + asm volatile("vmulhsu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x23e3020d5d8e40d8, 0x0da067e42d62fa2a, 0xe17ee107c3fdd97f, + 0x2ffdce53a7ef7aa4, 0x00aadc600f6180bd, 0xf5b9cd660e9f294b, + 0x01bf419feafa3fe5, 0x174a979243e0945b, 0xce6e38c0508aba17, + 0xc342fb3a620dde75, 0xf593ff8eafcca075, 0xfe519de4c807844e, + 0x019ec3149daf2fc0, 0x2289f5738e0e6d23, 0x048dafe18fe3288b, + 0x2e5d41c2cc9b604f); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x53, 0x4f, 0xde, 0xea, 0x47, 0x3c, 0x41, 0xf8, 0xd6, 0xd0, 0x93, + 0x35, 0xfc, 0x70, 0x33, 0xe4); + VLOAD_8(v3, 0xaa, 0x24, 0xaa, 0xde, 0x92, 0x00, 0x7f, 0xe5, 0xb3, 0xf8, 0xa0, + 0xa8, 0xbb, 0xc6, 0x65, 0x81); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhsu.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x0b, 0, 0xec, 0, 0x00, 0, 0xf8, 0, 0xd1, 0, 0x22, 0, 0x56, + 0, 0xf1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x0f50, 0x47f9, 0xa4ca, 0xf94d, 0x720c, 0x444c, 0x3681, 0x96bd, + 0x5d37, 0xd64e, 0xe792, 0xdb64, 0xfaa6, 0xafe6, 0xf4e8, 0xe5ea); + VLOAD_16(v6, 0x7784, 0xa42e, 0x499b, 0x0c01, 0x9d2b, 0x600d, 0x2bbd, 0xcb41, + 0xdda1, 0xb5d7, 0xafbc, 0xc74f, 0xab45, 
0x986f, 0xf0f2, 0xcf3c); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhsu.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x2e28, 0, 0xffaf, 0, 0x199f, 0, 0xac6d, 0, 0xe262, 0, + 0xe37f, 0, 0xd04d, 0, 0xeae2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xeded4bf3, 0xc9e27167, 0x4175509c, 0x80a3ae04, 0x9f1b2c07, + 0x87ea397b, 0x862e2800, 0x3cd09f37, 0x9a313d78, 0x596661ee, + 0x31f99717, 0x64e65802, 0xbd567027, 0xf7c459be, 0x57b6d9cd, + 0x94bc3eb4); + VLOAD_32(v12, 0xa147b233, 0x19880f3d, 0x8dd8815e, 0xbc318dca, 0x2c436b94, + 0x29ba4191, 0x637f89b7, 0xe39d7818, 0xf48ff2d6, 0xb1dc7c7e, + 0xfa5da298, 0x5c1aae36, 0x83e04069, 0xecf36c08, 0x40d2e3a3, + 0xe7468a97); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhsu.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0xfa9a5a26, 0, 0xa25f8c94, 0, 0xec6d24a0, 0, 0x361265a6, 0, + 0x3e1cc92b, 0, 0x244d4baf, 0, 0xf8612ea2, 0, 0x9f184dfb); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x2b1f761d24dcff24, 0x1174fcea60fbf146, 0xaa5068c22e71489d, + 0x422ad458a7cbf321, 0x01e0f752e5d8bb37, 0xe7762162abff4c4c, + 0x36279dbbf009199d, 0x188dda33d835d9e4, 0xa0f5a67450e87d77, + 0xb43066649033e7ac, 0xb47ff6241cc77c2c, 0xfce0bafc1d36b615, + 0x045b90f3b63e0f7f, 0x514e5121be1f02e5, 0x06c9e97573723e47, + 0x406879d908a80b41); + VLOAD_64(v24, 0xd50adee8d491db29, 0xc7d423514dd58616, 0x5b22f7a3971e17f9, + 0xb9ad8b0339e659cd, 0x5af15755f3954b0f, 0x6b2fb3e49bd48e69, + 0x084244757fba5561, 0xf2d5b41ee89411fa, 0x8585111aaee16c07, + 0xcd1a427644b0ad59, 0x2356738fd6b04f3a, 0x89d936a76f0a518a, + 0x5f2df66443ff24b3, 0x6cbfcf273c43ae6b, 0xabb59d9f05a03eef, + 0xb84832df19fc19b6); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulhsu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x0da067e42d62fa2a, 0, 0x2ffdce53a7ef7aa4, 0, + 0xf5b9cd660e9f294b, 0, 0x174a979243e0945b, 0, 0xc342fb3a620dde75, 0, + 0xfe519de4c807844e, 0, 0x2289f5738e0e6d23, 0, 0x2e5d41c2cc9b604f); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x21, 0x87, 
0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, + 0xa4, 0xdc, 0x4f, 0xc3, 0x55); + uint64_t scalar = 5; + asm volatile("vmulhsu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0x00, 0xfd, 0xfe, 0xfe, 0x02, 0x02, 0x02, 0x02, 0x02, 0xfd, + 0x01, 0xfe, 0xff, 0x01, 0xfe, 0x01); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, + 0x398c, 0xb933, 0x436d, 0x748f, 0x58d9, 0x1cd6, 0x86db, 0x20f2); + scalar = 816; + asm volatile("vmulhsu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x013a, 0xff3a, 0xff05, 0x001c, 0x0015, 0x005d, 0x0172, + 0xfff5, 0x00b7, 0xff1e, 0x00d6, 0x0173, 0x011b, 0x005b, 0xfe7d, + 0x0069); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, + 0xb952c1ad, 0x9eaa74c0, 0x9e38d5c8, 0x51c77b3b, 0xa5f44521, + 0x65042faa, 0x8e7e5345, 0x76ae481c, 0x0ab27b6f, 0xa388cf2b, + 0x58218f7f); + scalar = 7389998; + asm volatile("vmulhsu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0xffd92575, 0xffde51c5, 0xffe1831f, 0x00025357, 0x000b6288, + 0xffe0de52, 0xffd5205d, 0xffd4ee51, 0x0024059f, 0xffd85637, + 0x002c7ed9, 0xffce00ba, 0x003446bb, 0x0004b63d, 0xffd7455b, + 0x0026d1e0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, + 0x6f0fa9a63860308a, 0x07b5d372a7be167c, 0x234be9472899553e, + 0x25655d82cb668037, 0x959d6233470780ee, 0xf3d683308326232a, + 0x7b6dddfcd86d6737, 0x02b8177716c29a3e, 0x11220f42ce0594b4, + 0x8382e0c79caa1e6c, 0x0d1593d36c1dc00e, 0x9f8eb889cc8e98c6, + 0x37411f40369680d2); + scalar = 321156886679781445; + asm volatile("vmulhsu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x012e0fe6705cf26d, 0x01f63e6c65840868, 0xfe4b3a837bcf749f, + 0x01eefe6ad67c584e, 0x00225d3cec11ae29, 0x009d50942207fb0e, + 0x00a6abfb9cc735df, 0xfe25d8c13270b026, 0xffc9cb59c445c91a, + 0x02261e05ece3e474, 0x000c1e7198242fc2, 0x004c5c6f8fd9129e, + 0xfdd529022c30504c, 0x003a50e3baabab1e, 
0xfe522930314d6d7d, + 0x00f6440fa9591c62); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x21, 0x87, 0xa0, 0xa8, 0x6a, 0x6f, 0x6a, 0x6b, 0x74, 0x99, 0x37, + 0xa4, 0xdc, 0x4f, 0xc3, 0x55); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhsu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0xfd, 0, 0xfe, 0, 0x02, 0, 0x02, 0, 0xfd, 0, 0xfe, 0, 0x01, + 0, 0x01); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x62b8, 0xc1e3, 0xb151, 0x08ce, 0x06c4, 0x1d2f, 0x7448, 0xfcd5, + 0x398c, 0xb933, 0x436d, 0x748f, 0x58d9, 0x1cd6, 0x86db, 0x20f2); + scalar = 816; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhsu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xff3a, 0, 0x001c, 0, 0x005d, 0, 0xfff5, 0, 0xff1e, 0, + 0x0173, 0, 0x005b, 0, 0x0069); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xa7cac3f7, 0xb3894e05, 0xbac8e70b, 0x05479577, 0x19d8bf63, + 0xb952c1ad, 0x9eaa74c0, 0x9e38d5c8, 0x51c77b3b, 0xa5f44521, + 0x65042faa, 0x8e7e5345, 0x76ae481c, 0x0ab27b6f, 0xa388cf2b, + 0x58218f7f); + scalar = 7389998; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhsu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0xffde51c5, 0, 0x00025357, 0, 0xffe0de52, 0, 0xffd4ee51, + 0, 0xffd85637, 0, 0xffce00ba, 0, 0x0004b63d, 0, 0x0026d1e0); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x43c5f863d4be9b51, 0x70b017b4c5d0d11e, 0x9e008a07f48796fe, + 0x6f0fa9a63860308a, 0x07b5d372a7be167c, 0x234be9472899553e, + 0x25655d82cb668037, 0x959d6233470780ee, 0xf3d683308326232a, + 0x7b6dddfcd86d6737, 0x02b8177716c29a3e, 0x11220f42ce0594b4, + 0x8382e0c79caa1e6c, 0x0d1593d36c1dc00e, 0x9f8eb889cc8e98c6, + 0x37411f40369680d2); + scalar = 321156886679781445; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulhsu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x01f63e6c65840868, 0, 0x01eefe6ad67c584e, 0, + 0x009d50942207fb0e, 0, 0xfe25d8c13270b026, 0, 0x02261e05ece3e474, 0, + 
0x004c5c6f8fd9129e, 0, 0x003a50e3baabab1e, 0, 0x00f6440fa9591c62); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c new file mode 100644 index 000000000..bfcc137ea --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmulhu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, + 0xde, 0x87, 0xcb, 0x7a, 0x83); + VLOAD_8(v3, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, + 0xe3, 0xd8, 0x8f, 0xa0, 0x0d); + asm volatile("vmulhu.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x13, 0x9a, 0xc7, 0x04, 0x0d, 0x28, 0x02, 0x14, 0x96, 0x03, + 0x52, 0xc4, 0x71, 0x71, 0x4c, 0x06); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 0xe141, 0x69e6, 0x4133, 0xf783, + 0x91ef, 0x1897, 0xf0bb, 0x0e07, 0xb8eb, 0x3f5a, 0x9f5d, 0xa626); + VLOAD_16(v6, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, + 0xf7bb, 0x181f, 0x2b5a, 0xdf85, 0x44f3, 0x27fe, 0x8412, 0xcda0); + asm volatile("vmulhu.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x47f6, 0x016b, 0x1df0, 0x7b4d, 0xc8a5, 0x2364, 0x1674, + 0x5f6b, 0x8d38, 0x0251, 0x28c4, 0x0c3f, 0x31cd, 0x09e5, 0x5237, + 0x8574); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, + 0x1ab510aa, 0xc290b86c, 0xa7e9a02e, 0x5c3f3bb3, 0x70a3dfae, + 0x16baad22, 0x21758cfb, 0x09033e60, 0x8b31075e, 0x6439b7bf, + 0xead33cf0); + VLOAD_32(v12, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, + 0x02a39cfa, 0xf7595181, 0x0c230035, 
0x86cf9ea9, 0x0f66ddd3, + 0x13351370, 0xbe489ce5, 0x4127f488, 0xe6b5e1b3, 0xc6918270, + 0xccc8626a); + asm volatile("vmulhu.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x3b858c79, 0x0a3386a5, 0x55117fe4, 0x664a7ee4, 0x3b2f618b, + 0x00467bcb, 0xbbfd8432, 0x07f5e895, 0x3093e98c, 0x06c6dd00, + 0x01b49139, 0x18debc33, 0x024b3af0, 0x7d70f100, 0x4dbd9bdf, + 0xbbd823dc); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, + 0x34e09e5173ee6301, 0xd1f03c2e38769683, 0x1b1f454816d4ed10, + 0xed4a4f231da4abb3, 0xc87b025e6da277dc, 0x8da43ddf6feb6aae, + 0x7dcf10ced634db74, 0x736fd9583bc2de91, 0xa66de0036d350cbc, + 0x40bf5ec7afca9ec2, 0x5bb552a7b134ba79, 0x6ae5d02d7c121603, + 0x8a7621ad8d6f104a); + VLOAD_64(v24, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, + 0xa1b1262536ea3c57, 0x67f334468e5cde4f, 0x8ae5618164bd63fd, + 0x2f8be93c1d7807c3, 0x0444a9f4ccff2a2c, 0x6cac5e35bf847d59, + 0x1d92c5117b87a392, 0x124597d21d757a4e, 0x4ec5a9fb5b8a6591, + 0xb5b4189dd6080734, 0xf75ddacea0effff6, 0x5c3cb19fbc1c7580, + 0xff93a562f06d3641); + asm volatile("vmulhu.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x29e7e403b1955330, 0x015742ce71e2c757, 0x08609392d9402e03, + 0x2165dabfb788d03d, 0x553f1a1e61409141, 0x0eb728a66479b5fb, + 0x2c125410c5448322, 0x0357b1cf05241ad9, 0x3c20a893e10635bb, + 0x0e8895d7f39e953c, 0x083d3ee38137c9b0, 0x3335fb506009220b, + 0x2df4dacbb013b6b0, 0x589d920140d7dd8c, 0x2683eed8bb77fa43, + 0x8a3b86d4dd8169cf); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x3b, 0xb2, 0xd1, 0x3e, 0x39, 0x2c, 0x08, 0xc5, 0xbf, 0x54, 0x6c, + 0xde, 0x87, 0xcb, 0x7a, 0x83); + VLOAD_8(v3, 0x55, 0xde, 0xf4, 0x14, 0x3c, 0xed, 0x47, 0x1b, 0xca, 0x0b, 0xc4, + 0xe3, 0xd8, 0x8f, 0xa0, 0x0d); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhu.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0, 0x9a, 0, 0x04, 0, 0x28, 0, 0x14, 0, 0x03, 0, 0xc4, 0, 0x71, + 0, 0x06); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xe6e1, 0x02a1, 0x2911, 0xe3c3, 
0xe141, 0x69e6, 0x4133, 0xf783, + 0x91ef, 0x1897, 0xf0bb, 0x0e07, 0xb8eb, 0x3f5a, 0x9f5d, 0xa626); + VLOAD_16(v6, 0x4fcb, 0x8a38, 0xbaa0, 0x8a97, 0xe409, 0x558e, 0x582b, 0x62b1, + 0xf7bb, 0x181f, 0x2b5a, 0xdf85, 0x44f3, 0x27fe, 0x8412, 0xcda0); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhu.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0, 0x016b, 0, 0x7b4d, 0, 0x2364, 0, 0x5f6b, 0, 0x0251, 0, + 0x0c3f, 0, 0x09e5, 0, 0x8574); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf129e694, 0x8dfc72a3, 0xc9911598, 0xd20083ec, 0xe7f36604, + 0x1ab510aa, 0xc290b86c, 0xa7e9a02e, 0x5c3f3bb3, 0x70a3dfae, + 0x16baad22, 0x21758cfb, 0x09033e60, 0x8b31075e, 0x6439b7bf, + 0xead33cf0); + VLOAD_32(v12, 0x3f2ef56d, 0x12649032, 0x6c0a880b, 0x7cb2477a, 0x41525037, + 0x02a39cfa, 0xf7595181, 0x0c230035, 0x86cf9ea9, 0x0f66ddd3, + 0x13351370, 0xbe489ce5, 0x4127f488, 0xe6b5e1b3, 0xc6918270, + 0xccc8626a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhu.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0, 0x0a3386a5, 0, 0x664a7ee4, 0, 0x00467bcb, 0, 0x07f5e895, 0, + 0x06c6dd00, 0, 0x18debc33, 0, 0x7d70f100, 0, 0xbbd823dc); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x4aee1e4f9775ff4b, 0x045a804c3d3e7dc0, 0x1a2f38060efcd306, + 0x34e09e5173ee6301, 0xd1f03c2e38769683, 0x1b1f454816d4ed10, + 0xed4a4f231da4abb3, 0xc87b025e6da277dc, 0x8da43ddf6feb6aae, + 0x7dcf10ced634db74, 0x736fd9583bc2de91, 0xa66de0036d350cbc, + 0x40bf5ec7afca9ec2, 0x5bb552a7b134ba79, 0x6ae5d02d7c121603, + 0x8a7621ad8d6f104a); + VLOAD_64(v24, 0x8f2c0088bea2739e, 0x4ed8c54dad60d3cb, 0x51e715e5cf56b2e6, + 0xa1b1262536ea3c57, 0x67f334468e5cde4f, 0x8ae5618164bd63fd, + 0x2f8be93c1d7807c3, 0x0444a9f4ccff2a2c, 0x6cac5e35bf847d59, + 0x1d92c5117b87a392, 0x124597d21d757a4e, 0x4ec5a9fb5b8a6591, + 0xb5b4189dd6080734, 0xf75ddacea0effff6, 0x5c3cb19fbc1c7580, + 0xff93a562f06d3641); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vmulhu.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0, 0x015742ce71e2c757, 0, 0x2165dabfb788d03d, 0, + 
0x0eb728a66479b5fb, 0, 0x0357b1cf05241ad9, 0, 0x0e8895d7f39e953c, 0, + 0x3335fb506009220b, 0, 0x589d920140d7dd8c, 0, 0x8a3b86d4dd8169cf); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, + 0xc2, 0x5f, 0xa3, 0x7c, 0xca); + uint64_t scalar = 5; + asm volatile("vmulhu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x00, 0x03, 0x02, 0x00, + 0x00, 0x03, 0x01, 0x03, 0x02, 0x03); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, + 0x61aa, 0x0a3a, 0xfe0a, 0x30f1, 0x5852, 0xbb6b, 0x42f7, 0x58d9); + scalar = 816; + asm volatile("vmulhu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x00fa, 0x032a, 0x003d, 0x0158, 0x00b5, 0x0130, 0x012c, + 0x0315, 0x0137, 0x0020, 0x0329, 0x009c, 0x0119, 0x0255, 0x00d5, + 0x011b); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, + 0xd75f62df, 0x3002ed42, 0x74269b1d, 0xc77bc0dd, 0x36f2552d, + 0x71b5888c, 0x02eb291b, 0x790cb3b1, 0xa3cf03c4, 0x8f90730a, + 0xf41b555a); + scalar = 7389998; + asm volatile("vmulhu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x00099748, 0x0005eb5c, 0x001b1e60, 0x00226562, 0x00563815, + 0x005eddef, 0x001525e2, 0x00332972, 0x0057de3b, 0x001833e9, + 0x0032161d, 0x0001491b, 0x003551d9, 0x00482775, 0x003f3ca7, + 0x006b8612); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, + 0x447b5f4710764817, 0xb62589a3d309672c, 0x5ddec2e6716fd0d3, + 0xf31034a096a6d0fa, 0x9cb4dca46ce577f7, 0x30cf2e2dc6773d82, + 0x6129247d49c42f4b, 0x3d9ee22336a4e216, 0x3c9b9d533797be90, + 0x0c0c54042a20ddc8, 0xf309bda968a3a583, 0x550697570a1e9645, + 0x5beaf5933973231f); + scalar = 321156886679781445; + asm volatile("vmulhu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x029d61da2f470da8, 0x004d882170361dd2, 0x03ecbc09716942cd, + 0x013138661b0ea1a1, 
0x032bd162449d3f20, 0x01a25fd52874e6a2, + 0x043b51fe85cf352c, 0x02ba6ebb77802a7c, 0x00d98a5bba81dc57, + 0x01b10a47f99f8c44, 0x0112a3d22e03b6e9, 0x010e20461059ad6b, + 0x0035b2b00a44dfe2, 0x043b352e6dc32a00, 0x017af48bc4f5ad70, + 0x0199ac3dc8053978); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5c, 0x3c, 0x86, 0x65, 0x41, 0x38, 0x20, 0x9e, 0x88, 0x28, 0x19, + 0xc2, 0x5f, 0xa3, 0x7c, 0xca); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vmulhu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0, 0x01, 0, 0x01, 0, 0x01, 0, 0x03, 0, 0x00, 0, 0x03, 0, 0x03, + 0, 0x03); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x4e7f, 0xfe41, 0x1346, 0x6c1a, 0x38ce, 0x5fa7, 0x5e39, 0xf7a2, + 0x61aa, 0x0a3a, 0xfe0a, 0x30f1, 0x5852, 0xbb6b, 0x42f7, 0x58d9); + scalar = 816; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vmulhu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0, 0x032a, 0, 0x0158, 0, 0x0130, 0, 0x0315, 0, 0x0020, 0, + 0x009c, 0, 0x0255, 0, 0x011b); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x15c6221c, 0x0d704417, 0x3d90ffd1, 0x4e168273, 0xc3bd5e20, + 0xd75f62df, 0x3002ed42, 0x74269b1d, 0xc77bc0dd, 0x36f2552d, + 0x71b5888c, 0x02eb291b, 0x790cb3b1, 0xa3cf03c4, 0x8f90730a, + 0xf41b555a); + scalar = 7389998; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vmulhu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0, 0x0005eb5c, 0, 0x00226562, 0, 0x005eddef, 0, 0x00332972, + 0, 0x001833e9, 0, 0x0001491b, 0, 0x00482775, 0, 0x006b8612); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x96304201a90be11f, 0x11654d4226322e4b, 0xe16e5cf2c1183b63, + 0x447b5f4710764817, 0xb62589a3d309672c, 0x5ddec2e6716fd0d3, + 0xf31034a096a6d0fa, 0x9cb4dca46ce577f7, 0x30cf2e2dc6773d82, + 0x6129247d49c42f4b, 0x3d9ee22336a4e216, 0x3c9b9d533797be90, + 0x0c0c54042a20ddc8, 0xf309bda968a3a583, 0x550697570a1e9645, + 0x5beaf5933973231f); + scalar = 321156886679781445; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm 
volatile("vmulhu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0, 0x004d882170361dd2, 0, 0x013138661b0ea1a1, 0, + 0x01a25fd52874e6a2, 0, 0x02ba6ebb77802a7c, 0, 0x01b10a47f99f8c44, 0, + 0x010e20461059ad6b, 0, 0x043b352e6dc32a00, 0, 0x0199ac3dc8053978); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c new file mode 100644 index 000000000..94d99133a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmv.c @@ -0,0 +1,104 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.v v3, v1"); + VCMP_U8(1, v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v6, v2"); + VCMP_U16(2, v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v12, v4"); /* destination is v12 */ + VCMP_U32(3, v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vmv.v.v v24, v8"); + VCMP_U64(4, v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +void TEST_CASE2() { + const uint64_t scalar = 0x00000000deadbeef; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 
8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v3, %[A]" ::[A] "r"(scalar)); + VCMP_U8(5, v3, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef, 0xef, 0xef, 0xef, 0xef, 0xef); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(6, v4, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(7, v8, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef); // 16 expected values, one per element (vl = 16) + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.x v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(8, v16, 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef, 0x00000000deadbeef, 0x00000000deadbeef, + 0x00000000deadbeef); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v3, -9"); + VCMP_U8(9, v3, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, + -9); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v4, -10"); + VCMP_U16(10, v4, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, + -10, -10, -10, -10); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v8, -11"); + VCMP_U32(11, v8, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, + -11, -11, -11, -11); + + 
VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.v.i v16, -12"); + VCMP_U64(12, v16, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, -12, + -12, -12, -12, -12); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c new file mode 100644 index 000000000..634172c9e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvnrr.c @@ -0,0 +1,146 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// +// For simplicity, this test depends on vl1r and vs1r + +#include "vector_macros.h" + +uint64_t counter; + +// Maximum size: (VLEN/8 Bytes * (MAX_LMUL == 8)) = VLEN +// Define VLEN before compiling me +// #define VLEN 128 +uint8_t gold_vec_8b[VLEN]; +uint8_t buf_vec_8b[VLEN]; + +/////////// +// vmv1r // +/////////// + +// 1 whole register move +void TEST_CASE1(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv1r.v v1, v16"); + // Check that the whole register was moved: store it and compare with the golden vector + asm volatile("vs1r.v v1, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, gold_vec_8b, VLEN / 8); +} + +/////////// +// vmv2r // +/////////// + +// 2 whole registers move +void TEST_CASE2(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 4); + // Reserve space 
for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv2r.v v2, v16"); + // Check that the whole register group was moved: store it and compare with the golden vector + asm volatile("vs2r.v v2, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 1, buf_vec_8b, gold_vec_8b, VLEN / 4); +} + +/////////// +// vmv4r // +/////////// + +// 4 whole registers move +void TEST_CASE3(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv4r.v v4, v16"); + // Check that the whole register group was moved: store it and compare with the golden vector + asm volatile("vs4r.v v4, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 2, buf_vec_8b, gold_vec_8b, VLEN / 2); +} + +/////////// +// vmv8r // +/////////// + +// 8 whole registers move +void TEST_CASE4(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv8r.v v8, v16"); + // Check that the whole register group was moved: store it and compare with the golden vector + asm volatile("vs8r.v v8, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 3, buf_vec_8b, 
gold_vec_8b, VLEN); +} + +//////////// +// Others // +//////////// + +// Check with initial vl == 0 +void TEST_CASE5(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(0, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Move the content to another register + asm volatile("vmv1r.v v1, v16"); + // Check that the whole register was moved even though vl is 0: store it and compare with the golden vector + asm volatile("vs1r.v v1, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, gold_vec_8b, VLEN / 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c new file mode 100644 index 000000000..11dfda779 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvsx.c @@ -0,0 +1,75 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +int8_t scalar_8b; +int16_t scalar_16b; +int32_t scalar_32b; +int64_t scalar_64b; + +void TEST_CASE1() { // vmv.s.x: write a scalar into element 0, checked at e8/e16/e32/e64 + scalar_8b = 55 << 0; + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_8b)); + VCMP_I8(1, v1, scalar_8b); + + scalar_16b = 55 << 8; + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v2, %0" ::"r"(scalar_16b)); + VCMP_I16(2, v2, scalar_16b); + + scalar_32b = 55 << 16; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v4, %0" ::"r"(scalar_32b)); + VCMP_I32(3, v4, scalar_32b); + + scalar_64b = 55LL << 32; // 64-bit literal: shifting a plain int by 32 is undefined behaviour + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.s.x v8, %0" ::"r"(scalar_64b)); + VCMP_I64(4, v8, scalar_64b); +} + +// Check special cases +void TEST_CASE2() { + scalar_64b = 55LL << 32; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_64b)); + VSET(1, e64, m1); + VCMP_I64(5, v1, scalar_64b); + + scalar_64b = 55LL << 32; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m1); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_64b)); + VSET(1, e64, m1); + VCMP_I64(6, v1, 1); // vl was 0 for the move: element 0 is expected to keep its loaded value + + scalar_64b = 55LL << 32; + VSET(16, e64, m1); + VLOAD_64(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m8); + asm volatile("vmv.s.x v1, %0" ::"r"(scalar_64b)); + VSET(1, e64, m1); + VCMP_I64(7, v1, 1); // vl was 0 for the move: element 0 is expected to keep its loaded value +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c new file mode 100644 index 000000000..2561c5b89 --- 
/dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmvxs.c @@ -0,0 +1,72 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +int8_t scalar_8b; +int16_t scalar_16b; +int32_t scalar_32b; +int64_t scalar_64b; + +void TEST_CASE1() { // vmv.x.s: read element 0 into a scalar, checked at e8/e16/e32/e64 + scalar_8b = 0; + VSET(16, e8, m1); + VLOAD_8(v1, 55 << 0, 22, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_8b)); + XCMP(1, scalar_8b, 55 << 0); + + scalar_16b = 0; + VSET(16, e16, m2); + VLOAD_16(v2, 55 << 8, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.x.s %0, v2" : "=r"(scalar_16b)); + XCMP(2, scalar_16b, 55 << 8); + + scalar_32b = 0; + VSET(16, e32, m4); + VLOAD_32(v4, 55 << 16, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vmv.x.s %0, v4" : "=r"(scalar_32b)); + XCMP(3, scalar_32b, 55 << 16); + + scalar_64b = 0; + VSET(16, e64, m8); + VLOAD_64(v8, 55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); // NOTE(review): 55 << 30 does not fit a 32-bit int; 55LL << 30 would keep the shift in 64-bit arithmetic - confirm intent + asm volatile("vmv.x.s %0, v8" : "=r"(scalar_64b)); + XCMP(4, scalar_64b, 55 << 30); +} + +// Check special cases +void TEST_CASE2() { + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET(16, e64, m8); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_64b)); + XCMP(5, scalar_64b, 55 << 30); + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m1); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_64b)); + XCMP(6, scalar_64b, 55 << 30); // vl was 0 for the move: element 0 is still expected in the scalar + + scalar_64b = 0; + VSET(16, e64, m1); + VLOAD_64(v1, 55 << 30, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VSET_ZERO(e64, m8); + asm volatile("vmv.x.s %0, v1" : "=r"(scalar_64b)); + XCMP(7, scalar_64b, 55 << 30); // vl was 0 for the move: element 0 is still expected in the scalar +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + 
EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c new file mode 100644 index 000000000..fa0998a8d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxnor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0xB6, 0x31); // ~(0xCD ^ 0x84) = 0xB6, ~(0xEF ^ 0x21) = 0x31 +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0xCD, 0xEF); // XNOR with all ones returns the first operand +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0x32, 0x10); // XNOR with all zeros inverts the first operand +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0x3D, 0xE0); // ~(0xCD ^ 0x0F) = 0x3D, ~(0xEF ^ 0xF0) = 0xE0 +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0xB6, 0xF1); // vl = 13: second byte keeps the top 3 bits of the preloaded 0xFF, (0x31 & 0x1F) | 0xE0 = 0xF1 +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c new file mode 100644 index 000000000..6a561d752 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vmxor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(1, v1, 0x49, 0xCE); // 0xCD ^ 0x84 = 0x49, 0xEF ^ 0x21 = 0xCE +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(2, v1, 0x32, 0x10); // XOR with all ones inverts the first operand +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(3, v1, 0xCD, 0xEF); // XOR with all zeros returns the first operand +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(4, v1, 0xC2, 0x1F); // 0xCD ^ 0x0F = 0xC2, 0xEF ^ 0xF0 = 0x1F +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(13, e8, m1); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(2, e8, m1); + VCMP_U8(5, v1, 0x49, 0xEE); // vl = 13: second byte keeps the top 3 bits of the preloaded 0xFF, (0xCE & 0x1F) | 0xE0 = 0xEE +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c new file mode 100644 index 000000000..f66136ec6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclip.c @@ -0,0 +1,78 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Muhammad Ijaz + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wv v1, v2, v4"); + VCMP_I8(1, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE2() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + VLOAD_8(v0, 0x5, 0, 0, 0); // mask 0b0101: the expected vector below only changes elements 0 and 2 + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wv v1, v2, v4, v0.t"); + VCMP_I8(2, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE3() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(3, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE4() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + VLOAD_8(v0, 0x5, 0, 0, 0); // four initialisers to match vl = 4, as in TEST_CASE2 and TEST_CASE6 + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(4, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE5() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wi v1, v2, 7"); + VCMP_I8(5, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE6() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v0, 0x5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclip.wi v1, v2, 7, v0.t"); + VCMP_I8(6, v1, 6, 0, 0xff, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c new file mode 100644 index 000000000..e98ac0889 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnclipu.c @@ -0,0 +1,78 @@ +// 
Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Muhammad Ijaz + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wv v1, v2, v4"); + VCMP_U8(1, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE2() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v4, 7, 7, 7, 7); + VLOAD_8(v0, 0x5, 0, 0, 0); // mask 0b0101: the expected vector below only changes elements 0 and 2 + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wv v1, v2, v4, v0.t"); + VCMP_U8(2, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE3() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(3, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE4() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + int8_t scalar = 7; + VLOAD_8(v0, 0x5, 0, 0, 0); // four initialisers to match vl = 4, as in TEST_CASE2 and TEST_CASE6 + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(4, v1, 6, 0, 0xff, 0); +} + +void TEST_CASE5() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wi v1, v2, 7"); + VCMP_U8(5, v1, 6, 0xff, 0xff, 0); +} + +void TEST_CASE6() { + VSET(4, e8, m1); + VLOAD_16(v2, 800, 65535, -50, 25); + VLOAD_8(v0, 0x5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vnclipu.wi v1, v2, 7, v0.t"); + VCMP_U8(6, v1, 6, 0, 0xff, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c new file mode 
100644 index 000000000..7c13461a5 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsac.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v3, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v1, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + asm volatile("vnmsac.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, + 0x65, 0xf6, 0x8c, 0x31, 0xbe, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v2, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + asm volatile("vnmsac.vv v2, v4, v6"); + VCMP_U16(2, v2, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, + 0x7c52, 0x8088, 0xca83, 0x937f, 0x4600, 0xaba7, 0x87f1, 0xc583, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 
0x4224aa5e); + VLOAD_32(v4, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + asm volatile("vnmsac.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, + 0x466feefe, 0xa6512191, 0xdf6d912b, 0x7c898c04, 0x0a7ec6a5, + 0x2121fa29, 0x91713c70, 0x60b0bd0d, 0xb3a6341a, 0x82041c42, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v8, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + asm volatile("vnmsac.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, + 0x084dc189ec3eea39, 0x72cb55f70cac6a8e, 0xc713321c491334ae, + 0xa21580bb2ab1e821, 0x3d84da5e7dab4cd1, 0x735a758175effc15, + 0x91e8df24708208d6, 0x5133f0cd25a5d6c6, 0x2f1a889653a2c559, + 0x5f9f0ac7744ba745, 0xba1bbac1969a4055, 0x275025160493a4fa, + 0x21c02801006747a0); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v3, 
0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v1, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x53, 0xb4, 0x2c, 0x04, 0x4a, 0x53, 0xa3, 0x87, 0x7e, 0xe2, + 0x4c, 0xf6, 0x2e, 0x31, 0x13, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v6, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v2, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0xb917, 0xdadf, 0x0f0c, 0x5c17, 0xe0b6, 0x4091, 0x5c69, + 0x7c52, 0x3588, 0xca83, 0x65d9, 0x4600, 0xfbff, 0x87f1, 0x34a4, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 0x4224aa5e); + VLOAD_32(v4, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0x3a582428, 0x92cacfc8, 0xb445799b, 
0x6ff2f953, 0x51a7fe9e, + 0x466feefe, 0xfb0a701b, 0xdf6d912b, 0xd10c9064, 0x0a7ec6a5, + 0xbb1779fd, 0x91713c70, 0xd04c0f6c, 0xb3a6341a, 0x90a09dc8, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v8, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xc265b2d19ad92bbb, 0x68b16397da83d642, 0xe490f5981f64a313, + 0x084dc189ec3eea39, 0xc475df4b52276fe9, 0xc713321c491334ae, + 0x8dd5189f3a66f166, 0x3d84da5e7dab4cd1, 0x7b74d167bd1b22fd, + 0x91e8df24708208d6, 0x0ab0c3f0f7ddeb66, 0x2f1a889653a2c559, + 0x84d4df4b2a3768c7, 0xba1bbac1969a4055, 0x79320b8da693a84e, + 0x21c02801006747a0); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v3, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v1, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + asm volatile("vnmsac.vx v1, %[A], v3" ::[A] 
"r"(scalar)); + VCMP_U8(9, v1, 0x24, 0x10, 0xdd, 0xab, 0x83, 0xf3, 0xf8, 0x13, 0x0d, 0xcf, + 0x84, 0x4f, 0xb6, 0xda, 0xee, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v2, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + asm volatile("vnmsac.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, + 0x86fd, 0x4cc0, 0xd0d7, 0x4c50, 0x5a64, 0xa3c7, 0x60c3, 0xe34f, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v4, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + asm volatile("vnmsac.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, + 0x2f0a66ed, 0x59240bcf, 0x915f2166, 0x8c4ace02, 0x802d8981, + 0xf9fc8b37, 0xa3f70986, 0xced98739, 0xa4fbf240, 0x4249945d, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v8, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 
0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + asm volatile("vnmsac.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, 0x983ca4c97f204385, + 0x59a9303f04932768, 0x59c29659b4d00149, 0x59a15b1bb66f16c2, + 0x37b0445a8ebaa7d1, 0x65e0c3ab56fa1f0c, 0x94a740971b1d6eda, + 0xa1c7ff743113d8bf, 0xe8198a4799a97a9a, 0x5ccf06fd8751eb9d, + 0xa36557d05e8802dc, 0x10aae67f31dc2b4f, 0xe878939fd1287553, + 0x594538a8571dbf06); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v3, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v1, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v1, %[A], v3, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0xfa, 0x10, 0x2a, 0xab, 0xe7, 0xf3, 0x8c, 0x13, 0x40, 0xcf, + 0x50, 0x4f, 0xe0, 0xda, 0x1f, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v2, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xe886, 0xd169, 0x1857, 0xfb44, 0x522e, 0x3f75, 0xa6c2, + 0x86fd, 0xd024, 0xd0d7, 0xdd99, 0x5a64, 0xf00a, 0x60c3, 0x79a5, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v4, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 
0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0xcbd8537e, 0x9dfe886c, 0x60cf8444, 0xddd6bece, 0x8f2a8694, + 0x2f0a66ed, 0x1ef799f5, 0x915f2166, 0x4bfd5a25, 0x802d8981, + 0xed89a52a, 0xa3f70986, 0x872392b9, 0xa4fbf240, 0x865c7264, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v16, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v8, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsac.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x3b0c6a3a651beecc, 0x79abe3a30a816ca0, 0xda94340ac428ca78, + 0x59a9303f04932768, 0x87df3c47c8113e43, 0x59a15b1bb66f16c2, + 0x358706b57ce6d6c7, 0x65e0c3ab56fa1f0c, 0xe9ffed5b39f1ea1d, + 0xa1c7ff743113d8bf, 0x5d2ea63ac910e42e, 0x5ccf06fd8751eb9d, + 0x05d366b426005f47, 0x10aae67f31dc2b4f, 0x023bbf8109263e1d, + 0x594538a8571dbf06); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c new file mode 100644 index 000000000..14739529a --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnmsub.c @@ -0,0 +1,292 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v1, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v3, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + asm volatile("vnmsub.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x4e, 0xb4, 0x9c, 0x04, 0x86, 0x53, 0xdb, 0x87, 0x81, 0xe2, + 0x65, 0xf6, 0x8c, 0x31, 0xbe, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v6, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + asm volatile("vnmsub.vv v2, v4, v6"); + VCMP_U16(2, v2, 0xd8a3, 0xdadf, 0x2ba2, 0x5c17, 0x5c48, 0x4091, 0x7106, + 0x7c52, 0x8088, 0xca83, 0x937f, 0x4600, 0xaba7, 0x87f1, 0xc583, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v4, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 0x4224aa5e); + VLOAD_32(v12, 0x3a582428, 0x61c55f94, 
0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + asm volatile("vnmsub.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x0e7d6e68, 0x92cacfc8, 0x726ad6fd, 0x6ff2f953, 0xe1d4d1c6, + 0x466feefe, 0xa6512191, 0xdf6d912b, 0x7c898c04, 0x0a7ec6a5, + 0x2121fa29, 0x91713c70, 0x60b0bd0d, 0xb3a6341a, 0x82041c42, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v8, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v24, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + asm volatile("vnmsub.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x231a763b4759b083, 0x68b16397da83d642, 0x7081592414ce4cdc, + 0x084dc189ec3eea39, 0x72cb55f70cac6a8e, 0xc713321c491334ae, + 0xa21580bb2ab1e821, 0x3d84da5e7dab4cd1, 0x735a758175effc15, + 0x91e8df24708208d6, 0x5133f0cd25a5d6c6, 0x2f1a889653a2c559, + 0x5f9f0ac7744ba745, 0xba1bbac1969a4055, 0x275025160493a4fa, + 0x21c02801006747a0); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v1, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 
0x7b, 0x09, 0x85, + 0x59, 0x2b, 0xe3, 0x33, 0xb9); + VLOAD_8(v2, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, + 0x18, 0xe6, 0x44, 0x57, 0xaf); + VLOAD_8(v3, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, + 0x4e, 0x2e, 0x7d, 0x13, 0x5a); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x41, 0xb4, 0xd0, 0x04, 0xc4, 0x53, 0x91, 0x87, 0x7b, 0xe2, + 0x85, 0xf6, 0x2b, 0x31, 0x33, 0xe3); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x9904, 0x5982, 0xa6cb, 0x73a1, 0x227e, 0xc8f6, 0x3eeb, 0xb010, + 0x14a1, 0xef2d, 0x3376, 0x371a, 0x4fc8, 0xbcca, 0xccd7, 0x9097); + VLOAD_16(v4, 0xb2dd, 0x9ca4, 0x72fe, 0xecab, 0x9909, 0xe1b0, 0x1769, 0x6759, + 0x9500, 0x3aae, 0x0637, 0xeadc, 0x7523, 0xa53c, 0xecc7, 0xaccc); + VLOAD_16(v6, 0xb917, 0x6a27, 0x0f0c, 0x04a2, 0xe0b6, 0x9fb1, 0x5c69, 0x21e2, + 0x3588, 0x8d19, 0x65d9, 0x6458, 0xfbff, 0xf949, 0x34a4, 0x0710); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0x9904, 0xdadf, 0xa6cb, 0x5c17, 0x227e, 0x4091, 0x3eeb, + 0x7c52, 0x14a1, 0xca83, 0x3376, 0x4600, 0x4fc8, 0x87f1, 0xccd7, + 0x5abc); + + VSET(16, e32, m4); + VLOAD_32(v4, 0xe6f4ff60, 0xbf6a38db, 0x30f2ea92, 0x1763619e, 0x815c1c28, + 0x5f1b57db, 0xdb2cdc06, 0xab577f4a, 0x214746ac, 0xd3a08c15, + 0x35887ce9, 0x9d5a0f65, 0x76adea2b, 0x91b7f299, 0x6e2977fe, + 0xdcbcb1d7); + VLOAD_32(v8, 0x885c8baa, 0xbe200324, 0x9eaa49d7, 0x4e208dde, 0x802bbe9f, + 0x7633680e, 0xf1a79717, 0xe62e371e, 0x0fc25b48, 0x11067f38, + 0xc654ccb4, 0x6702a66c, 0x7a0b229d, 0x25c2b688, 0x82b68b3d, + 0x4224aa5e); + VLOAD_32(v12, 0x3a582428, 0x61c55f94, 0xb445799b, 0xcca5a657, 0x51a7fe9e, + 0x0840b4f8, 0xfb0a701b, 0x1b5361d7, 0xd10c9064, 0xa899d63d, + 0xbb1779fd, 0x1b35390c, 0xd04c0f6c, 0xd8c9db62, 0x90a09dc8, + 0x463438b4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0xe6f4ff60, 0x92cacfc8, 0x30f2ea92, 0x6ff2f953, 0x815c1c28, + 0x466feefe, 0xdb2cdc06, 
0xdf6d912b, 0x214746ac, 0x0a7ec6a5, + 0x35887ce9, 0x91713c70, 0x76adea2b, 0xb3a6341a, 0x6e2977fe, + 0x7a9625c2); + + VSET(16, e64, m8); + VLOAD_64(v8, 0xb8d79a755b98580a, 0xceefb8be6deb3a3d, 0x670688aed7c97cdd, + 0x0ced1db5e1b7da8f, 0x58993c2ae4a62e89, 0x864439a0768dce1b, + 0x7882d6539128d119, 0xfe6469348911945b, 0x6da189493780c328, + 0xf8c4931b61dc54dc, 0xd5ac914ccbf735f0, 0xba0a5bf3b2b528d1, + 0x74d814e6ebcebe81, 0xfc44af3a74cfee8c, 0xea0cb63d1bf7d5dc, + 0xbe98c841d80bd077); + VLOAD_64(v16, 0x5517b401c8fbd5ec, 0x021c52c329edd200, 0xff61d899305d1423, + 0xd886035fdcfe3186, 0x694b857cb1cc3bc3, 0x52376b9a996e1925, + 0x5785c45d41ec230d, 0x950f08c23f6da73d, 0xe5dc4e9a35834719, + 0x9a08d0e965e96a19, 0xb80c2107151bdcf6, 0xd0612e4d4bc314b7, + 0xdfb23a142b750482, 0xedc5e4b79881bdaf, 0x72c493d9df55bf13, + 0xfd4b1328b8f7773a); + VLOAD_64(v24, 0xc265b2d19ad92bbb, 0x81a4ef527fc2e042, 0xe490f5981f64a313, + 0xf12edb410132b013, 0xc475df4b52276fe9, 0x069e283bf74ca195, + 0x8dd5189f3a66f166, 0x297726422e620380, 0x7b74d167bd1b22fd, + 0x08e88e9642656a52, 0x0ab0c3f0f7ddeb66, 0x00b155918c8646c0, + 0x84d4df4b2a3768c7, 0xc31234734867ae09, 0x79320b8da693a84e, + 0x30727b2d1bccd396); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xb8d79a755b98580a, 0x68b16397da83d642, 0x670688aed7c97cdd, + 0x084dc189ec3eea39, 0x58993c2ae4a62e89, 0xc713321c491334ae, + 0x7882d6539128d119, 0x3d84da5e7dab4cd1, 0x6da189493780c328, + 0x91e8df24708208d6, 0xd5ac914ccbf735f0, 0x2f1a889653a2c559, + 0x74d814e6ebcebe81, 0xba1bbac1969a4055, 0xea0cb63d1bf7d5dc, + 0x21c02801006747a0); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v3, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + asm volatile("vnmsub.vx v1, %[A], v3" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0x24, 0x10, 0xdd, 0xab, 
0x83, 0xf3, 0xf8, 0x13, 0x0d, 0xcf, + 0x84, 0x4f, 0xb6, 0xda, 0xee, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v4, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + asm volatile("vnmsub.vx v2, %[A], v4" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0x5e06, 0xd169, 0xecdc, 0xfb44, 0x7912, 0x3f75, 0x4c1d, + 0x86fd, 0x4cc0, 0xd0d7, 0x4c50, 0x5a64, 0xa3c7, 0x60c3, 0xe34f, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v8, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + asm volatile("vnmsub.vx v4, %[A], v8" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0x6b20ab9a, 0x9dfe886c, 0xf983030d, 0xddd6bece, 0xf2a7f2ad, + 0x2f0a66ed, 0x59240bcf, 0x915f2166, 0x8c4ace02, 0x802d8981, + 0xf9fc8b37, 0xa3f70986, 0xced98739, 0xa4fbf240, 0x4249945d, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v16, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 
0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + asm volatile("vnmsub.vx v8, %[A], v16" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0x60dd7609c833e03a, 0x79abe3a30a816ca0, 0x983ca4c97f204385, + 0x59a9303f04932768, 0x59c29659b4d00149, 0x59a15b1bb66f16c2, + 0x37b0445a8ebaa7d1, 0x65e0c3ab56fa1f0c, 0x94a740971b1d6eda, + 0xa1c7ff743113d8bf, 0xe8198a4799a97a9a, 0x5ccf06fd8751eb9d, + 0xa36557d05e8802dc, 0x10aae67f31dc2b4f, 0xe878939fd1287553, + 0x594538a8571dbf06); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v1, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, + 0x90, 0xa2, 0x67, 0x3d, 0xf5); + VLOAD_8(v3, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, + 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v1, %[A], v3, v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0x5e, 0x10, 0xa9, 0xab, 0x14, 0xf3, 0x84, 0x13, 0xd7, 0xcf, + 0x5c, 0x4f, 0xa2, 0xda, 0x3d, 0x0e); + + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v2, 0xfe80, 0x3910, 0x5313, 0xefef, 0xecfc, 0x7983, 0xcc0d, 0x731f, + 0xf384, 0xfde3, 0x9cd1, 0xf20b, 0xa41b, 0x949a, 0x5886, 0xa1a9); + VLOAD_16(v4, 0xe886, 0xf1f9, 0x1857, 0xd0bb, 0x522e, 0x2de0, 0xa6c2, 0xd624, + 0xd024, 0x40a2, 0xdd99, 0xd517, 0xf00a, 0xae8d, 0x79a5, 0x519d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v2, %[A], v4, v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xfe80, 0xd169, 0x5313, 0xfb44, 0xecfc, 0x3f75, 0xcc0d, + 0x86fd, 0xf384, 0xd0d7, 0x9cd1, 0x5a64, 0xa41b, 0x60c3, 0x5886, + 0x9a3c); + + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v4, 0x48da7aac, 0x54e98600, 0xf49f26e5, 0x338e8406, 0x40ca82f5, + 0x75d0b8f6, 0x38c88af2, 0x96bb5c03, 0x6f61c0a9, 0x3626dd69, + 0x31aba619, 0x7d974a55, 0xbc63c280, 0x502334bf, 0x9451b955, + 0x6a8bae19); + VLOAD_32(v8, 0xcbd8537e, 0x36928a6c, 0x60cf8444, 0xa19ea650, 0x8f2a8694, + 0x050956bf, 0x1ef799f5, 0x43c0f327, 0x4bfd5a25, 
0x7be439e4, + 0xed89a52a, 0x2bbf028d, 0x872392b9, 0x0ad55495, 0x865c7264, + 0xfcce4b64); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v4, %[A], v8, v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0x48da7aac, 0x9dfe886c, 0xf49f26e5, 0xddd6bece, 0x40ca82f5, + 0x2f0a66ed, 0x38c88af2, 0x915f2166, 0x6f61c0a9, 0x802d8981, + 0x31aba619, 0xa3f70986, 0xbc63c280, 0xa4fbf240, 0x9451b955, + 0xdd51d971); + + VSET(16, e64, m8); + scalar = -598189234597999223; + VLOAD_64(v8, 0x93adc14539897782, 0xa8d5d41c19b1455a, 0x55e01165195d2d9b, + 0xe81a26a17fef30f2, 0x33a71ede19aec0aa, 0x49598be14c5bc1cd, + 0x1c27bde3f488bfc6, 0x4188f9b8611e5d90, 0xd53289cca28a3b6b, + 0x3b435e1078e3bee9, 0x5e3f4c08c869abf4, 0x3c004920e9c39fb6, + 0x4b42a451b264b153, 0x110a6db11a7c2801, 0x7c0f358ac41d49fa, + 0x6e8c6ae4d14bd1a5); + VLOAD_64(v16, 0x3b0c6a3a651beecc, 0xb612caa033bc9bca, 0xda94340ac428ca78, + 0xf774b16ef94a22ea, 0x87df3c47c8113e43, 0x38487d57a064f677, + 0x358706b57ce6d6c7, 0xda111b3ac946811c, 0xe9ffed5b39f1ea1d, + 0x3c7e5a675c779870, 0x5d2ea63ac910e42e, 0xb3e832dbe2332203, + 0x05d366b426005f47, 0x00b3b58815a860d8, 0x023bbf8109263e1d, + 0x5fbc2f647d6c1153); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vnmsub.vx v8, %[A], v16, v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0x93adc14539897782, 0x79abe3a30a816ca0, 0x55e01165195d2d9b, + 0x59a9303f04932768, 0x33a71ede19aec0aa, 0x59a15b1bb66f16c2, + 0x1c27bde3f488bfc6, 0x65e0c3ab56fa1f0c, 0xd53289cca28a3b6b, + 0xa1c7ff743113d8bf, 0x5e3f4c08c869abf4, 0x5ccf06fd8751eb9d, + 0x4b42a451b264b153, 0x10aae67f31dc2b4f, 0x7c0f358ac41d49fa, + 0x594538a8571dbf06); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c new file mode 100644 index 000000000..63627ba9b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsra.c @@ -0,0 +1,242 @@ +// Copyright 
2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsra.wv v4, v2, v3"); + VCMP_U8(1, v4, 0x00, 0x80, 0xC0, 0xE0, 0xFE, 0xFF, 0xFF, 0x00, 0x00, 0x80, + 0xC0, 0xE0, 0xFE, 0xFF, 0xFF, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsra.wv v12, v4, v8"); + VCMP_U16(2, v12, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0xFFFF, + 0x0000, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0xFFFF, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsra.wv v24, v8, v16"); + VCMP_U32(3, v24, 0x00000000, 0x80000000, 0xC0000000, 0xE0000000, 0xFE000000, + 0xFFFE0000, 0xFFFFFFFE, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0xC0000000, 0xE0000000, 0xFE000000, 0xFFFE0000, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 
0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vnsra.wv v6, v2, v4, v0.t"); + VCMP_U8(4, v6, 0x00, 0x80, 0x00, 0xE0, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x80, + 0x00, 0xE0, 0x00, 0xFF, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vnsra.wv v12, v4, v8, v0.t"); + VCMP_U16(5, v12, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vnsra.wv v24, v8, v16, v0.t"); + VCMP_U32(6, v24, 0x00000000, 0x80000000, 0x00000000, 0xE0000000, 0x00000000, + 0xFFFE0000, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0x00000000, 0xE0000000, 0x00000000, 0xFFFE0000, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vnsra.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 
0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsra.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(8, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsra.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsra.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(10, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsra.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(11, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 
0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsra.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(12, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vnsra.wi v4, v2, 2"); + VCMP_U8(13, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsra.wi v8, v4, 2"); + VCMP_U16(14, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsra.wi v16, v8, 2"); + VCMP_U32(15, v16, 0x00000001, 0x00000002, 0x00000003, 
0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsra.wi v4, v2, 2, v0.t"); + VCMP_U8(16, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsra.wi v8, v4, 2, v0.t"); + VCMP_U16(17, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsra.wi v16, v8, 2, v0.t"); + VCMP_U32(18, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c 
new file mode 100644 index 000000000..a0f4b77bb --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vnsrl.c @@ -0,0 +1,242 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsrl.wv v4, v2, v3"); + VCMP_U8(1, v4, 0x00, 0x80, 0xC0, 0xE0, 0xFE, 0x01, 0x01, 0x00, 0x00, 0x80, + 0xC0, 0xE0, 0xFE, 0x01, 0x01, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsrl.wv v12, v4, v8"); + VCMP_U16(2, v12, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0x0001, + 0x0000, 0x0000, 0x8000, 0xC000, 0xE000, 0xFE00, 0xFFFE, 0x0001, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vnsrl.wv v24, v8, v16"); + VCMP_U32(3, v24, 0x00000000, 0x80000000, 0xC0000000, 0xE0000000, 0xFE000000, + 0xFFFE0000, 0xFFFFFFFE, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0xC0000000, 0xE0000000, 0xFE000000, 0xFFFE0000, 0xFFFFFFFE, + 
0xFFFFFFFF); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, + 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00, 0xFF00); + VLOAD_8(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsrl.wv v8, v2, v4, v0.t"); + VCMP_U8(4, v8, 0x00, 0x80, 0x00, 0xE0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x80, + 0x00, 0xE0, 0x00, 0x01, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_32(v4, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, + 0xFFFF0000); + VLOAD_16(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vnsrl.wv v12, v4, v8, v0.t"); + VCMP_U16(5, v12, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000, 0x0000, 0x8000, 0x0000, 0xE000, 0x0000, 0xFFFE, 0x0000, + 0x0000); + + VSET(16, e32, m4); + VLOAD_64(v8, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF00000000); + VLOAD_32(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vnsrl.wv v24, v8, v16, v0.t"); + VCMP_U32(6, v24, 0x00000000, 0x80000000, 0x00000000, 0xE0000000, 0x00000000, + 0xFFFE0000, 0x00000000, 0xFFFFFFFF, 0x00000000, 0x80000000, + 0x00000000, 0xE0000000, 0x00000000, 0xFFFE0000, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 
0xFFFC); + asm volatile("vnsrl.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(7, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsrl.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(8, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsrl.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(9, v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsrl.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(10, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + 
VCLEAR(v8); + asm volatile("vnsrl.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(11, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsrl.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(12, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vnsrl.wi v4, v2, 2"); + VCMP_U8(13, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vnsrl.wi v8, v4, 2"); + VCMP_U16(14, v8, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 
0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vnsrl.wi v16, v8, 2"); + VCMP_U32(15, v16, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vnsrl.wi v4, v2, 2, v0.t"); + VCMP_U8(16, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vnsrl.wi v8, v4, 2, v0.t"); + VCMP_U16(17, v8, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vnsrl.wi v16, v8, 2, v0.t"); + VCMP_U32(18, v16, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + 
TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c new file mode 100644 index 000000000..dcd162e63 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vor.c @@ -0,0 +1,309 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + asm volatile("vor.vv v1, v2, v3"); + VCMP_U8(1, v1, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, 0x03, 0xf0, 0xff, + 0x03, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + asm volatile("vor.vv v2, v4, v6"); + VCMP_U16(2, v2, 0xffff, 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0, 0xffff, + 0x0003, 0xf0f0, 0xffff, 0x0003, 0xf0f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + asm volatile("vor.vv v4, v8, v12"); + VCMP_U32(3, v4, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, 0x00000003, + 0xf0f0f0f0, 0xffffffff, 0x00000003, 0xf0f0f0f0, 0xffffffff, + 0x00000003, 0xf0f0f0f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 
0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + asm volatile("vor.vv v8, v16, v24"); + VCMP_U64(4, v8, 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); +} + +void TEST_CASE2() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vor.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, + 0xef, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vor.vv v2, v4, v6, v0.t"); + VCMP_U16(6, v2, 0xffff, 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0, 0xffff, + 0xbeef, 0xf0f0, 0xffff, 0xbeef, 0xf0f0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 
0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vor.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0, 0xffffffff, 0xdeadbeef, + 0xf0f0f0f0, 0xffffffff, 0xdeadbeef, 0xf0f0f0f0, 0xffffffff, + 0xdeadbeef, 0xf0f0f0f0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vor.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0f0); +} + +void TEST_CASE3() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vor.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, + 0xf1, 0xf0); + + 
VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vor.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0xffff, 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0, 0xffff, + 0x0ff1, 0xfff0, 0xffff, 0x0ff1, 0xfff0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vor.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, 0x0ff00ff1, + 0xfff0fff0, 0xffffffff, 0x0ff00ff1, 0xfff0fff0, 0xffffffff, + 0x0ff00ff1, 0xfff0fff0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0x0ff00ff00ff00ff1, 0xfff0fff0fff0fff0); +} + +void TEST_CASE4() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vor.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, 0xef, 0xf0, 0xff, + 0xef, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 
0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xffff, 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0, 0xffff, + 0xbeef, 0xfff0, 0xffff, 0xbeef, 0xfff0); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vor.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, 0xdeadbeef, + 0xfff0fff0, 0xffffffff, 0xdeadbeef, 0xfff0fff0, 0xffffffff, + 0xdeadbeef, 0xfff0fff0); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xfff0fff0fff0fff0); +} + +void TEST_CASE5() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vor.vi v1, v2, 15"); + VCMP_U8(17, v1, 0xff, 0x0f, 0xff, 0xff, 0x0f, 0xff, 0xff, 0x0f, 
0xff, 0xff, + 0x0f, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vor.vi v2, v4, 15"); + VCMP_U16(18, v2, 0xffff, 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff, 0xffff, + 0x000f, 0xf0ff, 0xffff, 0x000f, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vor.vi v4, v8, 15"); + VCMP_U32(19, v4, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, 0x0000000f, + 0xf0f0f0ff, 0xffffffff, 0x0000000f, 0xf0f0f0ff, 0xffffffff, + 0x0000000f, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vor.vi v8, v16, 15"); + VCMP_U64(20, v8, 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0x000000000000000f, 0xf0f0f0f0f0f0f0ff); +} + +void TEST_CASE6() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vor.vi v1, v2, 15, v0.t"); + VCMP_U8(21, v1, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, 0xef, 0xff, 0xff, + 0xef, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vor.vi v2, v4, 
15, v0.t"); + VCMP_U16(22, v2, 0xffff, 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff, 0xffff, + 0xbeef, 0xf0ff, 0xffff, 0xbeef, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vor.vi v4, v8, 15, v0.t"); + VCMP_U32(23, v4, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, + 0xf0f0f0ff, 0xffffffff, 0xdeadbeef, 0xf0f0f0ff, 0xffffffff, + 0xdeadbeef, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vor.vi v8, v16, 15, v0.t"); + VCMP_U64(24, v8, 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xffffffffffffffff, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c new file mode 100644 index 000000000..14fa78e6b --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c @@ -0,0 +1,30 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* Masked vpopc.m: expects 2 in OUP[0] for v2 = 7 under mask v0 = 5 */ + VSET(4, e32, m1); + VLOAD_U32(v2, 7, 0, 0, 0); + VLOAD_U32(v0, 5, 0, 0, 0); + volatile uint32_t scalar = 1337; + volatile uint32_t OUP[] = {0, 0, 0, 0}; + __asm__ volatile( + "vpopc.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : [A] "=&r"(scalar) /* written by vpopc.m before %1 is read: early-clobber output, not an input */ + : "r"(OUP) : "memory"); /* %1 is the store address; sw writes OUP behind the compiler's back */ + XCMP(1, OUP[0], 2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c new file mode 100644 index 000000000..2f01159dc --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredand.c @@ -0,0 +1,93 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(12, e8, m1); + VLOAD_8(v1, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, + 0xf0); + VLOAD_8(v2, 0xf0); + asm volatile("vredand.vs v3, v1, v2"); + VCMP_U8(1, v3, 0xf0); + + VSET(12, e16, m2); + VLOAD_16(v2, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x0701, + 0xfff0, 0xffff, 0x0101, 0xf1f0); + VLOAD_16(v4, 0xefff); + asm volatile("vredand.vs v6, v2, v4"); + VCMP_U16(2, v6, 0x0100); + + VSET(12, e32, m4); + VLOAD_32(v4, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, + 0xf0f0f0f0, 0xffffffff, 0x100ff001, 0xf0f0f0f0, 0xffffffff, + 0x100ff001, 0xf0f0f0f0); + VLOAD_32(v8, 0x00f010f0); + asm volatile("vredand.vs v12, v4, v8"); + VCMP_U32(3, v12, 0x00001000); + + VSET(12, e64, m8); + VLOAD_64(v8, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v16, 0xfffffffffffffff7); + asm volatile("vredand.vs v24, v8, v16"); + VCMP_U64(4, v24, 0x1000000000000000); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(12, e8, m1); + VLOAD_8(v0, 0xf7, 0xff); + VLOAD_8(v1, 0xff, 0xf1, 0xff, 0x00, 0xf1, 0xf0, 0xff, 0xf1, 0xf0, 0xff, 0xf1, + 0xf0); + VLOAD_8(v2, 0xf0); + VLOAD_8(v3, 1); + asm volatile("vredand.vs v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 0xf0); + + VSET(12, e16, m2); + VLOAD_8(v0, 0x00, 0x08); + VLOAD_16(v2, 0xffff, 0x0301, 0xf1f0, 0xffff, 0x0101, 0xf7f0, 0xffff, 0x9701, + 0xfff0, 0xffff, 0x0101, 0xf1f0); + VLOAD_16(v4, 0xefff); + VLOAD_16(v6, 1); + asm volatile("vredand.vs v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0xe1f0); + + VSET(12, e32, m4); + VLOAD_8(v0, 0xfe, 0xff); + VLOAD_32(v4, 0x00000000, 0x100ff001, 0xf0f0f0f0, 0xffffffff, 0x100ff001, + 0xf0f0f0f0, 0xffffffff, 
0x100ff001, 0xf0f0f0f0, 0xffffffff, + 0x100ff001, 0xf0f0f0f0); + VLOAD_32(v8, 0x00f010f0); + VLOAD_32(v12, 1); + asm volatile("vredand.vs v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0x00001000); + + VSET(12, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v8, 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x1000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v16, 0xfffffffffffffff7); + VLOAD_64(v24, 1); + asm volatile("vredand.vs v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0x1000000000000000); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c new file mode 100644 index 000000000..b00d0885c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmax.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, -7, 8, 1, 9, 3, 4, 5, -6, 7, 8); + VLOAD_8(v2, -1); + asm volatile("vredmax.vs v3, v1, v2"); + VCMP_U8(1, v3, 9); + + VSET(16, e16, m2); + VLOAD_16(v2, -1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, -4, 5, 6, 7, 8); + VLOAD_16(v4, 9); + asm volatile("vredmax.vs v6, v2, v4"); + VCMP_U16(2, v6, 9); + + VSET(16, e32, m4); + VLOAD_32(v4, 9, 2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, -5, 6, 7, 8); + VLOAD_32(v8, 1); + asm volatile("vredmax.vs v12, v4, v8"); + VCMP_U32(3, v12, 9); + + VSET(16, e64, m8); + VLOAD_64(v8, -1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, -8); + VLOAD_64(v16, -1); + asm volatile("vredmax.vs v24, v8, v16"); + VCMP_U64(4, v24, 9); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0x03, 0x00); + VLOAD_8(v1, -1, 2, 3, -4, 5, 6, 7, 9, 1, -2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + VLOAD_8(v3, 1); + asm volatile("vredmax.vs v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 2); + + VSET(16, e16, m2); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 9); + VLOAD_16(v6, 1); + asm volatile("vredmax.vs v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 9); + + VSET(16, e32, m4); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_32(v4, -1, 2, 3, 4, 5, 6, 7, -8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1); + VLOAD_32(v12, 1); + asm volatile("vredmax.vs v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 8); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v8, 1, -2, 3, 4, 5, 6, -7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 4); + VLOAD_64(v24, 1); + asm volatile("vredmax.vs v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c new file mode 100644 index 000000000..9e7deed64 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmaxu.c @@ -0,0 +1,106 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + asm volatile("vredmaxu.vs v3, v1, v2"); + VCMP_U8(1, v3, 9); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 9); + asm volatile("vredmaxu.vs v6, v2, v4"); + VCMP_U16(2, v6, 9); + + VSET(16, e32, m4); + VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1); + asm volatile("vredmaxu.vs v12, v4, v8"); + VCMP_U32(3, v12, 9); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1); + asm volatile("vredmaxu.vs v24, v8, v16"); + VCMP_U64(4, v24, 9); +} +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0x03, 0x00); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + VLOAD_8(v3, 1); + asm volatile("vredmaxu.vs v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 2); + + VSET(16, e16, m2); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 9); + VLOAD_16(v6, 1); + asm volatile("vredmaxu.vs v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 9); + + VSET(16, e32, m4); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1); + VLOAD_32(v12, 1); + asm volatile("vredmaxu.vs v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 8); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 4); + 
VLOAD_64(v24, 1); + asm volatile("vredmaxu.vs v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 8); +} + +// Naive test with negative values +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 9, -3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + asm volatile("vredmaxu.vs v3, v1, v2"); + VCMP_U8(9, v3, -3); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, -9); + asm volatile("vredmaxu.vs v6, v2, v4"); + VCMP_U16(10, v6, -9); + + VSET(16, e32, m4); + VLOAD_32(v4, 9, 2, 3, 4, -5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1); + asm volatile("vredmaxu.vs v12, v4, v8"); + VCMP_U32(11, v12, -5); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, -4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, -1); + asm volatile("vredmaxu.vs v24, v8, v16"); + VCMP_U64(12, v24, -1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c new file mode 100644 index 000000000..c3415b626 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredmin.c @@ -0,0 +1,78 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + asm volatile("vredmin.vs v3, v1, v2"); + VCMP_U8(1, v3, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 0); + asm volatile("vredmin.vs v6, v2, v4"); + VCMP_U16(2, v6, -3); + + VSET(16, e32, m4); + VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, -1); + asm volatile("vredmin.vs v12, v4, v8"); + VCMP_U32(3, v12, -1); + + VSET(16, e64, m8); + VLOAD_64(v8, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, -1); + asm volatile("vredmin.vs v24, v8, v16"); + VCMP_U64(4, v24, -9); +} +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0x03, 0x00); + VLOAD_8(v1, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + VLOAD_8(v3, 1); + asm volatile("vredmin.vs v3, v1, v2, v0.t"); + VCMP_U8(5, v3, -2); + + VSET(16, e16, m2); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_16(v2, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 3); + VLOAD_16(v6, 1); + asm volatile("vredmin.vs v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 3); + + VSET(16, e32, m4); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 8); + VLOAD_32(v12, 1); + asm volatile("vredmin.vs v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 7); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 4); + VLOAD_64(v24, 1); + asm volatile("vredmin.vs v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c new 
file mode 100644 index 000000000..5e3dc2254 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredminu.c @@ -0,0 +1,78 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 0, 1, 9, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + asm volatile("vredminu.vs v3, v1, v2"); + VCMP_U8(1, v3, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, -3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 0); + asm volatile("vredminu.vs v6, v2, v4"); + VCMP_U16(2, v6, 0); + + VSET(16, e32, m4); + VLOAD_32(v4, 9, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, -1); + asm volatile("vredminu.vs v12, v4, v8"); + VCMP_U32(3, v12, 1); + + VSET(16, e64, m8); + VLOAD_64(v8, -1, 2, 3, 4, 5, -6, 7, -9, -1, -2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, -1); + asm volatile("vredminu.vs v24, v8, v16"); + VCMP_U64(4, v24, 2); +} +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0x03, 0x00); + VLOAD_8(v1, 1, -2, 3, 4, 5, 6, 7, 9, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + VLOAD_8(v3, 1); + asm volatile("vredminu.vs v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 1); + + VSET(16, e16, m2); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_16(v2, -1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 3); + VLOAD_16(v6, 1); + asm volatile("vredminu.vs v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 3); + + VSET(16, e32, m4); + VLOAD_8(v0, 0x00, 0xc0); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 8); + VLOAD_32(v12, 1); + asm volatile("vredminu.vs v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 7); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 4); + VLOAD_64(v24, 1); + asm volatile("vredminu.vs v24, v8, v16, 
v0.t"); + VCMP_U64(8, v24, 1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c new file mode 100644 index 000000000..69ea3cb39 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredor.c @@ -0,0 +1,93 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(12, e8, m1); + VLOAD_8(v1, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, + 0x00); + VLOAD_8(v2, 0x10); + asm volatile("vredor.vs v3, v1, v2"); + VCMP_U8(1, v3, 0x11); + + VSET(12, e16, m2); + VLOAD_16(v2, 0x0000, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x0701, + 0x0000, 0x0000, 0x0101, 0x0100); + VLOAD_16(v4, 0xe000); + asm volatile("vredor.vs v6, v2, v4"); + VCMP_U16(2, v6, 0xe701); + + VSET(12, e32, m4); + VLOAD_32(v4, 0x00000000, 0x10000001, 0x00000000, 0x00000000, 0x10000001, + 0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000, + 0x10000001, 0x00000000); + VLOAD_32(v8, 0x00001000); + asm volatile("vredor.vs v12, v4, v8"); + VCMP_U32(3, v12, 0x10001001); + + VSET(12, e64, m8); + VLOAD_64(v8, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + 0x0000000000000000, 0x1000000000000001, 0x0000000000000000); + VLOAD_64(v16, 0x0000000000000007); + asm volatile("vredor.vs v24, v8, v16"); + VCMP_U64(4, v24, 0x1000000000000007); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(12, e8, m1); + VLOAD_8(v0, 0x07, 0x00); + VLOAD_8(v1, 0x00, 0x01, 0x00, 0xff, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, + 0x00); + VLOAD_8(v2, 0x00); + VLOAD_8(v3, 
1); + asm volatile("vredor.vs v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 0x01); + + VSET(12, e16, m2); + VLOAD_8(v0, 0x00, 0x08); + VLOAD_16(v2, 0x0f00, 0x0301, 0x0100, 0x0000, 0x0101, 0x0700, 0x0000, 0x9701, + 0x0000, 0x0000, 0x0101, 0x0100); + VLOAD_16(v4, 0xe000); + VLOAD_16(v6, 1); + asm volatile("vredor.vs v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0xe100); + + VSET(12, e32, m4); + VLOAD_8(v0, 0x0e, 0x00); + VLOAD_32(v4, 0xf0000fff, 0x10000001, 0x00000000, 0x00000000, 0x10000001, + 0x00000000, 0x00000000, 0x10000001, 0x00000000, 0x00000000, + 0x10000001, 0x00000000); + VLOAD_32(v8, 0x00001000); + VLOAD_32(v12, 1); + asm volatile("vredor.vs v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0x10001001); + + VSET(12, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v8, 0x0000000000000000, 0x1000000000000001, 0x0000f00000000000, + 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + 0x0000000000000000, 0x1000000000000001, 0x0000000000000000); + VLOAD_64(v16, 0x0000000000000007); + VLOAD_64(v24, 1); + asm volatile("vredor.vs v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0x1000000000000007); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c new file mode 100644 index 000000000..26284c76a --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredsum.c @@ -0,0 +1,178 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + asm volatile("vredsum.vs v3, v1, v2"); + VCMP_U8(1, v3, 73); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1); + asm volatile("vredsum.vs v6, v2, v4"); + VCMP_U16(2, v6, 73); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1); + asm volatile("vredsum.vs v12, v4, v8"); + VCMP_U32(3, v12, 73); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1); + asm volatile("vredsum.vs v24, v8, v16"); + VCMP_U64(4, v24, 73); +} + +// Masked naive test +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1); + VLOAD_8(v3, 1); + asm volatile("vredsum.vs v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 37); + + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1); + VLOAD_16(v6, 1); + asm volatile("vredsum.vs v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 37); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1); + VLOAD_32(v12, 1); + asm volatile("vredsum.vs v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 37); + + VSET(16, e64, m8); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1); + VLOAD_64(v24, 1); + asm volatile("vredsum.vs v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 37); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2"); + VCMP_U8(9, v3, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v6, v2, v4"); + VCMP_U16(10, v6, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v12, v4, v8"); + VCMP_U32(11, v12, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v16, v8"); + VCMP_U64(12, v24, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements, undisturbed policy +void TEST_CASE4(void) { + VSET(15, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2"); + VCMP_U8(13, v3, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v6, v2, v4"); + VCMP_U16(14, v6, 2, 2, 
3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v12, v4, v8"); + VCMP_U32(15, v12, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(7, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v8, v16"); + VCMP_U64(16, v24, 29, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(15, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v24, v8, v16"); + VCMP_U64(17, v24, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd number of elements, undisturbed policy, and mask +void TEST_CASE5(void) { + VSET(15, e8, m1); + VLOAD_8(v0, 0x00, 0x40); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 100, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2, v0.t"); + VCMP_U8(18, v3, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2, v0.t"); + VCMP_U16(19, v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 
8); + VLOAD_32(v3, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vredsum.vs v3, v1, v2, v0.t"); + VCMP_U32(20, v3, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c new file mode 100644 index 000000000..74b128fc1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vredxor.c @@ -0,0 +1,44 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test +void TEST_CASE1(void) { + VSET(4, e8, m1); + VLOAD_8(v1, 0x00, 0x01, 0x01, 0x00); + VLOAD_8(v2, 0x11); + asm volatile("vredxor.vs v3, v1, v2"); + VCMP_U8(1, v3, 0x11); + + VSET(4, e16, m1); + VLOAD_16(v1, 0x8000, 0x0301, 0x0101, 0x0001); + VLOAD_16(v2, 0xe001); + asm volatile("vredxor.vs v3, v1, v2"); + VCMP_U16(2, v3, 0x6200); + + VSET(4, e32, m1); + VLOAD_32(v1, 0x00000001, 0x10000001, 0x00000000, 0x00000000); + VLOAD_32(v2, 0x00001000); + asm volatile("vredxor.vs v3, v1, v2"); + VCMP_U32(3, v3, 0x10001000); + + VSET(4, e64, m2); + VLOAD_64(v2, 0x0000000000000000, 0x1000000000000001, 0x0000000000000000, + 0x0000000000000000); + VLOAD_64(v4, 0x0000000000000007); + asm volatile("vredxor.vs v6, v2, v4"); + VCMP_U64(4, v6, 0x1000000000000006); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c new file mode 100644 index 000000000..d7bce332d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrem.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, + 0x0c, 0x8e, 0xae, 0xa1, 0x93); + VLOAD_8(v3, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, + 0xdb, 0x37, 0x78, 0xe2, 0x85); + asm volatile("vrem.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x9b, 0x28, 0xec, 0xe9, 0x06, 0xfd, 0xfc, 0x02, 0x04, 0x37, + 0x09, 0x0c, 0xfc, 0xae, 0xfb, 0x93); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, + 0xe04c, 0xd954, 0xda74, 0xa394, 0x207a, 0x8975, 0xddd3, 0x897d); + VLOAD_16(v6, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 0x8c5c, + 0x4a71, 0x43a7, 0xe65d, 0x2bdc, 0x497b, 0x6aa0, 0x6071, 0xf431); + asm volatile("vrem.vv v2, v4, v6"); + VCMP_I16(2, v2, 0xfac3, 0xf1ad, 0xf7f0, 0xd084, 0xbbc6, 0xf6b4, 0xfb94, + 0xa20c, 0xe04c, 0xd954, 0xf417, 0xfb4c, 0x207a, 0xf415, 0xddd3, + 0xff93); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 0xca41954b, + 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xdcb6a562, 0xa979baf0, + 0xb480c184, 0x979555c6, 0x3f894108, 0x803bd362, 0x9038beec, + 0x22d7ca24); + VLOAD_32(v12, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, + 0x2f2c64a7, 0xebe4120c, 0x83852646, 0xfb1857b5, 0x25400571, + 0xab2d7393, 0xddb87ac8, 0x01149cdf, 0x62b2c8dc, 0xaed39563, + 0x41ec046e); + asm volatile("vrem.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x1bc2e57e, 0x2ffccae4, 0x004b37b9, 0xfb90411a, 0xca41954b, + 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xff0c3f6f, 0xf3f9c5d2, + 0xb480c184, 0xfe6be56e, 0x00ddb682, 0xe2ee9c3e, 0xe1652989, + 0x22d7ca24); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, + 0xecfe6fb53a8b043d, 0xd8a48a111d080e66, 0x7baccec6b5a29e3e, + 
0x8746dc00d1d7ff0b, 0x467babd497d4931b, 0x6f7f3e669faa900c, + 0x36e81d34c3ee3445, 0x99bcc4a480c648c5, 0xc8ae527a2cc4d908, + 0xce3b4c1da847fe6a, 0x3709710bc016c1fc, 0x81471426bbe09e45, + 0x0f0389282729456f); + VLOAD_64(v24, 0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, + 0xf7e905f27777369f, 0x73cbef014fd0f311, 0x4c3e4fc36800b443, + 0x4c283e06a5067444, 0xdc8295e57f30e905, 0x08207a363067024e, + 0x42aba773f21efc47, 0x5f00e9093d50b50f, 0x5ff0dcd41bf799fa, + 0xe8c1d1110518742a, 0x34fe1a3555bf07f0, 0xd1bce4800f79700f, + 0xff00f7d87b2c7068); + asm volatile("vrem.vv v8, v16, v24"); + VCMP_I64(4, v8, 0xffcde833b0225dfa, 0xead349f71426e37b, 0x559913931b91caf2, + 0xfd2c63d04b9c96ff, 0xd8a48a111d080e66, 0x2f6e7f034da1e9fb, + 0xd36f1a0776de734f, 0x22fe41ba17057c20, 0x05d909a62a6f7216, + 0x36e81d34c3ee3445, 0xf8bdadadbe16fdd4, 0xc8ae527a2cc4d908, + 0xfcb7a9fb9e171616, 0x020b56d66a57ba0c, 0xddcd4b269cedbe27, + 0x00120ed75ec3db87); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x9b, 0x28, 0xec, 0x86, 0x26, 0x85, 0xf7, 0x33, 0x46, 0x37, 0x2c, + 0x0c, 0x8e, 0xae, 0xa1, 0x93); + VLOAD_8(v3, 0x84, 0x5e, 0x3b, 0xdf, 0x10, 0xfc, 0x05, 0xcf, 0x42, 0xbe, 0x23, + 0xdb, 0x37, 0x78, 0xe2, 0x85); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vrem.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x28, 0, 0xe9, 0, 0xfd, 0, 0x02, 0, 0x37, 0, 0x0c, 0, 0xae, + 0, 0x93); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xb58f, 0xa184, 0xdcf9, 0xd084, 0xbbc6, 0xcf0e, 0xbbd4, 0xa20c, + 0xe04c, 0xd954, 0xda74, 0xa394, 0x207a, 0x8975, 0xddd3, 0x897d); + VLOAD_16(v6, 0x4534, 0xafd7, 0xf703, 0x92c2, 0x97e3, 0xd85a, 0x1540, 0x8c5c, + 0x4a71, 0x43a7, 0xe65d, 0x2bdc, 0x497b, 0x6aa0, 0x6071, 0xf431); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vrem.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0xf1ad, 0, 0xd084, 0, 0xf6b4, 0, 0xa20c, 0, 0xd954, 0, + 0xfb4c, 0, 0xf415, 0, 0xff93); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x620db972, 0x60b1f870, 0x7d1badcf, 0x90a85eb6, 
0xca41954b, + 0x10dc3772, 0xf7749e82, 0x027ed4d3, 0xdcb6a562, 0xa979baf0, + 0xb480c184, 0x979555c6, 0x3f894108, 0x803bd362, 0x9038beec, + 0x22d7ca24); + VLOAD_32(v12, 0xb9b52c0c, 0x30b52d8c, 0x832f89ea, 0x95181d9c, 0x85a6a24f, + 0x2f2c64a7, 0xebe4120c, 0x83852646, 0xfb1857b5, 0x25400571, + 0xab2d7393, 0xddb87ac8, 0x01149cdf, 0x62b2c8dc, 0xaed39563, + 0x41ec046e); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vrem.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x2ffccae4, 0, 0xfb90411a, 0, 0x10dc3772, 0, 0x027ed4d3, 0, + 0xf3f9c5d2, 0, 0xfe6be56e, 0, 0xe2ee9c3e, 0, 0x22d7ca24); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fc0a4e82116b913, 0xbd1a679edd2667e1, 0x559913931b91caf2, + 0xecfe6fb53a8b043d, 0xd8a48a111d080e66, 0x7baccec6b5a29e3e, + 0x8746dc00d1d7ff0b, 0x467babd497d4931b, 0x6f7f3e669faa900c, + 0x36e81d34c3ee3445, 0x99bcc4a480c648c5, 0xc8ae527a2cc4d908, + 0xce3b4c1da847fe6a, 0x3709710bc016c1fc, 0x81471426bbe09e45, + 0x0f0389282729456f); + VLOAD_64(v24, 0xf2473f877dd9c3df, 0xd2471da7c8ff8466, 0x7e93451b38765d03, + 0xf7e905f27777369f, 0x73cbef014fd0f311, 0x4c3e4fc36800b443, + 0x4c283e06a5067444, 0xdc8295e57f30e905, 0x08207a363067024e, + 0x42aba773f21efc47, 0x5f00e9093d50b50f, 0x5ff0dcd41bf799fa, + 0xe8c1d1110518742a, 0x34fe1a3555bf07f0, 0xd1bce4800f79700f, + 0xff00f7d87b2c7068); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vrem.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0xead349f71426e37b, 0, 0xfd2c63d04b9c96ff, 0, + 0x2f6e7f034da1e9fb, 0, 0x22fe41ba17057c20, 0, 0x36e81d34c3ee3445, 0, + 0xc8ae527a2cc4d908, 0, 0x020b56d66a57ba0c, 0, 0x00120ed75ec3db87); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, + 0x2a, 0xb2, 0x57, 0xe5, 0x6c); + int64_t scalar = 5; + asm volatile("vrem.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(9, v1, 0x01, 0x04, 0x00, 0xfe, 0x00, 0x03, 0xff, 0x03, 0xfc, 0x01, + 0x00, 0x02, 0xfd, 0x02, 0xfe, 0x03); + + VSET(16, e16, m2); + 
VLOAD_16(v4, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, + 0xb6ec, 0x69e8, 0x0abf, 0x441e, 0xa420, 0x396c, 0xe7c9, 0xa464); + scalar = -538; + asm volatile("vrem.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0xff2e, 0xfe9d, 0x0089, 0x00f4, 0xffe8, 0xff5c, 0x01c7, + 0x0218, 0xfe60, 0x00d4, 0x003d, 0x00de, 0xfe7e, 0x00ae, 0xfee7, + 0xfec2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf937dbf9, 0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, + 0x5c639253, 0xca0f2abf, 0x57fec97b, 0x39496099, 0x8bfcdd58, + 0x0f19f6e2, 0x2070c8d4, 0x8c689324, 0x2eecd9d7, 0xe2907e94, + 0xb6cc2d44); + scalar = 649; + asm volatile("vrem.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0xfffffee4, 0x00000116, 0x00000160, 0xffffffef, 0x00000217, + 0x00000275, 0xfffffea6, 0x000000a9, 0x000000e4, 0xfffffe09, + 0x00000272, 0x0000023c, 0xffffff79, 0x000000ce, 0xffffffb3, + 0xfffffe0e); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, + 0x2ca68636bacbc0bb, 0x275575f3e3fea940, 0x8f546251aaad354a, + 0xb1462969035e0fa7, 0x5c9cdc19273ce111, 0x25a8487741ee75db, + 0x38819f95e162663e, 0x698d19ce0e74ff8d, 0xb525257a9b5cd972, + 0xb308a4fe0dcbb2f3, 0xf2fa735abc2db4d0, 0xc73c476461ac3f28, + 0xb2830c2607bfffcc); + scalar = -59223; + asm volatile("vrem.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0xffffffffffff299e, 0xffffffffffff1f8a, 0xffffffffffff57aa, + 0x000000000000cc8c, 0x000000000000416e, 0xffffffffffffcecd, + 0xffffffffffff7e24, 0x000000000000397b, 0x000000000000bb50, + 0x0000000000006b00, 0x0000000000004f3f, 0xffffffffffff9a21, + 0xffffffffffffae24, 0xffffffffffffca84, 0xffffffffffffa7fb, + 0xffffffffffff84dd); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x5b, 0x3b, 0xc4, 0x95, 0x41, 0x71, 0x9b, 0x67, 0x84, 0x2e, 0x0a, + 0x2a, 0xb2, 0x57, 0xe5, 0x6c); + int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vrem.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + 
VCMP_I8(13, v1, 0, 0x04, 0, 0xfe, 0, 0x03, 0, 0x03, 0, 0x01, 0, 0x02, 0, 0x02, + 0, 0x03); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xc670, 0x8f3b, 0x200f, 0x52ea, 0xfdce, 0xcf06, 0x57f1, 0x1936, + 0xb6ec, 0x69e8, 0x0abf, 0x441e, 0xa420, 0x396c, 0xe7c9, 0xa464); + scalar = -538; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vrem.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0xfe9d, 0, 0x00f4, 0, 0xff5c, 0, 0x0218, 0, 0x00d4, 0, + 0x00de, 0, 0x00ae, 0, 0xfec2); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xf937dbf9, 0x6d855b59, 0x3bd09126, 0xaed11886, 0x6eb6f4bd, + 0x5c639253, 0xca0f2abf, 0x57fec97b, 0x39496099, 0x8bfcdd58, + 0x0f19f6e2, 0x2070c8d4, 0x8c689324, 0x2eecd9d7, 0xe2907e94, + 0xb6cc2d44); + scalar = 649; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vrem.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x00000116, 0, 0xffffffef, 0, 0x00000275, 0, 0x000000a9, + 0, 0xfffffe09, 0, 0x0000023c, 0, 0x000000ce, 0, 0xfffffe0e); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x94236504e03e6525, 0x8d219d7afe5b2fb0, 0xc65a0b252860ab73, + 0x2ca68636bacbc0bb, 0x275575f3e3fea940, 0x8f546251aaad354a, + 0xb1462969035e0fa7, 0x5c9cdc19273ce111, 0x25a8487741ee75db, + 0x38819f95e162663e, 0x698d19ce0e74ff8d, 0xb525257a9b5cd972, + 0xb308a4fe0dcbb2f3, 0xf2fa735abc2db4d0, 0xc73c476461ac3f28, + 0xb2830c2607bfffcc); + scalar = -59223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vrem.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0xffffffffffff1f8a, 0, 0x000000000000cc8c, 0, + 0xffffffffffffcecd, 0, 0x000000000000397b, 0, 0x0000000000006b00, 0, + 0xffffffffffff9a21, 0, 0xffffffffffffca84, 0, 0xffffffffffff84dd); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c new file mode 100644 index 000000000..a34fe33f8 
--- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vremu.c @@ -0,0 +1,232 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, + 0x5c, 0x75, 0xdd, 0x0c, 0x42); + VLOAD_8(v3, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, + 0xfd, 0x94, 0xe0, 0xb7, 0xe6); + asm volatile("vremu.vv v1, v2, v3"); + VCMP_I8(1, v1, 0x11, 0x20, 0x6a, 0x37, 0x14, 0x26, 0x05, 0x25, 0x3f, 0x01, + 0x34, 0x5c, 0x75, 0xdd, 0x0c, 0x42); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, + 0xae43, 0x9e4a, 0xa0c2, 0xdf31, 0xb66f, 0x286d, 0x1d15, 0x0480); + VLOAD_16(v6, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, + 0x6c25, 0x9c0d, 0x5950, 0x1887, 0xbc18, 0x628e, 0x6561, 0x407f); + asm volatile("vremu.vv v2, v4, v6"); + VCMP_I16(2, v2, 0x3f86, 0x0338, 0x5ae4, 0x1bef, 0x53ed, 0x2105, 0x0713, + 0x13d5, 0x421e, 0x023d, 0x4772, 0x0272, 0xb66f, 0x286d, 0x1d15, + 0x0480); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, + 0xe0350cba, 0xc72768ec, 0x9f977a31, 0x5e1c2ac4, 0xcd44b950, + 0x39dc32f4, 0x1dc82ea3, 0xd1cf125f, 0xc677269c, 0x6405ec5b, + 0x653a05ee); + VLOAD_32(v12, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, + 0xc4576bff, 0x8e998104, 0x4a23ba44, 0x994b4630, 0x017ee935, + 0xa38c7dae, 0x893dfb15, 0x4969125f, 0x9a951d27, 0x09b6017f, + 0x5a0a7906); + asm volatile("vremu.vv v4, v8, v12"); + VCMP_I32(3, v4, 0x647d8841, 0x40e7af5f, 0x1cf0f294, 0x199e7b44, 0x0e596411, + 0x1bdda0bb, 0x388de7e8, 0x0b5005a9, 0x5e1c2ac4, 0x0059ebf3, + 0x39dc32f4, 0x1dc82ea3, 0x3efceda1, 0x2be20975, 0x02e9dd65, + 0x0b2f8ce8); + + VSET(16, e64, m8); 
+ VLOAD_64(v16, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, + 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0xc3cc623deda95ca7, + 0x118c4335397980bf, 0xc3e2d283cb39133d, 0x71837e24114813fc, + 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x60e89a1e5a43d9f5, + 0x93a87cf6308ad888, 0xca3976f49ac6a681, 0xcfc7c8f225b47766, + 0xeaa4ce2cf507b527); + VLOAD_64(v24, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, + 0xafc4ff6b8eb4d201, 0xfba36cabfc3fb4a0, 0x9c3ed271bf173d29, + 0xe8b7e325c9ff594b, 0x05169e56693600d7, 0x08e72c4bb62ad267, + 0xbd9677ee996d5fa5, 0x900295e8502a9817, 0x39e0bfa9927679a8, + 0xdd0ca7797d532524, 0x6f8f78c47ddee88a, 0x2f40f7661cca9eee, + 0x8e4a3b2358129e92); + asm volatile("vremu.vv v8, v16, v24"); + VCMP_I64(4, v8, 0x09ab27501ccac4a6, 0x0f33ec995811c4cc, 0x0269786490d67dc2, + 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0x278d8fcc2e921f7e, + 0x118c4335397980bf, 0x028751b02d34f353, 0x06ad6a9787463728, + 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x2707da74c7cd604d, + 0x93a87cf6308ad888, 0x5aa9fe301ce7bdf7, 0x12c3eb59b289fbae, + 0x5c5a93099cf51695); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x11, 0xd2, 0x6a, 0xcc, 0x14, 0xe4, 0x2c, 0x7f, 0xd2, 0x6b, 0x34, + 0x5c, 0x75, 0xdd, 0x0c, 0x42); + VLOAD_8(v3, 0x77, 0xb2, 0xd1, 0x95, 0x6f, 0xbe, 0x0d, 0x5a, 0x93, 0x02, 0xaf, + 0xfd, 0x94, 0xe0, 0xb7, 0xe6); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vremu.vv v1, v2, v3, v0.t"); + VCMP_I8(5, v1, 0, 0x20, 0, 0x37, 0, 0x26, 0, 0x25, 0, 0x01, 0, 0x5c, 0, 0xdd, + 0, 0x42); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf77a, 0x54d7, 0xe527, 0xe28f, 0x53ed, 0x9301, 0xde4f, 0xcb17, + 0xae43, 0x9e4a, 0xa0c2, 0xdf31, 0xb66f, 0x286d, 0x1d15, 0x0480); + VLOAD_16(v6, 0x5bfa, 0x0571, 0x8a43, 0x6350, 0xb962, 0x71fc, 0x0b54, 0x1e8b, + 0x6c25, 0x9c0d, 0x5950, 0x1887, 0xbc18, 0x628e, 0x6561, 0x407f); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vremu.vv v2, v4, v6, v0.t"); + VCMP_I16(6, v2, 0, 0x0338, 0, 0x1bef, 0, 0x2105, 0, 
0x13d5, 0, 0x023d, 0, + 0x0272, 0, 0x286d, 0, 0x0480); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x647d8841, 0xf9e0aabf, 0xea4aa122, 0xd6178d3e, 0x64a7afe5, + 0xe0350cba, 0xc72768ec, 0x9f977a31, 0x5e1c2ac4, 0xcd44b950, + 0x39dc32f4, 0x1dc82ea3, 0xd1cf125f, 0xc677269c, 0x6405ec5b, + 0x653a05ee); + VLOAD_32(v12, 0x89828d99, 0x5c7c7db0, 0x2911efb6, 0x1f6982ff, 0x564e4bd4, + 0xc4576bff, 0x8e998104, 0x4a23ba44, 0x994b4630, 0x017ee935, + 0xa38c7dae, 0x893dfb15, 0x4969125f, 0x9a951d27, 0x09b6017f, + 0x5a0a7906); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vremu.vv v4, v8, v12, v0.t"); + VCMP_I32(7, v4, 0, 0x40e7af5f, 0, 0x199e7b44, 0, 0x1bdda0bb, 0, 0x0b5005a9, 0, + 0x0059ebf3, 0, 0x1dc82ea3, 0, 0x2be20975, 0, 0x0b2f8ce8); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x09ab27501ccac4a6, 0x97eb5bf189b39a0e, 0x26f588069b0858c4, + 0x9a251c274a394df3, 0x54b3587602f8d9d2, 0xc3cc623deda95ca7, + 0x118c4335397980bf, 0xc3e2d283cb39133d, 0x71837e24114813fc, + 0x85a1f65867438a09, 0x80f01e0588afc9a0, 0x60e89a1e5a43d9f5, + 0x93a87cf6308ad888, 0xca3976f49ac6a681, 0xcfc7c8f225b47766, + 0xeaa4ce2cf507b527); + VLOAD_64(v24, 0x9fed81c550326301, 0x445bb7ac18d0eaa1, 0x040f8ff58f5adf72, + 0xafc4ff6b8eb4d201, 0xfba36cabfc3fb4a0, 0x9c3ed271bf173d29, + 0xe8b7e325c9ff594b, 0x05169e56693600d7, 0x08e72c4bb62ad267, + 0xbd9677ee996d5fa5, 0x900295e8502a9817, 0x39e0bfa9927679a8, + 0xdd0ca7797d532524, 0x6f8f78c47ddee88a, 0x2f40f7661cca9eee, + 0x8e4a3b2358129e92); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vremu.vv v8, v16, v24, v0.t"); + VCMP_I64(8, v8, 0, 0x0f33ec995811c4cc, 0, 0x9a251c274a394df3, 0, + 0x278d8fcc2e921f7e, 0, 0x028751b02d34f353, 0, 0x85a1f65867438a09, 0, + 0x2707da74c7cd604d, 0, 0x5aa9fe301ce7bdf7, 0, 0x5c5a93099cf51695); +}; + +void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, + 0x24, 0xc5, 0x2b, 0x37, 0xbe); + uint64_t scalar = 5; + asm volatile("vremu.vx v1, v2, %[A]" ::[A] "r"(scalar)); + 
VCMP_I8(9, v1, 0x02, 0x01, 0x00, 0x02, 0x04, 0x02, 0x04, 0x03, 0x01, 0x00, + 0x02, 0x01, 0x02, 0x03, 0x00, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, + 0xbae0, 0x9f01, 0x1d0f, 0xf53c, 0x5461, 0x341e, 0x9ae7, 0x032b); + scalar = 538; + asm volatile("vremu.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_I16(10, v2, 0x018b, 0x01f7, 0x01ca, 0x00ce, 0x0202, 0x00c8, 0x01d7, + 0x011c, 0x01f0, 0x0163, 0x01bd, 0x0174, 0x0051, 0x01ae, 0x017d, + 0x0111); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, + 0x521f01df, 0x09e8f77a, 0x5b231aa2, 0xd4bea1df, 0x529b4f34, + 0x800a5d88, 0xe7b02512, 0xf7954032, 0x48652b8c, 0x8b14b883, + 0x121a9b8b); + scalar = 649; + asm volatile("vremu.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_I32(11, v4, 0x00000039, 0x00000141, 0x0000020b, 0x0000015f, 0x0000008a, + 0x00000199, 0x00000214, 0x0000006c, 0x0000025d, 0x000001a6, + 0x000000d2, 0x00000168, 0x000001e6, 0x00000266, 0x00000188, + 0x00000159); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, + 0x62d79deed97681f5, 0x56258335e007492c, 0x2428afa90a14fa61, + 0xd62824119c3084c6, 0xef97986ae9ea2da7, 0xfc28c84e37024f10, + 0x1f475f820dec67e1, 0x9c180cfef468c050, 0x4be017933813e27e, + 0xafd2b5edb83df693, 0xddd4766a628d4c30, 0xa1f4d0f48a6ac917, + 0x827a07db9e6a8897); + scalar = 9223; + asm volatile("vremu.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_I64(12, v8, 0x000000000000167d, 0x00000000000015f2, 0x00000000000019be, + 0x00000000000003fd, 0x00000000000010ce, 0x0000000000001863, + 0x0000000000000750, 0x0000000000000062, 0x0000000000002237, + 0x00000000000002bc, 0x0000000000000061, 0x0000000000001b82, + 0x0000000000001109, 0x0000000000000fb7, 0x00000000000011e8, + 0x0000000000000545); +}; + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x98, 0x1a, 0xbe, 0x48, 0x7c, 0xd9, 0x5e, 0x58, 0x2e, 0x46, 0x0c, + 0x24, 0xc5, 0x2b, 0x37, 
0xbe); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v1); + asm volatile("vremu.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(13, v1, 0, 0x01, 0, 0x02, 0, 0x02, 0, 0x03, 0, 0x00, 0, 0x01, 0, 0x03, + 0, 0x00); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf11f, 0xb8cd, 0xb686, 0xc226, 0xc35a, 0xd724, 0x03f1, 0xcf10, + 0xbae0, 0x9f01, 0x1d0f, 0xf53c, 0x5461, 0x341e, 0x9ae7, 0x032b); + scalar = 538; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v2); + asm volatile("vremu.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I16(14, v2, 0, 0x01f7, 0, 0x00ce, 0, 0x00c8, 0, 0x011c, 0, 0x0163, 0, + 0x0174, 0, 0x01ae, 0, 0x0111); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x9c36da54, 0x1b1dea93, 0x80be8651, 0x03a23fcf, 0x26973d17, + 0x521f01df, 0x09e8f77a, 0x5b231aa2, 0xd4bea1df, 0x529b4f34, + 0x800a5d88, 0xe7b02512, 0xf7954032, 0x48652b8c, 0x8b14b883, + 0x121a9b8b); + scalar = 649; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vremu.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I32(15, v4, 0, 0x00000141, 0, 0x0000015f, 0, 0x00000199, 0, 0x0000006c, + 0, 0x000001a6, 0, 0x00000168, 0, 0x00000266, 0, 0x00000159); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x1882c5f4b911b949, 0x6ca37133428ed155, 0xbacb9408aa8251bf, + 0x62d79deed97681f5, 0x56258335e007492c, 0x2428afa90a14fa61, + 0xd62824119c3084c6, 0xef97986ae9ea2da7, 0xfc28c84e37024f10, + 0x1f475f820dec67e1, 0x9c180cfef468c050, 0x4be017933813e27e, + 0xafd2b5edb83df693, 0xddd4766a628d4c30, 0xa1f4d0f48a6ac917, + 0x827a07db9e6a8897); + scalar = 9223; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vremu.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I64(16, v8, 0, 0x00000000000015f2, 0, 0x00000000000003fd, 0, + 0x0000000000001863, 0, 0x0000000000000062, 0, 0x00000000000002bc, 0, + 0x0000000000001b82, 0, 0x0000000000000fb7, 0, 0x0000000000000545); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c new file mode 100644 index 000000000..7df4dc498 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrgather.c @@ -0,0 +1,72 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + VLOAD_8(v6, 1, 0, 4, 3, 2); + __asm__ volatile("vrgather.vv v2, v4, v6"); + VEC_CMP_8(1, v2, 20, 10, 50, 40, 30); +} + +void TEST_CASE2() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + VLOAD_8(v6, 1, 0, 4, 3, 2); + VLOAD_U8(v0, 26, 0, 0, 0, 0); + CLEAR(v2); + __asm__ volatile("vrgather.vv v2, v4, v6, v0.t"); + VEC_CMP_8(2, v2, 0, 10, 0, 40, 30); +} + +void TEST_CASE3() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + uint64_t scalar = 3; + __asm__ volatile("vrgather.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VEC_CMP_8(3, v2, 40, 40, 40, 40, 40); +} + +void TEST_CASE4() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + uint64_t scalar = 3; + VLOAD_U8(v0, 7, 0, 0, 0, 0); + CLEAR(v2); + __asm__ volatile("vrgather.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_8(4, v2, 40, 40, 40, 0, 0); +} + +void TEST_CASE5() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + __asm__ volatile("vrgather.vi v2, v4, 3"); + VEC_CMP_8(5, v2, 40, 40, 40, 40, 40); +} + +void TEST_CASE6() { + VSET(5, e8, m1); + VLOAD_8(v4, 10, 20, 30, 40, 50); + VLOAD_U8(v0, 7, 0, 0, 0, 0); + CLEAR(v2); + __asm__ volatile("vrgather.vi v2, v4, 3, v0.t"); + VEC_CMP_8(6, v2, 40, 40, 40, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c new file mode 100644 index 000000000..2cab8099b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vrsub.c @@ -0,0 +1,136 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v3, v1, 10"); + VCMP_U8(1, v3, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v4, v2, 10"); + VCMP_U16(2, v4, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v8, v4, 10"); + VCMP_U32(3, v8, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vi v16, v8, 10"); + VCMP_U64(4, v16, 5, 0, -5, -10, -15, -20, -25, -30, 5, 0, -5, -10, -15, -20, + -25, -30); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v3); + asm volatile("vrsub.vi v3, v1, 10, v0.t"); + VCMP_U8(5, v3, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v4); + asm volatile("vrsub.vi v4, v2, 10, v0.t"); + VCMP_U16(6, v4, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + + VSET(16, 
e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v8); + asm volatile("vrsub.vi v8, v4, 10, v0.t"); + VCMP_U32(7, v8, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v16); + asm volatile("vrsub.vi v16, v8, 10, v0.t"); + VCMP_U64(8, v16, 5, 0, 0, 0, -15, -20, 0, 0, 5, 0, 0, 0, -15, -20, 0, 0); +} + +void TEST_CASE3(void) { + const uint64_t scalar = 25; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v3, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vrsub.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 20, 15, 10, 5, 0, -5, -10, -15, 20, 15, 10, 5, 0, -5, -10, + -15); +} + +void TEST_CASE4(void) { + const uint64_t scalar = 25; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v3); + asm volatile("vrsub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v3, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + 
VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v4); + asm volatile("vrsub.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v8); + asm volatile("vrsub.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0x33, 0x33); + VCLEAR(v16); + asm volatile("vrsub.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 20, 15, 0, 0, 0, -5, 0, 0, 20, 15, 0, 0, 0, -5, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c new file mode 100644 index 000000000..d75926e2b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs.c @@ -0,0 +1,93 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_O8[16] __attribute__((aligned(AXI_DWIDTH))) = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +static volatile uint16_t ALIGNED_O16[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}; + +static volatile uint32_t ALIGNED_O32[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000}; + +static volatile uint64_t ALIGNED_O64[16] + __attribute__((aligned(AXI_DWIDTH))) = { + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000}; + +// Misaligned access wrt 128-bit: every test stores 15 elements starting at element 1 of its buffer, and element 0 is expected to stay zero +void TEST_CASE1(void) { + VSET(15, e8, m1); + VLOAD_8(v1, 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20, + 0x9f, 0xe4, 0x19, 0x20); + asm volatile("vse8.v v1, (%0)" ::"r"(&ALIGNED_O8[1])); + VVCMP_U8(1, ALIGNED_O8, 0x00, 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20, 0x9f, + 0xe4, 0x19, 0x20, 0x9f, 0xe4, 0x19, 0x20); +} + +void TEST_CASE2(void) { + VSET(15, e16, m2); + VLOAD_16(v2, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, 0x1546, 0x3220, 0x9f11, + 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, 0x1546, 0x3220); + asm volatile("vse16.v v2, (%0)" ::"r"(&ALIGNED_O16[1])); + VVCMP_U16(2, ALIGNED_O16, 0x0000, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, + 
0x1546, 0x3220); +} + +void TEST_CASE3(void) { + VSET(15, e32, m4); + VLOAD_32(v4, 0xe1356784, 0x13241139, 0x20862497, 0x9f872456, 0xe1356784, + 0x13241139, 0x20862497, 0x9f872456, 0xe1356784, 0x13241139, + 0x20862497, 0x9f872456, 0xe1356784, 0x13241139, 0x20862497); + asm volatile("vse32.v v4, (%0)" ::"r"(&ALIGNED_O32[1])); + VVCMP_U32(3, ALIGNED_O32, 0x00000000, 0xe1356784, 0x13241139, 0x20862497, + 0x9f872456, 0xe1356784, 0x13241139, 0x20862497, 0x9f872456, + 0xe1356784, 0x13241139, 0x20862497, 0x9f872456, 0xe1356784, + 0x13241139, 0x20862497); +} + +void TEST_CASE4(void) { + VSET(15, e64, m8); + VLOAD_64(v8, 0xe135578794246784, 0x1315345345241139, 0x2086252110062497, + 0x1100229933847136, 0xaaffaaffaaffaaff, 0xaf87245315434136, + 0xa135578794246784, 0x2315345345241139, 0x1086252110062497, + 0x1100229933847134, 0xaaffaaffaaffaaf4, 0x9315345345241139, + 0x9086252110062497, 0x9100229933847134, 0x9affaaffaaffaaf4); + asm volatile("vse64.v v8, (%0)" ::"r"(&ALIGNED_O64[1])); + VVCMP_U64(4, ALIGNED_O64, 0x0000000000000000, 0xe135578794246784, + 0x1315345345241139, 0x2086252110062497, 0x1100229933847136, + 0xaaffaaffaaffaaff, 0xaf87245315434136, 0xa135578794246784, + 0x2315345345241139, 0x1086252110062497, 0x1100229933847134, + 0xaaffaaffaaffaaf4, 0x9315345345241139, 0x9086252110062497, + 0x9100229933847134, 0x9affaaffaaffaaf4); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c new file mode 100644 index 000000000..1374f02f8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vs1r.c @@ -0,0 +1,162 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// +// For simplicity, this test depends on vl1r + +#include "vector_macros.h" + +uint64_t counter; + +// Maximum size: (VLEN/8 Bytes * (MAX_LMUL == 8)) = VLEN +// Define VLEN before compiling me +// #define VLEN 128 +uint8_t gold_vec_8b[VLEN]; +uint8_t zero_vec_8b[VLEN]; +uint8_t buf_vec_8b[VLEN]; + +////////// +// vs1r // +////////// + +// 1 whole register store: v16 is filled with vl1re8.v and written back with vs1r.v +void TEST_CASE1(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(1, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Check that the whole register was loaded + asm volatile("vs1r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + asm volatile("vs1r.v v17, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 0, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +////////// +// vs2r // +////////// + +// 2 whole registers store (vl2re8.v, then vs2r.v). NOTE(review): the INIT_MEM_* calls use VLEN / 4 but the VMCMP calls use VLEN / 8 - confirm the checked length +void TEST_CASE2(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 4); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 4); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 4); + // Set vl and vtype to super short values + VSET(1, e64, m4); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl2re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Check that the whole register was loaded + asm volatile("vs2r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 1, buf_vec_8b, gold_vec_8b, VLEN / 
8); + // Check that the neighbour registers are okay + asm volatile("vs2r.v v18, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 1, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +////////// +// vs4r // +////////// + +// 4 whole registers store (vl4re8.v, then vs4r.v). NOTE(review): the INIT_MEM_* calls use VLEN / 2 but the VMCMP calls use VLEN / 8 - confirm the checked length +void TEST_CASE3(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 2); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 2); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 2); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl4re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Check that the whole register was loaded + asm volatile("vs4r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 2, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + asm volatile("vs4r.v v20, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 2, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +////////// +// vs8r // +////////// + +// 8 whole registers store (vl8re8.v, then vs8r.v). NOTE(review): the INIT_MEM_* calls use VLEN but the VMCMP calls use VLEN / 8 - confirm the checked length +void TEST_CASE4(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN); + // Set vl and vtype to super short values + VSET(1, e64, m8); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + VCLEAR(v24); + // Load a buffer from memory - whole register load + asm volatile("vl8re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Check that the whole register was loaded + asm volatile("vs8r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 3, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + asm volatile("vs8r.v v24, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 3, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +//////////// +// 
Others // +//////////// + +// Check with initial vl == 0 (VSET(0, e64, m2)); the same full-register checks as TEST_CASE1 are applied +void TEST_CASE5(void) { + // Initialize a golden vector + INIT_MEM_CNT(gold_vec_8b, VLEN / 8); + // Initialize a zero golden vector + INIT_MEM_ZEROES(zero_vec_8b, VLEN / 8); + // Reserve space for a buffer in memory + INIT_MEM_ZEROES(buf_vec_8b, VLEN / 8); + // Set vl and vtype to super short values + VSET(0, e64, m2); + // Initialize register + neighbours to pattern value + VCLEAR(v16); + // Load a buffer from memory - whole register load + asm volatile("vl1re8.v v16, (%0)" ::"r"(gold_vec_8b)); + // Check that the whole register was loaded + asm volatile("vs1r.v v16, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, gold_vec_8b, VLEN / 8); + // Check that the neighbour registers are okay + asm volatile("vs1r.v v17, (%0)" ::"r"(buf_vec_8b)); + VMCMP(uint8_t, % hhu, 4, buf_vec_8b, zero_vec_8b, VLEN / 8); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c new file mode 100644 index 000000000..cd224c730 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsadd.c @@ -0,0 +1,99 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, -80, 2, 100, 4); + VLOAD_8(v2, -90, 2, 50, 4); + __asm__ volatile("vsadd.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 0x80, 4, 127, 8); + read_vxsat(vxsat); + check_vxsat(1, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE2(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, -80, 2, 100, 4); + VLOAD_8(v2, -90, 2, 50, 4); + VLOAD_8(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsadd.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 4, 0, 8); + read_vxsat(vxsat); + check_vxsat(2, vxsat, 0); + reset_vxsat; +} + +void TEST_CASE3(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0x7FFFFFFB, 3, 4); + __asm__ volatile("vsadd.vi v3, v1, 5" ::); + VCMP_U32(3, v3, 6, 0x7FFFFFFF, 8, 9); + read_vxsat(vxsat); + check_vxsat(3, vxsat, 1); + reset_vxsat; +} + +// NOTE(review): an older note said not to use VCLEAR here (values came out off by 1), yet VCLEAR(v3) is called below - confirm which is intended +void TEST_CASE4(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 0xFFFFFFFD, 0x7FFFFFFC); + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsadd.vi v3, v1, 5, v0.t" ::); + VCMP_U32(4, v3, 0, 7, 0, 0x7FFFFFFF); + read_vxsat(vxsat); + check_vxsat(4, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE5(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 0x7FFFFFFD, 2, 3, 4); + const uint32_t scalar = 5; + __asm__ volatile("vsadd.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(5, v3, 0x7FFFFFFF, 7, 8, 9); + read_vxsat(vxsat); + check_vxsat(5, vxsat, 1); + reset_vxsat; +} + +// NOTE(review): an older note said not to use VCLEAR here (values came out off by 1), yet VCLEAR(v3) is called below - confirm which is intended +void TEST_CASE6(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0x7ffffffC, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsadd.vx v3, v1, %[A], v0.t" ::[A] 
"r"(scalar)); + VCMP_U32(6, v3, 0, 0x7FFFFFFF, 0, 9); + read_vxsat(vxsat); + check_vxsat(6, vxsat, 1); + reset_vxsat; +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c new file mode 100644 index 000000000..f2fd3d03d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsaddu.c @@ -0,0 +1,113 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, 133, 2, 220, 4); + VLOAD_8(v2, 133, 2, 50, 4); + __asm__ volatile("vsaddu.vv v3, v1, v2" ::); + VCMP_U8(1, v3, 255, 4, 255, 8); + read_vxsat(vxsat); + check_vxsat(1, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE2(void) { + uint64_t vxsat; + VSET(4, e8, m1); + VLOAD_8(v1, 1, 2, 3, 154); + VLOAD_8(v2, 1, 2, 3, 124); + VLOAD_8(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vv v3, v1, v2, v0.t" ::); + VCMP_U8(2, v3, 0, 4, 0, 255); + read_vxsat(vxsat); + check_vxsat(2, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE3(void) { + uint64_t vxsat; + VSET(4, e32, m2); + VLOAD_32(v2, 1, 0xFFFFFFFB, 3, 4); + __asm__ volatile("vsaddu.vi v6, v2, 5" ::); + VCMP_U32(3, v6, 6, 0xFFFFFFFF, 8, 9); + read_vxsat(vxsat); + check_vxsat(3, vxsat, 1); + reset_vxsat; +} + +// NOTE(review): an older note said not to use VCLEAR here (values came out off by 1), yet VCLEAR(v3) is called below - confirm which is intended +void TEST_CASE4(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 2, 0xFFFFFFFD, 0xFFFFFFFC); + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vi v3, v1, 5, v0.t" ::); + VCMP_U32(4, v3, 0, 7, 0, 0xFFFFFFFF); + 
read_vxsat(vxsat); + check_vxsat(4, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE5(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 0xFFFFFFFD, 2, 3, 4); + const uint32_t scalar = 5; + __asm__ volatile("vsaddu.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U32(5, v3, 0xFFFFFFFF, 7, 8, 9); + read_vxsat(vxsat); + check_vxsat(5, vxsat, 1); + reset_vxsat; +} + +// NOTE(review): an older note said not to use VCLEAR here (values came out off by 1), yet VCLEAR(v3) is called below - confirm which is intended +void TEST_CASE6(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0xfffffffC, 3, 4); + const uint32_t scalar = 5; + VLOAD_32(v0, 0xA, 0x0, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(6, v3, 0, 0xFFFFFFFF, 0, 9); + read_vxsat(vxsat); + check_vxsat(6, vxsat, 1); + reset_vxsat; +} + +void TEST_CASE7(void) { + uint64_t vxsat; + VSET(4, e32, m1); + VLOAD_32(v1, 1, 0x0000FFFF, 3, 4); + VLOAD_32(v2, 0xA, 0xFFFF0000, 0x0, 0x0); + VCLEAR(v3); + __asm__ volatile("vsaddu.vv v3, v1, v2" ::); + VCMP_U32(7, v3, 0xB, 0xFFFFFFFF, 3, 4); + read_vxsat(vxsat); + check_vxsat(7, vxsat, 0); + reset_vxsat; +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c new file mode 100644 index 000000000..09d61e8b7 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsbc.c @@ -0,0 +1,76 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v2, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v3, v1, v2, v0"); + VCMP_U8(1, v3, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v6, v2, v4, v0"); + VCMP_U16(2, v6, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v12, v4, v8, v0"); + VCMP_U32(3, v12, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 8, 7, 6, 5, 4, 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vvm v24, v8, v16, v0"); + VCMP_U64(4, v24, -7, -6, -3, -2, 1, 2, 5, 6, 0, -1, 0, -1, 0, -1, 0, -1); +}; + +void TEST_CASE2(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vxm v3, v1, %[A], v0" ::[A] "r"(scalar)); + VCMP_U8(5, v3, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); + + VSET(16, e16, m2); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vxm v4, v2, %[A], v0" ::[A] "r"(scalar)); + VCMP_U16(6, v4, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); + + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 
0xAA, 0xAA); + asm volatile("vsbc.vxm v8, v4, %[A], v0" ::[A] "r"(scalar)); + VCMP_U32(7, v8, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); + + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsbc.vxm v16, v8, %[A], v0" ::[A] "r"(scalar)); + VCMP_U64(8, v16, -4, -4, -2, -2, 0, 0, 2, 2, -4, -4, -2, -2, 0, 0, 2, 2); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c new file mode 100644 index 000000000..70f5ca3a0 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse1.c @@ -0,0 +1,57 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti +// +// For simplicity, this test depends on vle1 + +#include "vector_macros.h" + +#define AXI_DWIDTH 128 + +static volatile uint8_t ALIGNED_I8_GOLD[16] + __attribute__((aligned(AXI_DWIDTH))) = {0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, + 0x89, 0x88, 0x88, 0xae, 0x08, 0x91, + 0x02, 0x59, 0x11, 0x89}; + +static volatile uint8_t ALIGNED_I8_BUF[16] + __attribute__((aligned(AXI_DWIDTH))) = {0x00, 0x00, 0x00, 0x0, 0x00, 0x00, + 0x00, 0x0, 0x00, 0x00, 0x00, 0x0, + 0x00, 0x00, 0x00, 0x0}; + +void TEST_CASE1(void) { + VSET(16, e8, m1); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 1, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e8, m1); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 2, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e64, m1); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" 
::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 3, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e64, m8); + asm volatile("vle1.v v0, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v0, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 4, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); + + VSET(13, e64, m8); + asm volatile("vle1.v v1, (%0)" ::"r"(ALIGNED_I8_GOLD)); + asm volatile("vse1.v v1, (%0)" ::"r"(ALIGNED_I8_BUF)); + VMCMP(uint8_t, % hhu, 5, ALIGNED_I8_BUF, ALIGNED_I8_GOLD, 2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c new file mode 100644 index 000000000..c5d9b06a7 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c @@ -0,0 +1,357 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE 15 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + 
asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec16(volatile uint16_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint16_t ALIGNED_I16[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse16 ********// +void TEST_CASE1(void) { + VSET(16, e16, m2); + VLOAD_16(v0, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + asm volatile("vse16.v v0, (%0)" ::"r"(ALIGNED_I16)); + VVCMP_U16(1, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec16(ALIGNED_I16); + VSET(16, e16, m2); + VLOAD_16(v1, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(16, e16, m2); + asm volatile("vse16.v 
v1, (%0)" ::"r"(ALIGNED_I16)); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse16 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec16(ALIGNED_I16); + VSET(16, e16, m2); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse16.v v6, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(3, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +void TEST_CASE4(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse16.v v6, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(4, ALIGNED_I16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse16.v v6, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(5, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 0x8759, 15, 0x1989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e16, 
m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse16.v v8, (%0),v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VVCMP_U16(6, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec16(ALIGNED_I16); + uint64_t avl; + VSET(16, e16, m2); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse16.v v8, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VVCMP_U16(7, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec16(ALIGNED_I16); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v4, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v4, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse16.v v4, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VVCMP_U16(8, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + uint64_t avl; + VSET(16, e8, m1); + 
VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v4, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_16(v4, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse16.v v4, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v4); + VVCMP_U16(9, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 16.If LMUL is changed to +// mf8 it will give error because emul become less than 1/8 (EMUL = 1/16) +// But it does not support this configuration because SEW/LMUL > ELEN +void TEST_CASE10(void) { + VSET(16, e16, m2); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(2, e32, mf2); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(10, ALIGNED_I16, 0x05e0, 0xbbd3); +} + +// This test case execute upper bound case of EMUL (8) +// If LMUL is changed to m8 it will give error because emul become greater than +// 8 (EMUL = 16) +void TEST_CASE11(void) { + reset_vec16(ALIGNED_I16); + VSET(16, e16, m1); + VLOAD_16(v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(16, e8, m4); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VVCMP_U16(11, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE12(void) { + 
reset_vec16(ALIGNED_I16); + VSET(16, e16, m2); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(12, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e8, m2); + VLOAD_16(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VLOAD_16(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 0, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VSET(16, e16, m2); + VVCMP_U16(13, ALIGNED_I16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE14(void) { + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v12, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v12); + VLOAD_16(v12, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(13, e8, m1); + asm volatile("vse16.v v12, (%0)" ::"r"(ALIGNED_I16)); + VVCMP_U16(14, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE15(void) { + VSET(16, e16, m1); + VLOAD_16(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v7, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v7); + VLOAD_16(v7, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(13, e16, m1); + write_csr(vstart, 2); + 
asm volatile("vse16.v v7, (%0)" ::"r"(ALIGNED_I16)); + VVCMP_U16(15, ALIGNED_I16, 1, 2, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, + 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 14, 15, 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE16(void) { + reset_vec16(ALIGNED_I16); + VSET(1024, e16, m4); + asm volatile("vle16.v v8, (%0)" ::"r"(&LONG_I16[0])); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + LVVCMP_U16(16, ALIGNED_I16, LONG_I16); +} + +void TEST_CASE17(void) { + reset_vec16(ALIGNED_I16); + VSET(512, e16, m2); + asm volatile("vle16.v v10, (%0)" ::"r"(&LONG_I16[0])); + asm volatile("vse16.v v10, (%0)" ::"r"(ALIGNED_I16)); + LVVCMP_U16(17, ALIGNED_I16, LONG_I16); +} + +void TEST_CASE18(void) { + reset_vec16(ALIGNED_I16); + VSET(300, e16, m2); + asm volatile("vle16.v v10, (%0)" ::"r"(&LONG_I16[0])); + asm volatile("vse16.v v10, (%0)" ::"r"(ALIGNED_I16)); + LVVCMP_U16(18, ALIGNED_I16, LONG_I16); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse16.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c new file mode 100644 index 000000000..afd7c6427 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c @@ -0,0 +1,408 @@ +// TODO uncomment TEST_CASE13 and TEST_CASE 15 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read 
mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec32(volatile uint32_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint32_t ALIGNED_I32[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse32********// +void TEST_CASE1(void) { + VSET(16, e32, m4); + VLOAD_32(v0, 0x9fe41920, 
0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + asm volatile("vse32.v v0, (%0)" ::"r"(ALIGNED_I32)); + VVCMP_U32(1, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +//******Checking functionality with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec32(ALIGNED_I32); + VSET(16, e32, m1); + VLOAD_32(v1, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(16, e64, m4); + asm volatile("vse32.v v1, (%0)" ::"r"(ALIGNED_I32)); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse32 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(3, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +void TEST_CASE4(void) { + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm
volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(4, ALIGNED_I32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(5, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 0x31897598, 15, + 0x89139848); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0),v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(6, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 
0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e32, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(7, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e32, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(8, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + reset_vec32(ALIGNED_I32); + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v4, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_32(v4, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 
0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse32.v v4, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v4); + VVCMP_U32(9, ALIGNED_I32, 1, 0xf9aa71f0, 3, 0x99991348, 5, 0x38197598, 7, + 0x81937598, 9, 0x3eeeeeee, 11, 0xab8b9148, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover corner case for EEW = 32.If LMUL is changed to +// mf8 and SEW is changed to e64 it will give error because emul become less +// than 1/8 (EMUL = 1/16) But it does not support this configuration because +// SEW/LMUL > ELEN +void TEST_CASE10(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(2, e32, mf2); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VVCMP_U32(10, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0); +} + +// This test case execute upper bound case of EMUL (8) +// If LMUL is changed to m8 it will give error because emul become greater than +// 8 (EMUL = 16) +void TEST_CASE11(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(16, e8, m2); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VVCMP_U32(11, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +//******Checking functionality with different values 
of vl******// +void TEST_CASE12(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m4); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VVCMP_U32(12, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +void TEST_CASE13(void) { + uint64_t avl; + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + __asm__ volatile("vsetivli %[A], 0, e32, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VSET(16, e32, m4); + VVCMP_U32(13, ALIGNED_I32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE14(void) { + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(13, e32, m4); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VSET(16, e32, m4); + VVCMP_U32(14, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 14, 15, 16); +} + +//******Checking functionality 
with different vstart value*****// +void TEST_CASE15(void) { + reset_vec32(ALIGNED_I32); + VSET(16, e32, m1); + VLOAD_32(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v7, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v7); + VLOAD_32(v7, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(13, e32, m1); + write_csr(vstart, 2); + asm volatile("vse32.v v7, (%0)" ::"r"(ALIGNED_I32)); + VVCMP_U32(15, ALIGNED_I32, 1, 2, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 14, 15, 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE16(void) { + reset_vec32(ALIGNED_I32); + VSET(1024, e32, m8); + asm volatile("vle32.v v8, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(16, ALIGNED_I32, LONG_I32); +} + +void TEST_CASE17(void) { + reset_vec32(ALIGNED_I32); + VSET(512, e32, m4); + asm volatile("vle32.v v12, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v12, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(17, ALIGNED_I32, LONG_I32); +} + +void TEST_CASE18(void) { + reset_vec32(ALIGNED_I32); + VSET(256, e32, m2); + asm volatile("vle32.v v10, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v10, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(18, ALIGNED_I32, LONG_I32); +} + +void TEST_CASE19(void) { + reset_vec32(ALIGNED_I32); + VSET(200, e32, m2); + asm volatile("vle32.v v8, (%0)" ::"r"(&LONG_I32[0])); + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + LVVCMP_U32(19, ALIGNED_I32, LONG_I32); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse32.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + 
TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + // TEST_CASE13(); + TEST_CASE14(); + // TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c new file mode 100644 index 000000000..da857b854 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c @@ -0,0 +1,435 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm 
volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec64(volatile uint64_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint64_t ALIGNED_I64[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse64 with different destination +// registers********// +void TEST_CASE1(void) { + VSET(16, e64, m8); + VLOAD_64(v0, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + asm volatile("vse64.v v0, (%0)" ::"r"(ALIGNED_I64)); + VVCMP_U64(1, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 
0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(16, e64, m8); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse64 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v16, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse64.v v16, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VVCMP_U64(3, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +void TEST_CASE4(void) { + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v16, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VLOAD_64(v16, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse64.v v16, (%0), v0.t"
::"r"(ALIGNED_I64)); + VCLEAR(v16); + VVCMP_U64(4, ALIGNED_I64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v16, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VLOAD_64(v16, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse64.v v16, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v16); + VVCMP_U64(5, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 0x3189759837598759, 15, + 0x8913984898951989); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0),v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(6, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 
0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e64, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(7, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e64, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(8, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); 
+} + +void TEST_CASE9(void) { + reset_vec64(ALIGNED_I64); + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 12, e16, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(9, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case cover upper bound of EMUL(8). 
If LMUL is changed to +// m2 it will give error because emul become greater than 8 (EMUL = 16) +void TEST_CASE10(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(16, e8, m1); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(10, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(11, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); 
+ asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); // store the v8 group loaded above; v6 is not a legal m8 register group + VCLEAR(v8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + __asm__ volatile("vsetivli %[A], 0, e64, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VSET(16, e64, m8); + VVCMP_U64(12, ALIGNED_I64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE13(void) { + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(13, e64, m8); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VSET(16, e64, m8); + VVCMP_U64(13, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE14(void) { + reset_vec64(ALIGNED_I64); + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384,
0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(13, e64, m8); + write_csr(vstart, 2); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VVCMP_U64(14, ALIGNED_I64, 1, 2, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x1893179501093489, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, + 0x9013930148815808, 0xab8b914891484891, 0x9031850931584902, 14, 15, + 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// +void TEST_CASE15(void) { + reset_vec64(ALIGNED_I64); + VSET(512, e64, m8); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(15, ALIGNED_I64, LONG_I64); +} + +void TEST_CASE16(void) { + reset_vec64(ALIGNED_I64); + VSET(256, e64, m4); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(16, ALIGNED_I64, LONG_I64); +} + +void TEST_CASE17(void) { + reset_vec64(ALIGNED_I64); + VSET(128, e64, m2); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(17, ALIGNED_I64, LONG_I64); +} + +void TEST_CASE18(void) { + reset_vec64(ALIGNED_I64); + VSET(100, e64, m2); + asm volatile("vle64.v v8, (%0)" ::"r"(&LONG_I64[0])); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + LVVCMP_U64(18, ALIGNED_I64, LONG_I64); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse64.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + 
TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c new file mode 100644 index 000000000..0d74bde5d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c @@ -0,0 +1,331 @@ +// TODO uncomment TEST_CASE12 and TEST_CASE 14 after issue of vl=0 and +// non-zero vstart is resolved +// TODO uncomment TEST_CASE2 after issue of exception is resolved +#include "long_array.h" +#include "vector_macros.h" + +#define AXI_DWIDTH 128 +void mtvec_handler(void) { + asm volatile("csrr t0, mcause"); // Read mcause + + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +// Exception Handler for spike +void handle_trap(void) { + // Read mepc + asm volatile("csrr t1, mepc"); + + // Increment return address by 4 + asm volatile("addi t1, t1, 4"); + asm volatile("csrw mepc, t1"); + + asm volatile("ld ra, 8(sp)"); + asm volatile("ld sp, 16(sp)"); + asm volatile("ld gp, 24(sp)"); + asm volatile("ld tp, 32(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t0, 40(sp)"); + asm volatile("ld t1, 48(sp)"); + asm volatile("ld t2, 56(sp)"); + asm volatile("ld s0, 64(sp)"); + asm volatile("ld s1, 72(sp)"); + asm volatile("ld a0, 80(sp)"); + asm volatile("ld a1, 88(sp)"); + asm volatile("ld a2, 96(sp)"); + asm volatile("ld a3, 104(sp)"); + asm volatile("ld a4, 112(sp)"); + asm volatile("ld a5, 120(sp)"); + asm volatile("ld a6, 128(sp)"); + asm volatile("ld a7, 136(sp)"); + asm volatile("ld s2, 144(sp)"); + asm volatile("ld s3, 152(sp)"); + asm volatile("ld s4, 160(sp)"); + asm volatile("ld s5, 168(sp)"); + asm volatile("ld s6, 176(sp)"); + asm volatile("ld s7, 184(sp)"); + asm volatile("ld s8, 192(sp)"); + asm volatile("ld s9, 200(sp)"); + asm volatile("ld s10, 
208(sp)"); + asm volatile("ld s11, 216(sp)"); + asm volatile("ld t3, 224(sp)"); + asm volatile("ld t4, 232(sp)"); + asm volatile("ld t5, 240(sp)"); + asm volatile("ld t6, 248(sp)"); + + // Read mcause + asm volatile("csrr t3, mcause"); + + asm volatile("addi sp, sp, 272"); + + // Filter with mcause and handle here + + asm volatile("mret"); +} + +void reset_vec8(volatile uint8_t *vec) { + for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; +} + +static volatile uint8_t ALIGNED_I8[1024] __attribute__((aligned(AXI_DWIDTH))); + +//**********Checking functionality of vse8 ********// +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + asm volatile("vse8.v v0, (%0)" ::"r"(ALIGNED_I8)); + VVCMP_U8(1, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +//******Checking functionality of with illegal destination register +// specifier for EMUL********// +// In this test case EMUL=2 and register is v1 which will cause illegal +// instruction exception and set mcause = 2 +void TEST_CASE2(void) { + uint8_t mcause; + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v1, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(16, e16, m4); + asm volatile("vse8.v v1, (%0)" ::"r"(ALIGNED_I8)); + asm volatile("addi %[A], t3, 0" : [A] "=r"(mcause)); + XCMP(2, mcause, 2); +} + +//*******Checking functionality of vse8 with different values of masking +// register******// +void TEST_CASE3(void) { + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VVCMP_U8(3, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 
0x89); +} + +void TEST_CASE4(void) { + VSET(16, e8, m1); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v3, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VVCMP_U8(4, ALIGNED_I8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v3, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v3); + VVCMP_U8(5, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 0x59, 15, 0x89); +} + +//******Checking functionality with different combinations of vta and vma*****// +// **** It uses undisturbed policy for tail agnostic and mask agnostic****// +void TEST_CASE6(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, ma" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0),v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(6, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +void TEST_CASE7(void) { + reset_vec8(ALIGNED_I8); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); 
+ VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, ta, mu" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(7, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +void TEST_CASE8(void) { + reset_vec8(ALIGNED_I8); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(8, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +void TEST_CASE9(void) { + reset_vec8(ALIGNED_I8); + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v4, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + VLOAD_8(v4, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 12, e8, m1, tu, mu" : [A] "=r"(avl)); + asm volatile("vse8.v v4, (%0), v0.t" ::"r"(ALIGNED_I8)); + VCLEAR(v4); + VVCMP_U8(9, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91, + 13, 14, 15, 16); +} + +//*******Checking functionality if encoded EEW is not supported for given SEW +// and LMUL values because EMUL become out of range*****// +// This test case execute lower bound case of EMUL (1/8). 
If LMUL is changed to +// mf4 or mf8 it will give error because emul become out of range +void TEST_CASE10(void) { + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v5, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(2, e32, mf2); + asm volatile("vse8.v v5, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v5); + VVCMP_U8(10, ALIGNED_I8, 0xe0, 0xd3); +} + +//******Checking functionality with different values of vl******// +void TEST_CASE11(void) { + reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v6); + VVCMP_U8(11, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +void TEST_CASE12(void) { + uint64_t avl; + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v6); + VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + __asm__ volatile("vsetivli %[A], 0, e8, m1, tu, ma" : [A] "=r"(avl)); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VSET(16, e8, m1); + VVCMP_U8(12, ALIGNED_I8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE13(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v6); + VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(13, e8, m1); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VVCMP_U8(13, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 14, 15, 16); +} + +//******Checking functionality with different vstart value*****// +void TEST_CASE14(void) { + 
reset_vec8(ALIGNED_I8); + VSET(16, e8, m1); + VLOAD_8(v7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse8.v v7, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v7); + VLOAD_8(v7, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + VSET(13, e8, m1); + write_csr(vstart, 2); + asm volatile("vse8.v v7, (%0)" ::"r"(ALIGNED_I8)); + write_csr(vstart, 0); + VVCMP_U8(14, ALIGNED_I8, 1, 2, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, + 0x08, 0x91, 0x02, 14, 15, 16); +} + +//****Checking functionality with different values of EMUL and +// large number of elements *******// + +void TEST_CASE15(void) { + reset_vec8(ALIGNED_I8); + VSET(1024, e8, m2); + asm volatile("vle8.v v8, (%0)" ::"r"(&LONG_I8[0])); + asm volatile("vse8.v v8, (%0)" ::"r"(ALIGNED_I8)); + LVVCMP_U8(15, ALIGNED_I8, LONG_I8); +} + +void TEST_CASE16(void) { + reset_vec8(ALIGNED_I8); + VSET(800, e8, m2); + asm volatile("vle8.v v8, (%0)" ::"r"(&LONG_I8[0])); + asm volatile("vse8.v v8, (%0)" ::"r"(ALIGNED_I8)); + LVVCMP_U8(16, ALIGNED_I8, LONG_I8); +} +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("*****Running tests for vse8.v*****\n"); + TEST_CASE1(); + // TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + // TEST_CASE12(); + TEST_CASE13(); + // TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c new file mode 100644 index 000000000..017feaacf --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c @@ -0,0 +1,466 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +//***********LMUL = 1**********// +void TEST_CASE1(void) { + uint64_t avl, vtype, + vl; // Declaring avl, vtype and vl variables to pass for comparison + uint64_t vlmul = 0; // vlmul field of the reference vtype (0 pairs with m1 below) + uint64_t vsew = 0; // vsew field of the reference vtype (0 pairs with e8 below) + uint64_t vta = 1; // vta field of the reference vtype (ta below) + uint64_t vma = 1; // vma field of the reference vtype (ma below) + uint64_t golden_vtype; // Reference vtype value the CSR is compared against + vtype(golden_vtype, vlmul, vsew, vta, + vma); // Building the reference golden_vtype from the individual + // configuration fields declared above + __asm__ volatile("vsetivli %[A], 30, e8, m1, ta, ma" + : [A] "=r"(avl)); // Executing vsetivli; its destination register is captured in avl + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR + check_vtype_vl( + 1, vtype, golden_vtype, avl, vl, vsew, + vlmul); // Passing actual values and reference values for comparison +} + +void TEST_CASE2(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e16, m1,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(2, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE3(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m1,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(3, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE4(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],16, e64, 
m1,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(4, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 2**********// +void TEST_CASE5(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],30, e8, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(5, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE6(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],20, e16, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(6, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE7(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(7, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE8(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],16, e64, m2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(8, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 4**********// + +void TEST_CASE9(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 0; + 
uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],30, e8, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(9, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE10(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],20, e16, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(10, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE11(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(11, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE12(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e64, m4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(12, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 8**********// + +void TEST_CASE13(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 30, e8, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(13, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE14(void) { + uint64_t avl, vtype, vl; + 
uint64_t vlmul = 3; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e16, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(14, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE15(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],10, e32, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(15, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE16(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e64, m8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(16, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/8**********// + +void TEST_CASE17(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e8, mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(17, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE18(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e16,mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(18, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void 
TEST_CASE19(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 5, e32, mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(19, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE20(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],7, e64, mf8,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(20, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/4**********// + +void TEST_CASE21(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e8, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(21, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE22(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 10, e16, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(22, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE23(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],5, e32, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(23, vtype, 
golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE24(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],15, e64, mf4,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(24, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/2**********// + +void TEST_CASE25(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A],20, e8, mf2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(25, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE26(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e16, mf2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(26, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE27(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 20, e32, mf2,ta,ma" : [A] "=r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(27, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE28(void) { + uint64_t avl, vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetivli %[A], 30, e64, mf2,ta,ma" : [A] "=r"(avl)); + 
read_vtype(vtype); + read_vl(vl); + check_vtype_vl(28, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("************* Running Test for vsetivli *************\n"); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + TEST_CASE20(); + TEST_CASE21(); + TEST_CASE22(); + TEST_CASE23(); + TEST_CASE24(); + TEST_CASE25(); + TEST_CASE26(); + TEST_CASE27(); + TEST_CASE28(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c new file mode 100644 index 000000000..b238ecfc6 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c @@ -0,0 +1,526 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include <stdint.h> // NOTE(review): header name was blank here; <stdint.h> assumed because uint64_t is used - confirm + +#include "vector_macros.h" +// Define VLEN before compiling me +#define VLEN 128 +//***********LMUL = 1**********// +void TEST_CASE1(void) { + uint64_t vtype, vl; // Declaring vtype and vl + // variables to pass for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; // (VLEN / SEW) * LMUL, minus one + vtype(golden_vtype, vlmul, vsew, vta, + vma); // Setting up reference variable golden_vtype by assigning + // different fields of configurations + __asm__ volatile("vsetvl t0, %[A], %[B]" ::[A] "r"(avl), + [B] "r"(golden_vtype) : "t0"); // Executing vsetvl; it writes t0, so t0 is declared clobbered + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR + check_vtype_vl( + 1, vtype, golden_vtype, avl, vl, vsew, + vlmul); // Passing actual values and reference values for comparison +} + +void TEST_CASE2(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 1; // SEW = 16; TEST_CASE1 already covers vsew = 0 + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(2, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE3(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(3, 
vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE4(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(4, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 2**********// +void TEST_CASE5(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(5, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE6(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(6, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE7(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(7, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE8(void) 
{ + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; // (VLEN / SEW) * LMUL, minus one + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); // vsetvl writes t0, so t0 is declared clobbered + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(8, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 4**********// + +void TEST_CASE9(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(9, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE10(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(10, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE11(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(11, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE12(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 3; + 
uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; // (VLEN / SEW) * LMUL, minus one + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); // vsetvl writes t0, so t0 is declared clobbered + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(12, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 8**********// + +void TEST_CASE13(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(13, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE14(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(14, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE15(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(15, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE16(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t 
golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; // (VLEN / SEW) * LMUL, minus one + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); // vsetvl writes t0, so t0 is declared clobbered + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(16, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/8**********// + +void TEST_CASE17(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); // (VLEN / SEW) scaled by the fractional LMUL of this group + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(17, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE18(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(18, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE19(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype) : "t0"); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(19, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE20(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); 
+ __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(20, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/4**********// + +void TEST_CASE21(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + vtype(golden_vtype, vlmul, vsew, vta, vma); + uint64_t avl = ((VLEN / (8 << vsew)) / 4) - 1; + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(21, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE22(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(22, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE23(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(23, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE24(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); 
+ check_vtype_vl(24, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/2**********// + +void TEST_CASE25(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(25, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE26(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(26, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE27(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(27, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +void TEST_CASE28(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile( + "vsetvl t0, %[A], %[B]" ::[A] "r"(avl), [B] "r"(golden_vtype)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(28, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +int main(void) { + INIT_CHECK(); +
enable_vec(); + + printf("************* Running Test for vsetvl *************\n"); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + TEST_CASE20(); + TEST_CASE21(); + TEST_CASE22(); + TEST_CASE23(); + TEST_CASE24(); + TEST_CASE25(); + TEST_CASE26(); + TEST_CASE27(); + TEST_CASE28(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c new file mode 100644 index 000000000..27689fcd8 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c @@ -0,0 +1,528 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include + +#include "vector_macros.h" +// Define VLEN before compiling me +#define VLEN 128 + +//***********LMUL = 1**********// + +//****** SEW = 8 +void TEST_CASE1(void) { + uint64_t vtype, vl; // Setting avl and declaring vtype and vl + // variables to pass for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, + vma); // Setting up reference variable golden_vtype by assigning + // different fields of configurations + __asm__ volatile("vsetvli t0, %[A], e8, m1,ta,ma" ::[A] "r"( + avl)); // Executing vsetvli instruction + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR + check_vtype_vl( + 1, 
vtype, golden_vtype, avl, vl, vsew, + vlmul); // Passing actual values and reference values for comparison +} + +//****** SEW = 16 +void TEST_CASE2(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m1,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(2, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE3(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m1,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(3, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE4(void) { + uint64_t vtype, vl; + uint64_t vlmul = 0; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m1,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(4, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 2**********// +//****** SEW = 8 +void TEST_CASE5(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(5, 
vtype, golden_vtype, avl, vl, vsew, vlmul); +} +//****** SEW = 16 +void TEST_CASE6(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(6, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE7(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(7, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE8(void) { + uint64_t vtype, vl; + uint64_t vlmul = 1; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(8, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 4**********// + +//****** SEW = 8 +void TEST_CASE9(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(9, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 
+void TEST_CASE10(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(10, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE11(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(11, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE12(void) { + uint64_t vtype, vl; + uint64_t vlmul = 2; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(12, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 8**********// + +//****** SEW = 8 +void TEST_CASE13(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(13, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE14(void) { + uint64_t vtype, vl; + 
uint64_t vlmul = 3; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(14, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE15(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(15, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE16(void) { + uint64_t vtype, vl; + uint64_t vlmul = 3; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, m8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(16, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/8**********// + +//****** SEW = 8 +void TEST_CASE17(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(17, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE18(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 1; + uint64_t vta = 1; + 
uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16,mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(18, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE19(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(19, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE20(void) { + uint64_t vtype, vl; + uint64_t vlmul = 5; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 8); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, mf8,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(20, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +/////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/4**********// + +//****** SEW = 8 +void TEST_CASE21(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(21, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE22(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, 
vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(22, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE23(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(23, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE24(void) { + uint64_t vtype, vl; + uint64_t vlmul = 6; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 4); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, mf4,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(24, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +///////////////////////////////////////////////////////////////////////////////// + +//***********LMUL = 1/2**********// + +//****** SEW = 8 +void TEST_CASE25(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 0; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e8, mf2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(25, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 16 +void TEST_CASE26(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 1; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e16, mf2,ta,ma" ::[A] "r"(avl)); + 
read_vtype(vtype); + read_vl(vl); + check_vtype_vl(26, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 32 +void TEST_CASE27(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 2; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e32, mf2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(27, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +//****** SEW = 64 +void TEST_CASE28(void) { + uint64_t vtype, vl; + uint64_t vlmul = 7; + uint64_t vsew = 3; + uint64_t vta = 1; + uint64_t vma = 1; + uint64_t golden_vtype; + uint64_t avl = ((VLEN / (8 << vsew)) / 2); + vtype(golden_vtype, vlmul, vsew, vta, vma); + __asm__ volatile("vsetvli t0, %[A], e64, mf2,ta,ma" ::[A] "r"(avl)); + read_vtype(vtype); + read_vl(vl); + check_vtype_vl(28, vtype, golden_vtype, avl, vl, vsew, vlmul); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + printf("************* Running Test for vsetvli *************\n"); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + TEST_CASE9(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE13(); + TEST_CASE14(); + TEST_CASE15(); + TEST_CASE16(); + TEST_CASE17(); + TEST_CASE18(); + TEST_CASE19(); + TEST_CASE20(); + TEST_CASE21(); + TEST_CASE22(); + TEST_CASE23(); + TEST_CASE24(); + TEST_CASE25(); + TEST_CASE26(); + TEST_CASE27(); + TEST_CASE28(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c new file mode 100644 index 000000000..75b584b65 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsext.c @@ -0,0 +1,106 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(8, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + asm volatile("vsext.vf2 v2, v1"); + VCMP_U16(1, v2, 1, 2, -3, -4, 5, 6, -7, -8); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + asm volatile("vsext.vf2 v0, v1"); + VCMP_U32(2, v0, 1, 2, -3, -4); + + VSET(16, e64, m1); + VLOAD_32(v1, 1, 2); + asm volatile("vsext.vf2 v0, v1"); + VCMP_U64(3, v0, 1, 2); +} + +void TEST_CASE2(void) { + VSET(16, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + VLOAD_8(v0, 0xAA); + VCLEAR(v2); + asm volatile("vsext.vf2 v2, v1, v0.t"); + VCMP_U16(4, v2, 0, 2, 0, -4, 0, 6, 0, -8); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vsext.vf2 v2, v1, v0.t"); + VCMP_U32(5, v2, 0, 2, 0, -4); + + VSET(16, e64, m1); + VLOAD_32(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vsext.vf2 v2, v1, v0.t"); + VCMP_U64(6, v2, 0, 2); +} + +void TEST_CASE3(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + asm volatile("vsext.vf4 v2, v1"); + VCMP_U32(7, v2, 1, 2, -3, -4); + + VSET(8, e64, m1); + VLOAD_16(v1, 1, 2); + asm volatile("vsext.vf4 v2, v1"); + VCMP_U64(8, v2, 1, 2); +} + +void TEST_CASE4(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vsext.vf4 v2, v1, v0.t"); + VCMP_U32(9, v2, 0, 2, 0, -4); + + VSET(16, e64, m1); + VLOAD_16(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vsext.vf4 v2, v1, v0.t"); + VCMP_U64(10, v2, 0, 2); +} + +void TEST_CASE5(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + asm volatile("vsext.vf8 v2, v1"); + VCMP_U64(11, v2, 1, 2); +} + +void TEST_CASE6(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vsext.vf8 v2, v1, v0.t"); + VCMP_U64(12, v2, 0, 2); +} + +int main(void) { + INIT_CHECK(); + 
enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c new file mode 100644 index 000000000..743640abc --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1down.c @@ -0,0 +1,101 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + uint64_t scalar = 99; + + VSET(32, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(1, v1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + + VSET(32, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(2, v2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + + VSET(32, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx 
v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v4, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 99); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1down.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(4, v8, 2, 3, 4, 5, 6, 7, 8, 9, 99); +} + +void TEST_CASE2() { + uint64_t scalar = 99; + + VSET(32, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1down.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(5, v1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 99); + + VSET(32, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vslide1down.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(6, v2, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); + + VSET(32, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1down.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(7, v4, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 
99); + + VSET(32, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vslide1down.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(8, v8, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c new file mode 100644 index 000000000..029a17850 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslide1up.c @@ -0,0 +1,78 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + uint64_t scalar = 99; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(1, v1, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(2, v2, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(3, v4, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslide1up.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(4, v8, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +} + +void TEST_CASE2() { + uint64_t scalar = 99; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1up.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(5, v1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 
0x55); + asm volatile("vslide1up.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(6, v2, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslide1up.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(7, v4, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vslide1up.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(8, v8, 99, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c new file mode 100644 index 000000000..8d1d4a1e7 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslidedown.c @@ -0,0 +1,164 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); // one fill value per element requested by VSET(9, ...) + asm volatile("vslidedown.vi v1, v2, 3"); + VCMP_U8(1, v1, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v2, v4, 4"); + VCMP_U16(2, v2, 5, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v4, v8, 5"); + VCMP_U32(3, v4, 6, 7, 8, 9, 10, 11, 12, 13, 14); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v8, v16, 6"); + VCMP_U64(4, v8, 7, 8, 9, 10, 11, 12, 13, 14, 15); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vi v1, v2, 3, v0.t"); + VCMP_U8(5, v1, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v2, v4, 4, v0.t"); + VCMP_U16(6, v2, -1, 6, -1, 8, -1, 10, -1, 12, -1); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v4, v8, 5, v0.t"); + VCMP_U32(7, v4, -1, 7, -1, 
9, -1, 11, -1, 13, -1); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v8, v16, 6, v0.t"); + VCMP_U64(8, v8, -1, 8, -1, 10, -1, 12, -1, 14, -1); +} + +void TEST_CASE3() { + uint64_t scalar = 3; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 4, 5, 6, 7, 8, 9, 10, 11, 12); +} + +void TEST_CASE4() { + uint64_t scalar = 3; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e8, m1); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e16, m2); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 
0xAA, 0xAA); + asm volatile("vslidedown.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e32, m4); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, -1, 5, -1, 7, -1, 9, -1, 11, -1); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VSET(9, e64, m8); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslidedown.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1); +} + +// Corner case: NrLanes divides vl, but the stride requires the operand +// requester to request an additional 64-bit packet per lane, and not only an +// additional 32-bit element per lane. Otherwise, it gets stuck +void TEST_CASE5() { + VSET(32, e32, m8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + VSET(9, e32, m8); + VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslidedown.vi v8, v16, 7"); + VCMP_U32(17, v8, 8, 9, 10, 11, 12, 13, 14, 15, 16); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c new file mode 100644 index 000000000..a33aae101 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vslideup.c @@ -0,0 +1,166 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v1, v2, 3"); + VCMP_U8(1, v1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v2, v4, 4"); + VCMP_U16(2, v2, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v4, v8, 5"); + VCMP_U32(3, v4, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 6"); + VCMP_U64(4, v8, -1, -1, -1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslideup.vi v1, v2, 3, v0.t"); + VCMP_U8(5, v1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v2, v4, 4, v0.t"); + VCMP_U16(6, v2, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v4, v8, 5, v0.t"); + VCMP_U32(7, v4, -1, -1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 6, v0.t"); + VCMP_U64(8, v8, -1, -1, -1, -1, -1, -1, -1, 2, -1, 4, -1, 6, -1, 8, -1, 10); +} + +void TEST_CASE3() { + uint64_t scalar = 3; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, -1, -1, -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); +} + +void TEST_CASE4() { + uint64_t scalar = 3; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslideup.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslideup.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vslideup.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0x55, 0x55); + asm volatile("vslideup.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, -1, -1, -1, 1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13); +} + +// Stress the masked VSLIDEUP to enforce that the used mask bit indices should +// follow the output vector element indices and not the input ones +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + VLOAD_8(v0, 0xAA, 0x80); + asm volatile("vslideup.vi v1, v2, 3, v0.t"); + VCMP_U8(17, v1, -1, -1, -1, 1, -1, 3, -1, 5, -1, -1, -1, -1, -1, -1, -1, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_16(v2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v2, v4, 4, v0.t"); + 
VCMP_U16(18, v2, -1, -1, -1, -1, -1, 2, -1, 4, -1, -1, -1, -1, -1, -1, -1, + 12); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_32(v4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v4, v8, 5, v0.t"); + VCMP_U32(19, v4, -1, -1, -1, -1, -1, 1, -1, 3, -1, -1, -1, -1, -1, -1, -1, + 11); + + VSET(16, e64, m8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); + asm volatile("vslideup.vi v8, v16, 6, v0.t"); + VCMP_U64(20, v8, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, + 10); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + // TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c new file mode 100644 index 000000000..5ff5e8f20 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsll.c @@ -0,0 +1,316 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v4, v2, v3"); + VCMP_U8(1, v4, 0x01, 0x02, 0x04, 0x08, 0x80, 0x80, 0x80, 0x01, 0x01, 0x02, + 0x04, 0x08, 0x80, 0x80, 0x80, 0x01); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, + 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v6, v2, v4"); + VCMP_U16(2, v6, 0x0001, 0x0002, 0x0004, 0x0008, 0x0080, 0x8000, 0x8000, + 0x0001, 0x0001, 0x0002, 0x0004, 0x0008, 0x0080, 0x8000, 0x8000, + 0x0001); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v12, v4, v8"); + VCMP_U32(3, v12, 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000080, + 0x00008000, 0x80000000, 0x00000001, 0x00000001, 0x00000002, + 0x00000004, 0x00000008, 0x00000080, 0x00008000, 0x80000000, + 0x00000001); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsll.vv v24, v8, v16"); + VCMP_U64(4, v24, 
0x000000000000001, 0x0000000000000002, 0x0000000000000004, + 0x0000000000000008, 0x0000000000000080, 0x0000000000008000, + 0x0000000080000000, 0x0000000100000000, 0x0000000000000001, + 0x0000000000000002, 0x0000000000000004, 0x0000000000000008, + 0x0000000000000080, 0x0000000000008000, 0x0000000080000000, + 0x0000000100000000); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsll.vv v4, v2, v3, v0.t"); + VCMP_U8(5, v4, 0x00, 0x02, 0x00, 0x08, 0x00, 0x80, 0x00, 0x01, 0x00, 0x02, + 0x00, 0x08, 0x00, 0x80, 0x00, 0x01); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, + 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vsll.vv v8, v2, v4, v0.t"); + VCMP_U16(6, v8, 0x0000, 0x0002, 0x0000, 0x0008, 0x0000, 0x8000, 0x0000, + 0x0001, 0x0000, 0x0002, 0x0000, 0x0008, 0x0000, 0x8000, 0x0000, + 0x0001); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, + 0x00000001); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vsll.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0x00000000, 0x00000002, 0x00000000, 0x00000008, 0x00000000, + 0x00008000, 0x00000000, 0x00000001, 0x00000000, 0x00000002, + 0x00000000, 0x00000008, 0x00000000, 0x00008000, 0x00000000, + 0x00000001); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 
0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, + 0x0000000000000001); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vsll.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0x000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000008, 0x0000000000000000, 0x0000000000008000, + 0x0000000000000000, 0x0000000100000000, 0x0000000000000000, + 0x0000000000000002, 0x0000000000000000, 0x0000000000000008, + 0x0000000000000000, 0x0000000000008000, 0x0000000000000000, + 0x0000000100000000); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + asm volatile("vsll.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, + 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); + asm volatile("vsll.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, + 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, + 0x03FC); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0x000000FF); + asm volatile("vsll.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0x000003FC); + 
+ VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0x00000000000000FF); + asm volatile("vsll.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0x00000000000003FC); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsll.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v4, 0x00, 0x08, 0x00, 0x10, 0x00, 0x18, 0x00, 0x20, 0x00, 0xE4, + 0x00, 0xEC, 0x00, 0xF4, 0x00, 0xFC); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsll.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 0x0000, 0x0008, 0x0000, 0x0010, 0x0000, 0x0018, 0x0000, + 0x0020, 0x0000, 0xFFE4, 0x0000, 0xFFEC, 0x0000, 0xFFF4, 0x0000, + 0x03FC); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0x000000FF); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsll.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x00000000, 
0x00000008, 0x00000000, 0x00000010, 0x00000000, + 0x00000018, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFE4, + 0x00000000, 0xFFFFFFEC, 0x00000000, 0xFFFFFFF4, 0x00000000, + 0x000003FC); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0x00000000000000FF); + VCLEAR(v16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsll.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0x0000000000000010, 0x0000000000000000, 0x0000000000000018, + 0x0000000000000000, 0x0000000000000020, 0x0000000000000000, + 0xFFFFFFFFFFFFFFE4, 0x0000000000000000, 0xFFFFFFFFFFFFFFEC, + 0x0000000000000000, 0xFFFFFFFFFFFFFFF4, 0x0000000000000000, + 0x00000000000003FC); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + asm volatile("vsll.vi v4, v2, 2"); + VCMP_U8(17, v4, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, + 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); + asm volatile("vsll.vi v4, v2, 2"); + VCMP_U16(18, v4, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, + 0x0020, 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, + 0x03FC); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0x000000FF); + asm volatile("vsll.vi v8, v4, 2"); + VCMP_U32(19, v8, 0x00000004, 
0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0x000003FC); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0x00000000000000FF); + asm volatile("vsll.vi v16, v8, 2"); + VCMP_U64(20, v16, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0x00000000000003FC); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, 0xFA, + 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsll.vi v4, v2, 2, v0.t"); + VCMP_U8(21, v4, 0x00, 0x08, 0x00, 0x10, 0x00, 0x18, 0x00, 0x20, 0x00, 0xE4, + 0x00, 0xEC, 0x00, 0xF4, 0x00, 0xFC); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0x00FF); + VCLEAR(v4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsll.vi v4, v2, 2, v0.t"); + VCMP_U16(22, v4, 0x0000, 0x0008, 0x0000, 0x0010, 0x0000, 0x0018, 0x0000, + 0x0020, 0x0000, 0xFFE4, 0x0000, 0xFFEC, 0x0000, 0xFFF4, 0x0000, + 0x03FC); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0x000000FF); + VCLEAR(v8); + VLOAD_8(v0, 0xAA, 0xAA); + asm 
volatile("vsll.vi v8, v4, 2, v0.t"); + VCMP_U32(23, v8, 0x00000000, 0x00000008, 0x00000000, 0x00000010, 0x00000000, + 0x00000018, 0x00000000, 0x00000020, 0x00000000, 0xFFFFFFE4, + 0x00000000, 0xFFFFFFEC, 0x00000000, 0xFFFFFFF4, 0x00000000, + 0x000003FC); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0x00000000000000FF); + VCLEAR(v16); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsll.vi v16, v8, 2, v0.t"); + VCMP_U64(24, v16, 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0x0000000000000010, 0x0000000000000000, 0x0000000000000018, + 0x0000000000000000, 0x0000000000000020, 0x0000000000000000, + 0xFFFFFFFFFFFFFFE4, 0x0000000000000000, 0xFFFFFFFFFFFFFFEC, + 0x0000000000000000, 0xFFFFFFFFFFFFFFF4, 0x0000000000000000, + 0x00000000000003FC); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c new file mode 100644 index 000000000..0b97f2d4e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsmul.c @@ -0,0 +1,59 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(3, e8, m1); + VLOAD_8(v2, 127, 127, -50); + VLOAD_8(v3, 127, 10, 127); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vsmul.vv v1, v2, v3"); + VCMP_I8(1, v1, 126, 9, -50); +} + +void TEST_CASE2() { + VSET(3, e8, m1); + VLOAD_8(v2, 127, 127, -50); + VLOAD_8(v3, 127, 10, 127); + VLOAD_8(v0, 5, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vsmul.vv v1, v2, v3, v0.t"); + VCMP_I8(2, v1, 126, 0, -50); +} + +void TEST_CASE3() { + VSET(3, e8, m1); + VLOAD_8(v2, 127, 63, -50); + int8_t scalar = 55; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vsmul.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(3, v1, 54, 27, -22); +} + +void TEST_CASE4() { + VSET(3, e8, m1); + VLOAD_8(v2, 127, 127, -50); + int8_t scalar = 55; + VLOAD_8(v0, 5, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vsmul.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(4, v1, 54, 0, -22); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} \ No newline at end of file diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c new file mode 100644 index 000000000..8cfcdab47 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsra.c @@ -0,0 +1,316 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v4, v2, v3"); + VCMP_U8(1, v4, 0x80, 0xC0, 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0xC0, + 0xE0, 0xF0, 0xFF, 0xFF, 0xFF, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v6, v2, v4"); + VCMP_U16(2, v6, 0x8000, 0xC000, 0xE000, 0xF000, 0xFF00, 0xFFFF, 0xFFFF, + 0x8000, 0x8000, 0xC000, 0xE000, 0xF000, 0xFF00, 0xFFFF, 0xFFFF, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v12, v4, v8"); + VCMP_U32(3, v12, 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000, 0xFF000000, + 0xFFFF0000, 0xFFFFFFFF, 0x80000000, 0x80000000, 0xC0000000, + 0xE0000000, 0xF0000000, 0xFF000000, 0xFFFF0000, 0xFFFFFFFF, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsra.vv v24, v8, v16"); + VCMP_U64(4, v24, 
0x8000000000000000, 0xC000000000000000, 0xE000000000000000, + 0xF000000000000000, 0xFF00000000000000, 0xFFFF000000000000, + 0xFFFFFFFF00000000, 0xFFFFFFFF80000000, 0x8000000000000000, + 0xC000000000000000, 0xE000000000000000, 0xF000000000000000, + 0xFF00000000000000, 0xFFFF000000000000, 0xFFFFFFFF00000000, + 0xFFFFFFFF80000000); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vv v4, v2, v3, v0.t"); + VCMP_U8(5, v4, 0x00, 0xC0, 0x00, 0xF0, 0x00, 0xFF, 0x00, 0x80, 0x00, 0xC0, + 0x00, 0xF0, 0x00, 0xFF, 0x00, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vsra.vv v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0x0000, 0xC000, 0x0000, 0xF000, 0x0000, 0xFFFF, 0x0000, + 0x8000, 0x0000, 0xC000, 0x0000, 0xF000, 0x0000, 0xFFFF, 0x0000, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vsra.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0x00000000, 0xC0000000, 0x00000000, 0xF0000000, 0x00000000, + 0xFFFF0000, 0x00000000, 0x80000000, 0x00000000, 0xC0000000, + 0x00000000, 0xF0000000, 0x00000000, 0xFFFF0000, 0x00000000, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 
0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vsra.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0x0000000000000000, 0xC000000000000000, 0x0000000000000000, + 0xF000000000000000, 0x0000000000000000, 0xFFFF000000000000, + 0x0000000000000000, 0xFFFFFFFF80000000, 0x0000000000000000, + 0xC000000000000000, 0x0000000000000000, 0xF000000000000000, + 0x0000000000000000, 0xFFFF000000000000, 0x0000000000000000, + 0xFFFFFFFF80000000); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsra.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsra.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsra.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); 
+ + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsra.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0xFFFFFFFFFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vsra.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x00000000, 
0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsra.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0xFFFFFFFFFFFFFFF9, 0x0000000000000000, 0xFFFFFFFFFFFFFFFB, + 0x0000000000000000, 0xFFFFFFFFFFFFFFFD, 0x0000000000000000, + 0xFFFFFFFFFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsra.vi v4, v2, 2"); + VCMP_U8(17, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xF8, 0xF9, + 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsra.vi v4, v2, 2"); + VCMP_U16(18, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsra.vi v8, v4, 2"); + VCMP_U32(19, v8, 0x00000001, 
0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0xFFFFFFF8, 0xFFFFFFF9, + 0xFFFFFFFA, 0xFFFFFFFB, 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, + 0xFFFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsra.vi v16, v8, 2"); + VCMP_U64(20, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFA, 0xFFFFFFFFFFFFFFFB, + 0xFFFFFFFFFFFFFFFC, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFE, + 0xFFFFFFFFFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vi v4, v2, 2, v0.t"); + VCMP_U8(21, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0xF9, + 0x00, 0xFB, 0x00, 0xFD, 0x00, 0xFF); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsra.vi v4, v2, 2, v0.t"); + VCMP_U16(22, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0xFFF9, 0x0000, 0xFFFB, 0x0000, 0xFFFD, 0x0000, + 0xFFFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm 
volatile("vsra.vi v8, v4, 2, v0.t"); + VCMP_U32(23, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0xFFFFFFF9, + 0x00000000, 0xFFFFFFFB, 0x00000000, 0xFFFFFFFD, 0x00000000, + 0xFFFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsra.vi v16, v8, 2, v0.t"); + VCMP_U64(24, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0xFFFFFFFFFFFFFFF9, 0x0000000000000000, 0xFFFFFFFFFFFFFFFB, + 0x0000000000000000, 0xFFFFFFFFFFFFFFFD, 0x0000000000000000, + 0xFFFFFFFFFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c new file mode 100644 index 000000000..4f7aa571f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsrl.c @@ -0,0 +1,316 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v4, v2, v3"); + VCMP_U8(1, v4, 0x80, 0x40, 0x20, 0x10, 0x01, 0x01, 0x01, 0x80, 0x80, 0x40, + 0x20, 0x10, 0x01, 0x01, 0x01, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v6, v2, v4"); + VCMP_U16(2, v6, 0x8000, 0x4000, 0x2000, 0x1000, 0x0100, 0x0001, 0x0001, + 0x8000, 0x8000, 0x4000, 0x2000, 0x1000, 0x0100, 0x0001, 0x0001, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v12, v4, v8"); + VCMP_U32(3, v12, 0x80000000, 0x40000000, 0x20000000, 0x10000000, 0x01000000, + 0x00010000, 0x00000001, 0x80000000, 0x80000000, 0x40000000, + 0x20000000, 0x10000000, 0x01000000, 0x00010000, 0x00000001, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + asm volatile("vsrl.vv v24, v8, v16"); + VCMP_U64(4, v24, 
0x8000000000000000, 0x4000000000000000, 0x2000000000000000, + 0x1000000000000000, 0x0100000000000000, 0x0001000000000000, + 0x0000000100000000, 0x0000000080000000, 0x8000000000000000, + 0x4000000000000000, 0x2000000000000000, 0x1000000000000000, + 0x0100000000000000, 0x0001000000000000, 0x0000000100000000, + 0x0000000080000000); +}; + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80); + VLOAD_8(v3, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vv v4, v2, v3, v0.t"); + VCMP_U8(5, v4, 0x00, 0x40, 0x00, 0x10, 0x00, 0x01, 0x00, 0x80, 0x00, 0x40, + 0x00, 0x10, 0x00, 0x01, 0x00, 0x80); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000); + VLOAD_16(v4, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vsrl.vv v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0x0000, 0x4000, 0x0000, 0x1000, 0x0000, 0x0001, 0x0000, + 0x8000, 0x0000, 0x4000, 0x0000, 0x1000, 0x0000, 0x0001, 0x0000, + 0x8000); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, + 0x80000000); + VLOAD_32(v8, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vsrl.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0x00000000, 0x40000000, 0x00000000, 0x10000000, 0x00000000, + 0x00010000, 0x00000000, 0x80000000, 0x00000000, 0x40000000, + 0x00000000, 0x10000000, 0x00000000, 0x00010000, 0x00000000, + 0x80000000); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 
0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, + 0x8000000000000000); + VLOAD_64(v16, 0, 1, 2, 3, 7, 15, 31, 32, 0, 1, 2, 3, 7, 15, 31, 32); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vsrl.vv v24, v8, v16, v0.t"); + VCMP_U64(8, v24, 0x0000000000000000, 0x4000000000000000, 0x0000000000000000, + 0x1000000000000000, 0x0000000000000000, 0x0001000000000000, + 0x0000000000000000, 0x0000000080000000, 0x0000000000000000, + 0x4000000000000000, 0x0000000000000000, 0x1000000000000000, + 0x0000000000000000, 0x0001000000000000, 0x0000000000000000, + 0x0000000080000000); +}; + +void TEST_CASE3(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsrl.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x38, 0x39, + 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsrl.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, 0x3FFE, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsrl.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0x3FFFFFF8, 0x3FFFFFF9, + 0x3FFFFFFA, 0x3FFFFFFB, 0x3FFFFFFC, 0x3FFFFFFD, 0x3FFFFFFE, + 0x3FFFFFFF); 
+ + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsrl.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0x3FFFFFFFFFFFFFF8, + 0x3FFFFFFFFFFFFFF9, 0x3FFFFFFFFFFFFFFA, 0x3FFFFFFFFFFFFFFB, + 0x3FFFFFFFFFFFFFFC, 0x3FFFFFFFFFFFFFFD, 0x3FFFFFFFFFFFFFFE, + 0x3FFFFFFFFFFFFFFF); +}; + +void TEST_CASE4(void) { + const uint64_t scalar = 2; + + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0x39, + 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0x3FF9, 0x0000, 0x3FFB, 0x0000, 0x3FFD, 0x0000, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vsrl.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0x00000000, 
0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0x3FFFFFF9, + 0x00000000, 0x3FFFFFFB, 0x00000000, 0x3FFFFFFD, 0x00000000, + 0x3FFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsrl.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0x3FFFFFFFFFFFFFF9, 0x0000000000000000, 0x3FFFFFFFFFFFFFFB, + 0x0000000000000000, 0x3FFFFFFFFFFFFFFD, 0x0000000000000000, + 0x3FFFFFFFFFFFFFFF); +}; + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + asm volatile("vsrl.vi v4, v2, 2"); + VCMP_U8(17, v4, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x38, 0x39, + 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + asm volatile("vsrl.vi v4, v2, 2"); + VCMP_U16(18, v4, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x3FF8, 0x3FF9, 0x3FFA, 0x3FFB, 0x3FFC, 0x3FFD, 0x3FFE, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + asm volatile("vsrl.vi v8, v4, 2"); + VCMP_U32(19, v8, 0x00000001, 
0x00000002, 0x00000003, 0x00000004, 0x00000005, + 0x00000006, 0x00000007, 0x00000008, 0x3FFFFFF8, 0x3FFFFFF9, + 0x3FFFFFFA, 0x3FFFFFFB, 0x3FFFFFFC, 0x3FFFFFFD, 0x3FFFFFFE, + 0x3FFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + asm volatile("vsrl.vi v16, v8, 2"); + VCMP_U64(20, v16, 0x0000000000000001, 0x0000000000000002, 0x0000000000000003, + 0x0000000000000004, 0x0000000000000005, 0x0000000000000006, + 0x0000000000000007, 0x0000000000000008, 0x3FFFFFFFFFFFFFF8, + 0x3FFFFFFFFFFFFFF9, 0x3FFFFFFFFFFFFFFA, 0x3FFFFFFFFFFFFFFB, + 0x3FFFFFFFFFFFFFFC, 0x3FFFFFFFFFFFFFFD, 0x3FFFFFFFFFFFFFFE, + 0x3FFFFFFFFFFFFFFF); +}; + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x20, 0xE0, 0xE4, 0xE8, + 0xEC, 0xF0, 0xF4, 0xF8, 0xFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vi v4, v2, 2, v0.t"); + VCMP_U8(21, v4, 0x00, 0x02, 0x00, 0x04, 0x00, 0x06, 0x00, 0x08, 0x00, 0x39, + 0x00, 0x3B, 0x00, 0x3D, 0x00, 0x3F); + + VSET(16, e16, m2); + VLOAD_16(v2, 0x0004, 0x0008, 0x000C, 0x0010, 0x0014, 0x0018, 0x001C, 0x0020, + 0xFFE0, 0xFFE4, 0xFFE8, 0xFFEC, 0xFFF0, 0xFFF4, 0xFFF8, 0xFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsrl.vi v4, v2, 2, v0.t"); + VCMP_U16(22, v4, 0x0000, 0x0002, 0x0000, 0x0004, 0x0000, 0x0006, 0x0000, + 0x0008, 0x0000, 0x3FF9, 0x0000, 0x3FFB, 0x0000, 0x3FFD, 0x0000, + 0x3FFF); + + VSET(16, e32, m4); + VLOAD_32(v4, 0x00000004, 0x00000008, 0x0000000C, 0x00000010, 0x00000014, + 0x00000018, 0x0000001C, 0x00000020, 0xFFFFFFE0, 0xFFFFFFE4, + 0xFFFFFFE8, 0xFFFFFFEC, 0xFFFFFFF0, 0xFFFFFFF4, 0xFFFFFFF8, + 0xFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm 
volatile("vsrl.vi v8, v4, 2, v0.t"); + VCMP_U32(23, v8, 0x00000000, 0x00000002, 0x00000000, 0x00000004, 0x00000000, + 0x00000006, 0x00000000, 0x00000008, 0x00000000, 0x3FFFFFF9, + 0x00000000, 0x3FFFFFFB, 0x00000000, 0x3FFFFFFD, 0x00000000, + 0x3FFFFFFF); + + VSET(16, e64, m8); + VLOAD_64(v8, 0x0000000000000004, 0x0000000000000008, 0x000000000000000C, + 0x0000000000000010, 0x0000000000000014, 0x0000000000000018, + 0x000000000000001C, 0x0000000000000020, 0xFFFFFFFFFFFFFFE0, + 0xFFFFFFFFFFFFFFE4, 0xFFFFFFFFFFFFFFE8, 0xFFFFFFFFFFFFFFEC, + 0xFFFFFFFFFFFFFFF0, 0xFFFFFFFFFFFFFFF4, 0xFFFFFFFFFFFFFFF8, + 0xFFFFFFFFFFFFFFFC); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsrl.vi v16, v8, 2, v0.t"); + VCMP_U64(24, v16, 0x0000000000000000, 0x0000000000000002, 0x0000000000000000, + 0x0000000000000004, 0x0000000000000000, 0x0000000000000006, + 0x0000000000000000, 0x0000000000000008, 0x0000000000000000, + 0x3FFFFFFFFFFFFFF9, 0x0000000000000000, 0x3FFFFFFFFFFFFFFB, + 0x0000000000000000, 0x3FFFFFFFFFFFFFFD, 0x0000000000000000, + 0x3FFFFFFFFFFFFFFF); +}; + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c new file mode 100644 index 000000000..fa4d3f709 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vss.c @@ -0,0 +1,146 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Positive-stride tests +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile uint8_t OUT1[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + uint64_t stride = 3; + VLOAD_8(v1, 0x9f, 0xe4, 0x19, 0x20); + asm volatile("vsse8.v v1, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U8(1, OUT1, 0x9f, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x19, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); +} + +void TEST_CASE2(void) { + VSET(8, e16, m1); + volatile uint16_t OUT1[] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000}; + uint64_t stride = 4; + VLOAD_16(v1, 0x9f11, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, 0x1546, 0x3220); + asm volatile("vsse16.v v1, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U16(2, OUT1, 0x9f11, 0x0000, 0xe478, 0x0000, 0x1549, 0x0000, 0x3240, + 0x0000, 0x2f11, 0x0000, 0xe448, 0x0000, 0x1546, 0x0000, 0x3220, + 0x0000); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile uint32_t OUT1[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000}; + uint64_t stride = 8; + VLOAD_32(v1, 0x9f872456, 0xe1356784, 0x13241139, 0x20862497); + asm volatile("vsse32.v v1, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U32(3, OUT1, 0x9f872456, 0x00000000, 0xe1356784, 0x00000000, 0x13241139, + 0x00000000, 0x20862497, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000); +} + +void TEST_CASE4(void) { + VSET(16, e64, m8); + volatile uint64_t OUT1[] = { + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 
0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000}; + uint64_t stride = 16; + VLOAD_64(v8, 0x9f87245315434136, 0xe135578794246784, 0x1315345345241139, + 0x2086252110062497, 0x1100229933847136, 0xaaffaaffaaffaaff, + 0xaf87245315434136, 0xa135578794246784, 0x2315345345241139, + 0x1086252110062497, 0x1100229933847134, 0xaaffaaffaaffaaf4, + 0x9315345345241139, 0x9086252110062497, 0x9100229933847134, + 0x9affaaffaaffaaf4); + asm volatile("vsse64.v v8, (%0), %1" ::"r"(OUT1), "r"(stride)); + VVCMP_U64(4, OUT1, 0x9f87245315434136, 0x0000000000000000, 0xe135578794246784, + 0x0000000000000000, 0x1315345345241139, 0x0000000000000000, + 0x2086252110062497, 0x0000000000000000, 0x1100229933847136, + 0x0000000000000000, 0xaaffaaffaaffaaff, 0x0000000000000000, + 0xaf87245315434136, 0x0000000000000000, 0xa135578794246784, + 0x0000000000000000, 0x2315345345241139, 0x0000000000000000, + 0x1086252110062497, 0x0000000000000000, 0x1100229933847134, + 0x0000000000000000, 0xaaffaaffaaffaaf4, 0x0000000000000000, + 0x9315345345241139, 0x0000000000000000, 0x9086252110062497, + 0x0000000000000000, 0x9100229933847134, 0x0000000000000000, + 0x9affaaffaaffaaf4, 0x0000000000000000); +} + +// Masked strided store +void TEST_CASE5(void) { + VSET(4, e8, m1); + volatile uint8_t OUT1[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + uint64_t stride = 3; + VLOAD_8(v0, 0xAA); + VLOAD_8(v1, 0x9f, 0xe4, 0x19, 0x20); + asm volatile("vsse8.v v1, (%0), %1, v0.t" ::"r"(OUT1), "r"(stride)); + VVCMP_U8(5, OUT1, 0x00, 
0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); +} + +void TEST_CASE6(void) { + VSET(16, e64, m8); + volatile uint64_t OUT1[] = { + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000}; + uint64_t stride = 16; + VLOAD_64(v8, 0x9f87245315434136, 0xe135578794246784, 0x1315345345241139, + 0x2086252110062497, 0x1100229933847136, 0xaaffaaffaaffaaff, + 0xaf87245315434136, 0xa135578794246784, 0x2315345345241139, + 0x1086252110062497, 0x1100229933847134, 0xaaffaaffaaffaaf4, + 0x9315345345241139, 0x9086252110062497, 0x9100229933847134, + 0x9affaaffaaffaaf4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vsse64.v v8, (%0), %1, v0.t" ::"r"(OUT1), "r"(stride)); + VVCMP_U64(6, OUT1, 0x0000000000000000, 0x0000000000000000, 0xe135578794246784, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x2086252110062497, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0xaaffaaffaaffaaff, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0xa135578794246784, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x1086252110062497, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0xaaffaaffaaffaaf4, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x9086252110062497, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x9affaaffaaffaaf4, 0x0000000000000000); +} + +int 
main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c new file mode 100644 index 000000000..a21d6aaf5 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssra.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vv v1, v2, v3"); + VCMP_I8(1, v1, 0xff, 0, 0xfe, 0); +} + +void TEST_CASE2() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vv v1, v2, v3, v0.t"); + VCMP_I8(2, v1, 0xff, 0, 0xfe, 0); +} + +void TEST_CASE3() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vi v1, v2, 2"); + VCMP_I8(3, v1, 0xff, 0, 0xfc, 3); +} + +void TEST_CASE4() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vi v1, v2, 2, v0.t"); + VCMP_I8(4, v1, 0xff, 0, 0xfc, 0); +} + +void TEST_CASE5() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 2; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_I8(5, v1, 0xff, 0, 0xfc, 3); +} + +void TEST_CASE6() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 2; + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + 
__asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssra.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_I8(6, v1, 0xff, 0, 0xfc, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c new file mode 100644 index 000000000..de73e9fba --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssrl.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x7f, 0, 0x1e, 0x00); +} + +void TEST_CASE2() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v3, 1, 2, 3, 4); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vv v1, v2, v3, v0.t"); + VCMP_U8(2, v1, 0x7f, 0, 0x1e, 0); +} + +void TEST_CASE3() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vi v1, v2, 5"); + VCMP_U8(3, v1, 7, 0, 7, 0); +} + +void TEST_CASE4() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vi v1, v2, 5, v0.t"); + VCMP_U8(4, v1, 7, 0, 7, 0); +} + +void TEST_CASE5() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 5; + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(5, v1, 7, 0, 7, 0); +} + +void 
TEST_CASE6() { + VSET(4, e8, m1); + VLOAD_8(v2, 0xff, 0x00, 0xf0, 0x0f); + uint64_t scalar = 5; + VLOAD_8(v0, 5, 0, 0, 0); + VCLEAR(v1); + __asm__ volatile("csrw vxrm, 2"); + __asm__ volatile("vssrl.vx v1, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(6, v1, 7, 0, 7, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + EXIT_CHECK(); +} \ No newline at end of file diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c new file mode 100644 index 000000000..8b2c1f538 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssub.c @@ -0,0 +1,55 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 0xfffffff0, 0x7FFFFFFC, 15, 20); + VLOAD_32(v2, 0x7ffffff0, -500, 3, 25); + __asm__ volatile("vssub.vv v3, v1, v2" ::); + VEC_CMP_32(1, v3, 0x80000000, 0x7fffffff, 12, -5); +} + +void TEST_CASE2(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 0xfffffff0, 0x7FFFFFFC, 15, 20); + VLOAD_32(v2, 0x7ffffff0, -500, 3, 25); + VLOAD_32(v0, 10, 0, 0, 0); /* mask 0b1010: only elements 1 and 3 are active */ + CLEAR(v3); + __asm__ volatile("vssub.vv v3, v1, v2, v0.t" ::); + VEC_CMP_32(2, v3, 0, 0x7fffffff, 0, -5); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 5, -2147483645, 15, 20); + const int64_t scalar = 5; + __asm__ volatile("vssub.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VEC_CMP_32(3, v3, 0, 0x80000000, 10, 15); +} + +void TEST_CASE4(void) { + VSET(4, e32, m1); + VLOAD_32(v1, 5, -2147483645, 15, 20); + const int64_t scalar = 5; + VLOAD_32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vssub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_32(4, v3, 0, 0x80000000, 0, 15); +} + 
+int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c new file mode 100644 index 000000000..8a7bb9cec --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vssubu.c @@ -0,0 +1,55 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + VLOAD_U32(v2, 1, 2, 3, 25); + __asm__ volatile("vssubu.vv v3, v1, v2" ::); + VEC_CMP_U32(1, v3, 4, 8, 12, 0); +} + +void TEST_CASE2(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 10, 15, 20); + VLOAD_U32(v2, 1, 2, 3, 120); + VLOAD_U32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vssubu.vv v3, v1, v2, v0.t" ::); + VEC_CMP_U32(2, v3, 0, 8, 0, 0); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 1, 15, 20); + const uint64_t scalar = 5; + __asm__ volatile("vssubu.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VEC_CMP_U32(3, v3, 0, 0, 10, 15); +} + +void TEST_CASE4(void) { + VSET(4, e32, m1); + VLOAD_U32(v1, 5, 1, 15, 20); + const uint64_t scalar = 5; + VLOAD_U32(v0, 10, 0, 0, 0); + CLEAR(v3); + __asm__ volatile("vssubu.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VEC_CMP_U32(4, v3, 0, 0, 0, 15); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + enable_fp(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c new file mode 100644 index 000000000..d50029719 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsub.c @@ -0,0 +1,136 @@ +// Copyright 2021 ETH Zurich and University of 
Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v3, v1, v2"); + VCMP_U8(1, v3, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v6, v2, v4"); + VCMP_U16(2, v6, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v12, v4, v8"); + VCMP_U32(3, v12, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vsub.vv v24, v8, v16"); + VCMP_U64(4, v24, 4, 8, 12, 16, 20, 24, 28, 32, 4, 8, 12, 16, 20, 24, 28, 32); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v3); + asm volatile("vsub.vv v3, v1, v2, v0.t"); + VCMP_U8(5, v3, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + asm volatile("vsub.vv v6, v2, v4, v0.t"); + VCMP_U16(6, v6, 0, 8, 0, 16, 0, 24, 0, 32, 
0, 8, 0, 16, 0, 24, 0, 32); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + asm volatile("vsub.vv v12, v4, v8, v0.t"); + VCMP_U32(7, v12, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); + + VSET(16, e32, m8); + VLOAD_32(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + asm volatile("vsub.vv v24, v8, v16, v0.t"); + VCMP_U32(8, v24, 0, 8, 0, 16, 0, 24, 0, 32, 0, 8, 0, 16, 0, 24, 0, 32); +} + +void TEST_CASE3(void) { + const uint64_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v3, v1, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v3, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v4, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v8, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + asm volatile("vsub.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v16, 0, 5, 10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35); +} + +void TEST_CASE4(void) { + const uint64_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v1, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v3); + asm volatile("vsub.vx v3, v1, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v3, 0, 5, 0, 15, 0, 25, 0, 
35, 0, 5, 0, 15, 0, 25, 0, 35); + + VSET(16, e16, m2); + VLOAD_16(v2, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + asm volatile("vsub.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v4, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + + VSET(16, e32, m4); + VLOAD_32(v4, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + asm volatile("vsub.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v8, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); + + VSET(16, e64, m8); + VLOAD_64(v8, 5, 10, 15, 20, 25, 30, 35, 40, 5, 10, 15, 20, 25, 30, 35, 40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + asm volatile("vsub.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v16, 0, 5, 0, 15, 0, 25, 0, 35, 0, 5, 0, 15, 0, 25, 0, 35); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c new file mode 100644 index 000000000..5f21ec80d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsux.c @@ -0,0 +1,104 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e8, m1); + VLOAD_U8(v2, 0, 1, 2, 3); + volatile uint8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; + VLOAD_U8(v1, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("vsuxei8.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U8_RAW(1, OUP, 0xff, 0x00, 0xf0, 0x0f); +} + +// void TEST_CASE2(void) { +// VSET(4,e8,m1); +// VLOAD_U8(v2,0,1,2,3); +// volatile uint8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; +// VLOAD_U8(v1,0xff,0x00,0xf0,0x0f); +// VLOAD_U8(v0,0x12,0x0,0x0,0x0); +// __asm__ volatile("vsuxei8.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_U8_RAW(2,OUP,0xef,0x00,0xef,0xef); +// } + +void TEST_CASE3(void) { + VSET(4, e16, m1); + VLOAD_U16(v2, 0, 2, 4, 6); + volatile uint16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; + VLOAD_U16(v1, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + __asm__ volatile("vsuxei16.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U16_RAW(3, OUP, 0xffff, 0x0000, 0xf0f0, 0x0f0f); +} + +// void TEST_CASE4(void) { +// VSET(4,e16,m1); +// VLOAD_U16(v2,0,2,4,6); +// volatile uint16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; +// VLOAD_U16(v1,0xffff,0x0000,0xf0f0,0x0f0f); +// VLOAD_U16(v0,0x12,0x0,0x0,0x0); +// __asm__ volatile("vsuxei16.v v1, (%0), v2, v0.t"::"r"(OUP)); +// MEMBARRIER; +// VEC_EQUAL_U16_RAW(4,OUP,0xdead,0x0000,0xdead,0xbeef); +// } + +void TEST_CASE5(void) { + VSET(4, e32, m1); + VLOAD_U32(v2, 0, 4, 8, 12); + volatile uint32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; + VLOAD_U32(v1, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); + __asm__ volatile("vsuxei32.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U32_RAW(5, OUP, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); +} + +// void TEST_CASE6(void) { +// VSET(4,e32,m1); +// VLOAD_U32(v2,0,4,8,12); +// volatile uint32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; +// VLOAD_U32(v1,0xffffffff,0x00000000,0xf0f0f0f0,0x0f0f0f0f); +// 
VLOAD_U32(v0,0x12,0x0,0x0,0x0); +// __asm__ volatile("vsuxei32.v v1, (%0), v2, v0.t"::"r"(OUP)); +// MEMBARRIER; +// VEC_EQUAL_U32_RAW(6,OUP,0xdeadbeef,0x00000000,0xdeadbeef,0xdeadbeef); +// } + +void TEST_CASE7(void) { + VSET(4, e64, m1); + VLOAD_U64(v2, 0, 8, 16, 24); + volatile uint64_t OUP[] = {0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef}; + VLOAD_U64(v1, 0xdeadbeef00000000, 0xdeadbeefffffffff, 0xdeadbeeff0f0f0f0, + 0xdeadbeef0f0f0f0f); + __asm__ volatile("vsuxei64.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U64_RAW(7, OUP, 0xdeadbeef00000000, 0xdeadbeefffffffff, + 0xdeadbeeff0f0f0f0, 0xdeadbeef0f0f0f0f); +} + +// void TEST_CASE8(void) { +// VSET(4,e64,m1); +// VLOAD_U64(v2,0,8,16,24); +// volatile uint64_t OUP[] = +// {0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef}; +// VLOAD_U64(v1,0xdeadbeef00000000,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeef0f0f0f0f); +// VLOAD_U64(v0,0x6,0x0,0x0,0x0); +// __asm__ volatile("vsuxei64.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_U64_RAW(8,OUP,0xdeadbeefdeadbeef,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeefdeadbeef); +// } + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE3(); + TEST_CASE5(); + TEST_CASE7(); + // TEST_CASE2(); + // TEST_CASE4(); + // TEST_CASE6(); + // TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c new file mode 100644 index 000000000..2f0686722 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c @@ -0,0 +1,137 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +#define AXI_DWIDTH 32 + +#define INIT 98 + +void reset_vec8(volatile uint8_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +void reset_vec16(volatile uint16_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +void reset_vec32(volatile uint32_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +void reset_vec64(volatile uint64_t *vec, int rst_val, uint64_t len) { + for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; +} +static volatile uint8_t BUFFER_O8[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; +static volatile uint16_t BUFFER_O16[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; +static volatile uint32_t BUFFER_O32[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; +static volatile uint64_t BUFFER_O64[16] __attribute__((aligned(AXI_DWIDTH))) = { + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, + INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT}; + +// Naive test +void TEST_CASE1(void) { + VSET(12, e8, m1); + VLOAD_8(v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x88, 0x88, 0xae, 0x91, 0x02, 0x59, + 0x89); + VLOAD_8(v2, 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15); + asm volatile("vsuxei8.v v1, (%0), v2" ::"r"(&BUFFER_O8[0])); + VVCMP_U8(1, BUFFER_O8, INIT, 0xd3, 0x40, 0xd1, 0x84, 0x48, INIT, 0x88, 0x88, + 0xae, INIT, 0x91, 0x02, 0x59, INIT, 0x89); + + VSET(12, e16, m2); + VLOAD_16(v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x9388, 0x8188, 0x11ae, + 0x4891, 0x4902, 0x8759, 0x1989); + VLOAD_16(v4, 2, 4, 6, 8, 10, 14, 16, 18, 22, 24, 26, 30); + asm 
volatile("vsuxei16.v v2, (%0), v4" ::"r"(&BUFFER_O16[0])); + VVCMP_U16(2, BUFFER_O16, INIT, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, INIT, + 0x9388, 0x8188, 0x11ae, INIT, 0x4891, 0x4902, 0x8759, INIT, 0x1989); + + VSET(12, e32, m4); + VLOAD_32(v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x81937598, 0x18747547, 0x3eeeeeee, 0xab8b9148, 0x90318509, + 0x31897598, 0x89139848); + VLOAD_32(v8, 4, 8, 12, 16, 20, 28, 32, 36, 44, 48, 52, 60); + asm volatile("vsuxei32.v v4, (%0), v8" ::"r"(&BUFFER_O32[0])); + VVCMP_U32(3, BUFFER_O32, INIT, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, INIT, 0x81937598, 0x18747547, 0x3eeeeeee, INIT, + 0xab8b9148, 0x90318509, 0x31897598, INIT, 0x89139848); + + VSET(12, e64, m8); + VLOAD_64(v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8913984898951989); + VLOAD_64(v16, 8, 16, 24, 32, 40, 56, 64, 72, 88, 96, 104, 120); + asm volatile("vsuxei64.v v8, (%0), v16" ::"r"(&BUFFER_O64[0])); + VVCMP_U64(4, BUFFER_O64, INIT, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, INIT, + 0x81937598aa819388, 0x1874754791888188, 0x3eeeeeeee33111ae, INIT, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, INIT, + 0x8913984898951989); +} + +// Naive test, masked +void TEST_CASE2(void) { + reset_vec8(&BUFFER_O8[0], INIT, 16); + VSET(12, e8, m1); + VLOAD_8(v0, 0xAA, 0x0A); + VLOAD_8(v1, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x88, 0x88, 0xae, 0x91, 0x02, 0x59, + 0x89); + VLOAD_8(v2, 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15); + asm volatile("vsuxei8.v v1, (%0), v2, v0.t" ::"r"(&BUFFER_O8[0])); + VVCMP_U8(5, BUFFER_O8, INIT, INIT, 0x40, INIT, 0x84, INIT, INIT, 0x88, INIT, + 0xae, INIT, INIT, 0x02, INIT, INIT, 0x89); + + reset_vec16(&BUFFER_O16[0], INIT, 16); + VSET(12, e16, m2); + VLOAD_8(v0, 0xAA, 
0x0A); + VLOAD_16(v2, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x9388, 0x8188, 0x11ae, + 0x4891, 0x4902, 0x8759, 0x1989); + VLOAD_16(v4, 2, 4, 6, 8, 10, 14, 16, 18, 22, 24, 26, 30); + asm volatile("vsuxei16.v v2, (%0), v4, v0.t" ::"r"(&BUFFER_O16[0])); + VVCMP_U16(6, BUFFER_O16, INIT, INIT, 0x3840, INIT, 0x9384, INIT, INIT, 0x9388, + INIT, 0x11ae, INIT, INIT, 0x4902, INIT, INIT, 0x1989); + + reset_vec32(&BUFFER_O32[0], INIT, 16); + VSET(12, e32, m4); + VLOAD_8(v0, 0xAA, 0x0A); + VLOAD_32(v4, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x81937598, 0x18747547, 0x3eeeeeee, 0xab8b9148, 0x90318509, + 0x31897598, 0x89139848); + VLOAD_32(v8, 4, 8, 12, 16, 20, 28, 32, 36, 44, 48, 52, 60); + asm volatile("vsuxei32.v v4, (%0), v8, v0.t" ::"r"(&BUFFER_O32[0])); + VVCMP_U32(7, BUFFER_O32, INIT, INIT, 0xa11a9384, INIT, 0x9fa831c7, INIT, INIT, + 0x81937598, INIT, 0x3eeeeeee, INIT, INIT, 0x90318509, INIT, INIT, + 0x89139848); + + reset_vec64(&BUFFER_O64[0], INIT, 16); + VSET(12, e64, m8); + VLOAD_8(v0, 0xAA, 0x0A); + VLOAD_64(v8, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, 0x99991348a9f38cd1, + 0x9fa831c7a11a9384, 0x3819759853987548, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8913984898951989); + VLOAD_64(v16, 8, 16, 24, 32, 40, 56, 64, 72, 88, 96, 104, 120); + asm volatile("vsuxei64.v v8, (%0), v16, v0.t" ::"r"(&BUFFER_O64[0])); + VVCMP_U64(8, BUFFER_O64, INIT, INIT, 0xa11a9384a7163840, INIT, + 0x9fa831c7a11a9384, INIT, INIT, 0x81937598aa819388, INIT, + 0x3eeeeeeee33111ae, INIT, INIT, 0x9031850931584902, INIT, INIT, + 0x8913984898951989); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c new file mode 100644 index 000000000..fc91e81b8 --- /dev/null +++ 
b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsx.c @@ -0,0 +1,102 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(4, e8, m1); + VLOAD_U8(v2, 0, 1, 2, 3); + volatile uint8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; + VLOAD_U8(v1, 0xff, 0x00, 0xf0, 0x0f); + __asm__ volatile("vsxei8.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U8_RAW(1, OUP, 0xff, 0x00, 0xf0, 0x0f); +} + +// void TEST_CASE2(void) { +// VSET(4,e8,m1); +// VLOAD_8(v2,0,1,2,3); +// volatile int8_t OUP[] = {0xef, 0xef, 0xef, 0xef}; +// VLOAD_8(v1,0xff,0x00,0xf0,0x0f); +// VLOAD_8(v0,12,0,0,0); +// __asm__ volatile("vsxei8.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_8_RAW(2,OUP,0xef,0xef,0xf0,0x0f); +// } + +void TEST_CASE3(void) { + VSET(4, e16, m1); + VLOAD_U16(v2, 0, 2, 4, 6); + volatile uint16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; + VLOAD_U16(v1, 0xffff, 0x0000, 0xf0f0, 0x0f0f); + __asm__ volatile("vsxei16.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U16_RAW(3, OUP, 0xffff, 0x0000, 0xf0f0, 0x0f0f); +} + +// void TEST_CASE4(void) { +// VSET(4,e16,m1); +// VLOAD_16(v2,0,2,4,6); +// volatile int16_t OUP[] = {0xdead, 0xbeef, 0xdead, 0xbeef}; +// VLOAD_16(v1,0xffff,0x0000,0xf0f0,0x0f0f); +// VLOAD_16(v0,12,0,0,0); +// __asm__ volatile("vsxei16.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_16_RAW(4,OUP,0xdead,0xbeef,0xf0f0,0x0f0f); +// } + +void TEST_CASE5(void) { + VSET(4, e32, m1); + VLOAD_U32(v2, 0, 4, 8, 12); + volatile uint32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; + VLOAD_U32(v1, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); + __asm__ volatile("vsxei32.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U32_RAW(5, OUP, 0xffffffff, 0x00000000, 0xf0f0f0f0, 0x0f0f0f0f); +} + +// void TEST_CASE6(void) { +// VSET(4,e32,m1); +// VLOAD_U32(v2,0,4,8,12); +// volatile 
int32_t OUP[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}; +// VLOAD_32(v1,0xffffffff,0x00000000,0xf0f0f0f0,0x0f0f0f0f); +// VLOAD_32(v0,12,0,0,0); +// __asm__ volatile("vsxei32.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_32_RAW(6,OUP,0xdeadbeef,0xdeadbeef,0xf0f0f0f0,0x0f0f0f0f); +// } + +void TEST_CASE7(void) { + VSET(4, e64, m1); + VLOAD_U64(v2, 0, 8, 16, 24); + volatile uint64_t OUP[] = {0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef}; + VLOAD_U64(v1, 0xdeadbeef00000000, 0xdeadbeefffffffff, 0xdeadbeeff0f0f0f0, + 0xdeadbeef0f0f0f0f); + __asm__ volatile("vsxei64.v v1, (%0), v2" ::"r"(OUP)); + VEC_EQUAL_U64_RAW(7, OUP, 0xdeadbeef00000000, 0xdeadbeefffffffff, + 0xdeadbeeff0f0f0f0, 0xdeadbeef0f0f0f0f); +} + +// void TEST_CASE8(void) { +// VSET(4,e64,m1); +// VLOAD_64(v2,0,8,16,24); +// volatile int64_t OUP[] = +// {0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef,0xdeadbeefdeadbeef}; +// VLOAD_64(v1,0xdeadbeef00000000,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeef0f0f0f0f); +// VLOAD_64(v0,6,0,0,0); +// __asm__ volatile("vsxei64.v v1, (%0), v2, v0.t"::"r"(OUP)); +// VEC_EQUAL_64_RAW(8,OUP,0xdeadbeefdeadbeef,0xdeadbeefffffffff,0xdeadbeeff0f0f0f0,0xdeadbeefdeadbeef); +// } + +int main(void) { + INIT_CHECK(); + enable_vec(); + TEST_CASE1(); + TEST_CASE3(); + TEST_CASE5(); + TEST_CASE7(); + // TEST_CASE2(); + // TEST_CASE4(); + // TEST_CASE6(); + // TEST_CASE8(); + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c new file mode 100644 index 000000000..96777a93c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwadd.c @@ -0,0 +1,241 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwadd.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwadd.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwadd.vv v24, v8, v16, v0.t"); + VSET(16, e64, 
m8); + VCMP_U64(6, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE3(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v4, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); +} + +void TEST_CASE4(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwadd.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwadd.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwadd.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + 
VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwadd.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwadd.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwadd.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwadd.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE7(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, 
-16); + asm volatile("vwadd.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v4, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwadd.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, + -11); +} + +void TEST_CASE8(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwadd.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwadd.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwadd.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c new file mode 
100644 index 000000000..27cc4fa48 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwaddu.c @@ -0,0 +1,244 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.vv v6, v4, v2, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwaddu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, 
m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwaddu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE3(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, 6, 259, 8, 257, 10, 255, 12, 253, 14, 251, 16, 249, 18, 247, + 20, 245); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 6, 65539, 8, 65537, 10, 65535, 12, 65533, 14, 65531, 16, + 65529, 18, 65527, 20, 65525); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 6, 4294967299, 8, 4294967297, 10, 4294967295, 12, 4294967293, + 14, 4294967291, 16, 4294967289, 18, 4294967287, 20, 4294967285); +} + +void TEST_CASE4(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwaddu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 
6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwaddu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwaddu.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, 2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16); +} + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwaddu.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 
2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwaddu.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, 4, 0, 8, 0, 12, 0, 16, 0, 4, 0, 8, 0, 12, 0, 16); +} + +void TEST_CASE7(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.wx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v6, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, -11); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwaddu.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, 6, 3, 8, 1, 10, -1, 12, -3, 14, -5, 16, -7, 18, -9, 20, + -11); +} + +void TEST_CASE8(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwaddu.wx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v6, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwaddu.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, 7, 0, 9, 0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwaddu.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, 7, 0, 9, 
0, 11, 0, 13, 0, 7, 0, 9, 0, 11, 0, 13); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c new file mode 100644 index 000000000..c7adbaf2f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmacc.c @@ -0,0 +1,248 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 0x3a62, + 0x3f7f, 0x0ae0, 0x0e38, 0x57fe, 0xdc97, 0x61e5, 0xe3f4, 0xb1bd); + VSET(16, e8, m1); + VLOAD_8(v2, 0x19, 0x87, 0x46, 0xf5, 0x3d, 0x66, 0xd7, 0xcf, 0x9f, 0x73, 0x35, + 0x92, 0xb4, 0xc4, 0xdb, 0x1a); + VLOAD_8(v4, 0xd0, 0x62, 0xb7, 0xd9, 0x39, 0xdf, 0x3e, 0x3d, 0xa2, 0xbb, 0xf1, + 0xba, 0xe2, 0xd7, 0x51, 0x5d); + asm volatile("vwmacc.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x415f, 0xedec, 0x8f2c, 0xa98b, 0xe0d8, 0x9342, 0xedba, + 0x2eb5, 0x631d, 0xebe1, 0x0b1d, 0x7612, 0xe57f, 0x6b81, 0xd83f, + 0xbb2f); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, + 0x6cea275d, 0x0c385a3e, 0xf8f640d1, 0x484e89df, 0xb7720e91, + 0x17a7a4cf, 0x9cba6dac, 0x177e67d2, 0x491950da, 0x5b48691f, + 0x03289e10); + VSET(16, e16, m2); + VLOAD_16(v4, 0x6930, 0x239f, 0x2214, 0x555e, 0x9868, 0x02e7, 0x784f, 0x8c32, + 0xe8d1, 0xe941, 0xaaaf, 0x4833, 0xc773, 0x6156, 0xdad9, 0x02a5); + VLOAD_16(v8, 0xe798, 0x1fe5, 0xca4f, 0xb93c, 0xafe4, 0x5641, 0x4848, 0x82a3, + 0x6065, 0x1385, 0x5a53, 0x3318, 0xd488, 0xb1cf, 0x5142, 0x0277); + asm volatile("vwmacc.vv v12, v4, v8"); + VSET(16, 
e32, m4); + VCMP_I32(2, v12, 0x135705b0, 0x9ee38abf, 0xdb9e53ed, 0x4b21e7d0, 0xe1c1ea3f, + 0x6de47e04, 0x2e306876, 0x31abe8a7, 0x3f93c454, 0xb5b61056, + 0xf98d818c, 0xab235b74, 0x211898ea, 0x2b5e7b64, 0x4f7d7e11, + 0x032f22c3); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, + 0xb2cca27c11815d4d, 0x456749124aaf479a, 0xc11d5ef0eaa5ee72, + 0x1e6a624541e03978, 0x36ce0e391abb8a91, 0x552a61c1f7116723, + 0x621ae1e17b7074c2, 0x4c3f1888b5df72b9, 0xde3961024df8c2cf, + 0x37cd59f214853904, 0xe76372440eb37d3d, 0x0f0ff8cee2000142, + 0x061e905b827b9818); + VSET(16, e32, m4); + VLOAD_32(v8, 0xb5c0475b, 0xda0c4af7, 0xa939123e, 0xb7261aa3, 0x510b75c1, + 0x7d5e66d9, 0x3b263bb7, 0xc35c07a0, 0x03b0bb28, 0xba423d88, + 0xb4ddeabb, 0x97b1e0ce, 0x01d07d01, 0x16174f78, 0x40c6b24f, + 0x7fab39a9); + VLOAD_32(v16, 0x376ce1ba, 0x9cc53665, 0x9292669b, 0xcaec0663, 0x174f60ba, + 0x5fc79836, 0x6597295d, 0x737b18f1, 0x8cb86656, 0x044f320e, + 0x2a881643, 0x2e1a8f59, 0xfdc331d1, 0xca03d155, 0x0a51ebfe, + 0xcac2c353); + asm volatile("vwmacc.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x27987c3defb2dc09, 0xc2652748b5903b7c, 0x5fb1b6348769c35c, + 0xc1e76e2cf6217c56, 0x4cc871cf26ba35d4, 0xf0052607e34f7838, + 0x35e364f04a4539f3, 0x1b733cf52ef5b831, 0x5380f57403c23693, + 0x60ee57a5b80c6232, 0x3fc390677f77f3aa, 0xcb708510404efc6d, + 0x37c94aa4ac6b77d5, 0xe2badbd70ab9d815, 0x11ac765b0dd270a4, + 0xeb91935c5ffd04e3); +} + +void TEST_CASE2() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x460f, 0x1c3e, 0xa322, 0xa7de, 0xd343, 0xa068, 0xf7a8, 0x3a62, + 0x3f7f, 0x0ae0, 0x0e38, 0x57fe, 0xdc97, 0x61e5, 0xe3f4, 0xb1bd); + VSET(16, e8, m1); + VLOAD_8(v2, 0x19, 0x87, 0x46, 0xf5, 0x3d, 0x66, 0xd7, 0xcf, 0x9f, 0x73, 0x35, + 0x92, 0xb4, 0xc4, 0xdb, 0x1a); + VLOAD_8(v4, 0xd0, 0x62, 0xb7, 0xd9, 0x39, 0xdf, 0x3e, 0x3d, 0xa2, 0xbb, 0xf1, + 0xba, 0xe2, 0xd7, 0x51, 0x5d); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + 
VCMP_I16(4, v6, 0x460f, 0xedec, 0xa322, 0xa98b, 0xd343, 0x9342, 0xf7a8, + 0x2eb5, 0x3f7f, 0xebe1, 0x0e38, 0x7612, 0xdc97, 0x6b81, 0xe3f4, + 0xbb2f); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1d5e4130, 0x9a736c84, 0xe2c407c1, 0x62baf7c8, 0xc157159f, + 0x6cea275d, 0x0c385a3e, 0xf8f640d1, 0x484e89df, 0xb7720e91, + 0x17a7a4cf, 0x9cba6dac, 0x177e67d2, 0x491950da, 0x5b48691f, + 0x03289e10); + VSET(16, e16, m2); + VLOAD_16(v4, 0x6930, 0x239f, 0x2214, 0x555e, 0x9868, 0x02e7, 0x784f, 0x8c32, + 0xe8d1, 0xe941, 0xaaaf, 0x4833, 0xc773, 0x6156, 0xdad9, 0x02a5); + VLOAD_16(v8, 0xe798, 0x1fe5, 0xca4f, 0xb93c, 0xafe4, 0x5641, 0x4848, 0x82a3, + 0x6065, 0x1385, 0x5a53, 0x3318, 0xd488, 0xb1cf, 0x5142, 0x0277); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0x1d5e4130, 0x9ee38abf, 0xe2c407c1, 0x4b21e7d0, 0xc157159f, + 0x6de47e04, 0x0c385a3e, 0x31abe8a7, 0x484e89df, 0xb5b61056, + 0x17a7a4cf, 0xab235b74, 0x177e67d2, 0x2b5e7b64, 0x5b48691f, + 0x032f22c3); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x37abc1433be408eb, 0xb3af312be2d38e09, 0x3a99dc46913b03d2, + 0xb2cca27c11815d4d, 0x456749124aaf479a, 0xc11d5ef0eaa5ee72, + 0x1e6a624541e03978, 0x36ce0e391abb8a91, 0x552a61c1f7116723, + 0x621ae1e17b7074c2, 0x4c3f1888b5df72b9, 0xde3961024df8c2cf, + 0x37cd59f214853904, 0xe76372440eb37d3d, 0x0f0ff8cee2000142, + 0x061e905b827b9818); + VSET(16, e32, m4); + VLOAD_32(v8, 0xb5c0475b, 0xda0c4af7, 0xa939123e, 0xb7261aa3, 0x510b75c1, + 0x7d5e66d9, 0x3b263bb7, 0xc35c07a0, 0x03b0bb28, 0xba423d88, + 0xb4ddeabb, 0x97b1e0ce, 0x01d07d01, 0x16174f78, 0x40c6b24f, + 0x7fab39a9); + VLOAD_32(v16, 0x376ce1ba, 0x9cc53665, 0x9292669b, 0xcaec0663, 0x174f60ba, + 0x5fc79836, 0x6597295d, 0x737b18f1, 0x8cb86656, 0x044f320e, + 0x2a881643, 0x2e1a8f59, 0xfdc331d1, 0xca03d155, 0x0a51ebfe, + 0xcac2c353); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_I64(6, v24, 0x37abc1433be408eb, 0xc2652748b5903b7c, 
0x3a99dc46913b03d2, + 0xc1e76e2cf6217c56, 0x456749124aaf479a, 0xf0052607e34f7838, + 0x1e6a624541e03978, 0x1b733cf52ef5b831, 0x552a61c1f7116723, + 0x60ee57a5b80c6232, 0x4c3f1888b5df72b9, 0xcb708510404efc6d, + 0x37cd59f214853904, 0xe2badbd70ab9d815, 0x0f0ff8cee2000142, + 0xeb91935c5ffd04e3); +} + +void TEST_CASE3() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, + 0xd556, 0xd603, 0x85d1, 0x66a6, 0x4e3e, 0xb965, 0xaa7b, 0x9d27); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0x50, 0x56, 0x94, 0x1e, 0x09, 0x8f, 0xe1, 0x9e, 0x86, 0x97, 0x71, + 0x5e, 0x55, 0x09, 0xdd, 0x23); + asm volatile("vwmacc.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0x196b, 0x9217, 0x8c03, 0x361a, 0xbb6a, 0x377d, 0x8234, + 0xff71, 0xd2f4, 0xd3f6, 0x8806, 0x687c, 0x4fe7, 0xb992, 0xa9cc, + 0x9dd6); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, + 0xe545695b, 0x538304ce, 0xf430c148, 0xd126fac1, 0xbf51d251, + 0x85ebc0a4, 0x2167faaf, 0x0a2e18cc, 0x0ae19395, 0x03cc9899, + 0x05524f83); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x4324, 0xd762, 0xc34b, 0x6f67, 0x5134, 0x4d9d, 0xfa05, 0xacb7, + 0xb7d2, 0xb079, 0x5bb2, 0x7949, 0x51df, 0xbadd, 0xee81, 0x3b49); + asm volatile("vwmacc.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0xc435e3ec, 0xfff226fa, 0xe6a966ea, 0x3ac40c77, 0x340785f0, + 0xdee56910, 0x5400c5ab, 0xfb080547, 0xd714ba03, 0xc5da1202, + 0x7e63a4c6, 0x1771acb0, 0x037490b3, 0x108f568a, 0x053c7e12, + 0x0073b384); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, + 0x2c39235d58ff74a1, 0x4a793d202092aeac, 0x6d31f07b7bdfb6ea, + 0x902b8e28be41b10d, 0x89114b9383c4b511, 0x1f9a7e912f5a51f0, + 0x5494b9380432890c, 0xfd260f5f1fc1eb45, 0x80381e728c1baa95, + 0xa6be6d48744a823b, 0xd37b8ae766a82bf8, 0x7992c128f1c1f6ab, + 0xbeca06f79871e7e8); + VSET(16, e32, m4); + scalar = 
6474219; + VLOAD_32(v8, 0x56545434, 0x99cd1438, 0xa1d42f8a, 0x3500b207, 0x642cd563, + 0x7405746d, 0xe92c3246, 0xdab496dc, 0xcbe26107, 0x6bb989c7, + 0xc8542e0c, 0x5849a179, 0x04aac7de, 0x7b5ce579, 0x0ce6e7ea, + 0x77402b10); + asm volatile("vwmacc.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0xc3d1296f1ae893e6, 0x5836c95c6dbae113, 0xfd3b05253b5c9368, + 0x2c4d976fb318600e, 0x4a9fe54cf92b4b8d, 0x6d5eb614d7052bf9, + 0x9022bf12bc18cd4f, 0x8902e74ad235ed05, 0x1f86621f3b05e25d, + 0x54be4b3652df41b9, 0xfd1093afbdc79c49, 0x805a304537cce5a8, + 0xa6c03a5756f94905, 0xd3ab25c46d6cd30b, 0x7997bbbadd639479, + 0xbef80b96ee48ff98); +} + +void TEST_CASE4() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x17db, 0x9069, 0x8e1f, 0x3584, 0xbb3d, 0x39b2, 0x82cf, 0x015b, + 0xd556, 0xd603, 0x85d1, 0x66a6, 0x4e3e, 0xb965, 0xaa7b, 0x9d27); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0x50, 0x56, 0x94, 0x1e, 0x09, 0x8f, 0xe1, 0x9e, 0x86, 0x97, 0x71, + 0x5e, 0x55, 0x09, 0xdd, 0x23); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0x17db, 0x9217, 0x8e1f, 0x361a, 0xbb3d, 0x377d, 0x82cf, + 0xff71, 0xd556, 0xd3f6, 0x85d1, 0x687c, 0x4e3e, 0xb992, 0xaa7b, + 0x9dd6); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc9b9ade8, 0xfc9c14a8, 0xe1ace4f7, 0x43ea8b48, 0x3ab3025c, + 0xe545695b, 0x538304ce, 0xf430c148, 0xd126fac1, 0xbf51d251, + 0x85ebc0a4, 0x2167faaf, 0x0a2e18cc, 0x0ae19395, 0x03cc9899, + 0x05524f83); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x4324, 0xd762, 0xc34b, 0x6f67, 0x5134, 0x4d9d, 0xfa05, 0xacb7, + 0xb7d2, 0xb079, 0x5bb2, 0x7949, 0x51df, 0xbadd, 0xee81, 0x3b49); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0xc9b9ade8, 0xfff226fa, 0xe1ace4f7, 0x3ac40c77, 0x3ab3025c, + 0xdee56910, 0x538304ce, 0xfb080547, 0xd126fac1, 0xc5da1202, + 0x85ebc0a4, 0x1771acb0, 0x0a2e18cc, 0x108f568a, 
0x03cc9899, + 0x0073b384); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xc3afd90f697a742a, 0x585e39767c2959ab, 0xfd5f5c31e16d95ba, + 0x2c39235d58ff74a1, 0x4a793d202092aeac, 0x6d31f07b7bdfb6ea, + 0x902b8e28be41b10d, 0x89114b9383c4b511, 0x1f9a7e912f5a51f0, + 0x5494b9380432890c, 0xfd260f5f1fc1eb45, 0x80381e728c1baa95, + 0xa6be6d48744a823b, 0xd37b8ae766a82bf8, 0x7992c128f1c1f6ab, + 0xbeca06f79871e7e8); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x56545434, 0x99cd1438, 0xa1d42f8a, 0x3500b207, 0x642cd563, + 0x7405746d, 0xe92c3246, 0xdab496dc, 0xcbe26107, 0x6bb989c7, + 0xc8542e0c, 0x5849a179, 0x04aac7de, 0x7b5ce579, 0x0ce6e7ea, + 0x77402b10); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmacc.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0xc3afd90f697a742a, 0x5836c95c6dbae113, 0xfd5f5c31e16d95ba, + 0x2c4d976fb318600e, 0x4a793d202092aeac, 0x6d5eb614d7052bf9, + 0x902b8e28be41b10d, 0x8902e74ad235ed05, 0x1f9a7e912f5a51f0, + 0x54be4b3652df41b9, 0xfd260f5f1fc1eb45, 0x805a304537cce5a8, + 0xa6be6d48744a823b, 0xd3ab25c46d6cd30b, 0x7992c128f1c1f6ab, + 0xbef80b96ee48ff98); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c new file mode 100644 index 000000000..c2368776e --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccsu.c @@ -0,0 +1,248 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, + 0xd06f, 0x69c5, 0x9475, 0x5625, 0xa5bd, 0x7be7, 0x823c, 0x5fb2); + VSET(16, e8, m1); + VLOAD_8(v2, 0xb6, 0xbb, 0xb6, 0x57, 0xf9, 0x7c, 0xbf, 0x62, 0x1a, 0xeb, 0xa4, + 0x34, 0xde, 0x96, 0x80, 0xe6); + VLOAD_8(v4, 0x26, 0xea, 0xe8, 0x85, 0x2e, 0xf1, 0x46, 0x8f, 0x68, 0x29, 0xbb, + 0x9b, 0xec, 0x5c, 0x8e, 0x77); + asm volatile("vwmaccsu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x25e0, 0x3323, 0x92e6, 0xcd3b, 0x6d37, 0x1615, 0x8e96, + 0x8fd2, 0xdaff, 0x6668, 0x5141, 0x75a1, 0x8665, 0x55cf, 0x3b3c, + 0x539c); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, + 0x5d6e851a, 0xbd3110cd, 0x18c61b57, 0x7ade2943, 0x7e4f5ed6, + 0x90e5ba77, 0xce45b744, 0x82d1976e, 0xa88bb4e1, 0x989fbb9a, + 0xab29da17); + VSET(16, e16, m2); + VLOAD_16(v4, 0x23fb, 0xcee7, 0xa704, 0xc00f, 0xed9f, 0x2cf0, 0x4b53, 0xc0ba, + 0x775b, 0x557c, 0x57b7, 0xbb06, 0xf9ba, 0x178f, 0xec73, 0x8240); + VLOAD_16(v8, 0xad9d, 0x104d, 0xdc56, 0x96af, 0x8c68, 0x1d25, 0x2d70, 0x467a, + 0xc27c, 0x96e2, 0x1c85, 0xe8b6, 0xf7e0, 0xd069, 0x0bca, 0x4f36); + asm volatile("vwmaccsu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v12, 0xf42c622c, 0x839d2928, 0x6aae411a, 0x9d8c5e88, 0x196a5c5c, + 0x628c33ca, 0xca8f9c1d, 0x075acffb, 0xd58aef57, 0xb0b17e4e, + 0x9aab508a, 0x8f921d88, 0x7cbe902e, 0xbbb98e88, 0x97b93f58, + 0x84411397); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, + 0x67bbdace6d6bf1a9, 0xf2090d8d3b00e5b8, 0x9259e92430c5a337, + 0x7cc51e4cc8fd46c6, 0xe5c6946a8e9787fd, 0x0d36e747a75534cc, + 0x9c1a70c0989504f9, 0xa7b0f15e7b51c000, 0x4566f8ffa299d104, + 0xf385b581a4c1c25b, 0xb067f1a7621f9cdd, 0x54ffc96dc442d7b5, + 0x3fc18a6aa65ab8d5); + VSET(16, e32, m4); + 
VLOAD_32(v8, 0x189138d0, 0xe2f3f48f, 0x58448029, 0x44298d07, 0x6f6b15cf, + 0x13e9cf30, 0x23b6edb8, 0xd532420a, 0xdab302ee, 0xa5e6854e, + 0x538f91b0, 0xc5d4db0e, 0xbc6d31b3, 0x754d418c, 0x96198b07, + 0xf54f785a); + VLOAD_32(v16, 0x52d1517d, 0xa592227e, 0xbb122792, 0x531a3046, 0x88193da7, + 0x13db3502, 0x64efb3f9, 0x55c57a21, 0x31cd5a79, 0x5c0b4048, + 0x899cfb88, 0xfab9de9d, 0x6fa41232, 0x9462cda3, 0x0f8de6ea, + 0x8064029f); + asm volatile("vwmaccsu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x00d3fd4343a241fa, 0x7bbc44d6fa22c6ff, 0xe7a7ead9df60a04d, + 0x7ddc4c9a72efd193, 0x2d44ed6d874572c1, 0x93e550a88e8e3197, + 0x8ada040c3cea26be, 0xd76f3f99213ccf47, 0x05f53f01db8f434a, + 0x7bb552f0a51802e9, 0xd49c03ec9aaeb580, 0x0c6e9b2885384c9a, + 0xd60dbf3493400d51, 0xf465e158f7657501, 0x4e90951ee65f361b, + 0x3a651986a4cf2cbb); +} + +void TEST_CASE2() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x30dc, 0x7235, 0xd5f6, 0xa008, 0x6e79, 0xa159, 0xa05c, 0x5914, + 0xd06f, 0x69c5, 0x9475, 0x5625, 0xa5bd, 0x7be7, 0x823c, 0x5fb2); + VSET(16, e8, m1); + VLOAD_8(v2, 0xb6, 0xbb, 0xb6, 0x57, 0xf9, 0x7c, 0xbf, 0x62, 0x1a, 0xeb, 0xa4, + 0x34, 0xde, 0x96, 0x80, 0xe6); + VLOAD_8(v4, 0x26, 0xea, 0xe8, 0x85, 0x2e, 0xf1, 0x46, 0x8f, 0x68, 0x29, 0xbb, + 0x9b, 0xec, 0x5c, 0x8e, 0x77); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0x30dc, 0x3323, 0xd5f6, 0xcd3b, 0x6e79, 0x1615, 0xa05c, + 0x8fd2, 0xd06f, 0x6668, 0x9475, 0x75a1, 0xa5bd, 0x55cf, 0x823c, + 0x539c); + + VSET(16, e32, m4); + VLOAD_32(v12, 0xdbc5b23d, 0x86bd7dad, 0xb744b5c2, 0xc32f4a47, 0x237edfc4, + 0x5d6e851a, 0xbd3110cd, 0x18c61b57, 0x7ade2943, 0x7e4f5ed6, + 0x90e5ba77, 0xce45b744, 0x82d1976e, 0xa88bb4e1, 0x989fbb9a, + 0xab29da17); + VSET(16, e16, m2); + VLOAD_16(v4, 0x23fb, 0xcee7, 0xa704, 0xc00f, 0xed9f, 0x2cf0, 0x4b53, 0xc0ba, + 0x775b, 0x557c, 0x57b7, 0xbb06, 0xf9ba, 0x178f, 0xec73, 0x8240); + VLOAD_16(v8, 0xad9d, 0x104d, 0xdc56, 0x96af, 0x8c68, 0x1d25, 
0x2d70, 0x467a, + 0xc27c, 0x96e2, 0x1c85, 0xe8b6, 0xf7e0, 0xd069, 0x0bca, 0x4f36); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0xdbc5b23d, 0x839d2928, 0xb744b5c2, 0x9d8c5e88, 0x237edfc4, + 0x628c33ca, 0xbd3110cd, 0x075acffb, 0x7ade2943, 0xb0b17e4e, + 0x90e5ba77, 0x8f921d88, 0x82d1976e, 0xbbb98e88, 0x989fbb9a, + 0x84411397); + + VSET(16, e64, m8); + VLOAD_64(v24, 0xf8e162af4fefb46a, 0x8e859cff3b076a9d, 0xa7279ec622e749eb, + 0x67bbdace6d6bf1a9, 0xf2090d8d3b00e5b8, 0x9259e92430c5a337, + 0x7cc51e4cc8fd46c6, 0xe5c6946a8e9787fd, 0x0d36e747a75534cc, + 0x9c1a70c0989504f9, 0xa7b0f15e7b51c000, 0x4566f8ffa299d104, + 0xf385b581a4c1c25b, 0xb067f1a7621f9cdd, 0x54ffc96dc442d7b5, + 0x3fc18a6aa65ab8d5); + VSET(16, e32, m4); + VLOAD_32(v8, 0x189138d0, 0xe2f3f48f, 0x58448029, 0x44298d07, 0x6f6b15cf, + 0x13e9cf30, 0x23b6edb8, 0xd532420a, 0xdab302ee, 0xa5e6854e, + 0x538f91b0, 0xc5d4db0e, 0xbc6d31b3, 0x754d418c, 0x96198b07, + 0xf54f785a); + VLOAD_32(v16, 0x52d1517d, 0xa592227e, 0xbb122792, 0x531a3046, 0x88193da7, + 0x13db3502, 0x64efb3f9, 0x55c57a21, 0x31cd5a79, 0x5c0b4048, + 0x899cfb88, 0xfab9de9d, 0x6fa41232, 0x9462cda3, 0x0f8de6ea, + 0x8064029f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_I64(6, v24, 0xf8e162af4fefb46a, 0x7bbc44d6fa22c6ff, 0xa7279ec622e749eb, + 0x7ddc4c9a72efd193, 0xf2090d8d3b00e5b8, 0x93e550a88e8e3197, + 0x7cc51e4cc8fd46c6, 0xd76f3f99213ccf47, 0x0d36e747a75534cc, + 0x7bb552f0a51802e9, 0xa7b0f15e7b51c000, 0x0c6e9b2885384c9a, + 0xf385b581a4c1c25b, 0xf465e158f7657501, 0x54ffc96dc442d7b5, + 0x3a651986a4cf2cbb); +} + +void TEST_CASE3() { + VSET(16, e16, m2); + VLOAD_16(v6, 0xadd2, 0x2112, 0xbbc6, 0xd113, 0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, + 0xe110, 0xe81b, 0xb432, 0x5c2c, 0x4da9, 0x8c48, 0x6f94, 0x6250); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0xfe, 0xd1, 0xc2, 0x3b, 0x79, 0x2f, 0xf5, 0xe8, 0x7f, 0x4b, 0x64, + 0x57, 0x2b, 
0x4f, 0x4e, 0xda); + asm volatile("vwmaccsu.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0xb2c8, 0x2527, 0xbf90, 0xd23a, 0xc954, 0xbdf2, 0x0263, + 0x1096, 0xe38b, 0xe992, 0xb626, 0x5ddf, 0x4e80, 0x8dd3, 0x711a, + 0x6692); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 0xc1919ab3, 0x243c3d4d, + 0xd4745be8, 0x50a58cbe, 0x53b75e9f, 0x2a648b62, 0xd74ce1cf, + 0xa2c6a2e7, 0xc30eadb0, 0x7a908fb9, 0xd4455b56, 0x48109ee2, + 0x2f5b537a); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x29f8, 0x6958, 0x6635, 0x03a0, 0x07bc, 0x4881, 0x7d4e, 0x37e3, + 0x8370, 0x405f, 0x1f0d, 0x1252, 0xacf1, 0x06ee, 0x790d, 0x73af); + asm volatile("vwmaccsu.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0x36616cc1, 0x3c7674b2, 0x7b888e64, 0xc1456153, 0x23999b29, + 0xce7fcb61, 0x465ab99c, 0x4f20386a, 0x1f98c352, 0xd2035436, + 0xa039b88c, 0xc18d7372, 0x6c5c1022, 0xd3b3a4d4, 0x3e1f3e87, + 0x25daceb1); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, + 0x6ee24d9ebeda1c54, 0x78437fc8299017d1, 0x46a2833ed69dec1d, + 0x0331761dcc2485b7, 0x99c00b7ecbecb5bf, 0xd68d230a95510605, + 0x0e82f981980d47c8, 0x7bb0e1dd5f273626, 0x044cc7c24be55121, + 0x341b063e01c35796, 0xb77a96fdf1826215, 0xdcbd3fe115470433, + 0xc2797417b552325b); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x53046c2d, 0x3b0c65ed, 0x6565f981, 0xaa4c1d70, 0x0a18c71e, + 0xbc91ff46, 0xa52c32d1, 0x73cca3fc, 0xb2a7e5d2, 0x1939af0a, + 0xe4fdb1f5, 0x783f5c5d, 0x3514c875, 0xce346d04, 0x68047428, + 0x72ca548f); + asm volatile("vwmaccsu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0xd880804c10498af0, 0xd8f567313109141f, 0xe585463838d91671, + 0x6f2405141ab61224, 0x7847653b1c216e5b, 0x46eb47de4add375f, + 0x03713350939f4492, 0x99ecbb2b8c001a13, 0xd6d2143a6346dfcb, + 0x0e8cb57c0c43cff6, 0x7c093f98e48cef0d, 0x047b2edbc92e1f80, + 0x342f8210ca0537fd, 0xb7ca29b420c298c1, 
0xdce563a087e10ceb, + 0xc2a5c016503018a0); +} + +void TEST_CASE4() { + VSET(16, e16, m2); + VLOAD_16(v6, 0xadd2, 0x2112, 0xbbc6, 0xd113, 0xc6f7, 0xbd07, 0xfd9a, 0x0c0e, + 0xe110, 0xe81b, 0xb432, 0x5c2c, 0x4da9, 0x8c48, 0x6f94, 0x6250); + VSET(16, e8, m1); + int64_t scalar = 5; + VLOAD_8(v4, 0xfe, 0xd1, 0xc2, 0x3b, 0x79, 0x2f, 0xf5, 0xe8, 0x7f, 0x4b, 0x64, + 0x57, 0x2b, 0x4f, 0x4e, 0xda); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0xadd2, 0x2527, 0xbbc6, 0xd23a, 0xc6f7, 0xbdf2, 0xfd9a, + 0x1096, 0xe110, 0xe992, 0xb432, 0x5ddf, 0x4da9, 0x8dd3, 0x6f94, + 0x6692); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x39d3ea89, 0x451d8e1a, 0x83edb2d7, 0xc1919ab3, 0x243c3d4d, + 0xd4745be8, 0x50a58cbe, 0x53b75e9f, 0x2a648b62, 0xd74ce1cf, + 0xa2c6a2e7, 0xc30eadb0, 0x7a908fb9, 0xd4455b56, 0x48109ee2, + 0x2f5b537a); + VSET(16, e16, m2); + scalar = -5383; + VLOAD_16(v4, 0x29f8, 0x6958, 0x6635, 0x03a0, 0x07bc, 0x4881, 0x7d4e, 0x37e3, + 0x8370, 0x405f, 0x1f0d, 0x1252, 0xacf1, 0x06ee, 0x790d, 0x73af); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0x39d3ea89, 0x3c7674b2, 0x83edb2d7, 0xc1456153, 0x243c3d4d, + 0xce7fcb61, 0x50a58cbe, 0x4f20386a, 0x2a648b62, 0xd2035436, + 0xa2c6a2e7, 0xc18d7372, 0x7a908fb9, 0xd3b3a4d4, 0x48109ee2, + 0x25daceb1); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xd860771ff910e8a1, 0xd8de9ddf3be66e90, 0xe55e25348ff4c406, + 0x6ee24d9ebeda1c54, 0x78437fc8299017d1, 0x46a2833ed69dec1d, + 0x0331761dcc2485b7, 0x99c00b7ecbecb5bf, 0xd68d230a95510605, + 0x0e82f981980d47c8, 0x7bb0e1dd5f273626, 0x044cc7c24be55121, + 0x341b063e01c35796, 0xb77a96fdf1826215, 0xdcbd3fe115470433, + 0xc2797417b552325b); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x53046c2d, 0x3b0c65ed, 0x6565f981, 0xaa4c1d70, 0x0a18c71e, + 0xbc91ff46, 0xa52c32d1, 0x73cca3fc, 0xb2a7e5d2, 0x1939af0a, + 0xe4fdb1f5, 0x783f5c5d, 
0x3514c875, 0xce346d04, 0x68047428, + 0x72ca548f); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccsu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0xd860771ff910e8a1, 0xd8f567313109141f, 0xe55e25348ff4c406, + 0x6f2405141ab61224, 0x78437fc8299017d1, 0x46eb47de4add375f, + 0x0331761dcc2485b7, 0x99ecbb2b8c001a13, 0xd68d230a95510605, + 0x0e8cb57c0c43cff6, 0x7bb0e1dd5f273626, 0x047b2edbc92e1f80, + 0x341b063e01c35796, 0xb7ca29b420c298c1, 0xdcbd3fe115470433, + 0xc2a5c016503018a0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c new file mode 100644 index 000000000..9f0b04ebc --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccu.c @@ -0,0 +1,248 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x519d, 0x7122, 0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, + 0x3d7b, 0x5570, 0x1e90, 0x79f4, 0x456b, 0x0283, 0x02b5, 0x6865); + VSET(16, e8, m1); + VLOAD_8(v2, 0xce, 0x96, 0x33, 0x88, 0xf8, 0x3f, 0x0c, 0xde, 0x1e, 0x9d, 0x5a, + 0x75, 0x73, 0x43, 0xd9, 0x43); + VLOAD_8(v4, 0x51, 0x88, 0x16, 0xf6, 0x57, 0xab, 0xd8, 0x26, 0x2e, 0x35, 0x94, + 0xd1, 0xf0, 0xb9, 0x09, 0x8a); + asm volatile("vwmaccu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 0x92cb, 0xc0d2, 0x6b8e, 0xd047, 0x97b6, 0x6934, 0x4c5d, + 0x65dc, 0x42df, 0x75f1, 0x5298, 0xd979, 0xb13b, 0x32ee, 0x0a56, + 0x8c83); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, + 0x3e6c375b, 0x314db8d3, 0x35ade27d, 0x74fa2d58, 0x735f513d, + 0x3cad4e4d, 0x628eb81a, 0x1c48c2f9, 0x14f08921, 0x77de05bf, + 0x528c354b); + VSET(16, e16, m2); + VLOAD_16(v4, 0x4ed5, 0xcf74, 0x3442, 0x280f, 0x795e, 0x3007, 0xdf3e, 0xb348, + 0x3865, 0xcb59, 0x1291, 0xa04b, 0xc5bd, 0x957f, 0xefe4, 0xe75d); + VLOAD_16(v8, 0x7d39, 0xddd8, 0x17d7, 0x0574, 0x251a, 0x4ce4, 0x4817, 0x9de1, + 0xd773, 0xdcc8, 0xeb92, 0x8fa8, 0x9382, 0x4369, 0xb1c7, 0x9185); + asm volatile("vwmaccu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 0x45e045d0, 0x20e0ad26, 0x1c968423, 0x6c443b32, 0x37702f57, + 0x4cd91197, 0x702b3765, 0xa43e9cc5, 0xa47055b7, 0x22beaac5, + 0x4dc2ffff, 0xbc81ce52, 0x8e38b3f3, 0x3c4e1738, 0x1e7523fb, + 0xd610159c); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, + 0x24eea54c6530475c, 0x289211cb16ebbbc9, 0x1a0b2b7644ecd474, + 0x159c16af3e71f736, 0x12dda0d2ca288012, 0x790fab107c1346b6, + 0x589cc8132c869645, 0x323623bba87568ce, 0x5ce2e94d5e335c5d, + 0x6e6b3c19c0d78ca0, 0x0502bed40a0600bc, 0x5ff6f4b3610e009c, + 0x40d6eb0605052915); + VSET(16, e32, m4); + 
VLOAD_32(v8, 0xd1247b78, 0xfd5d326b, 0x7fe40cf5, 0xfd802d90, 0x9ec23b7e, + 0x67219fe8, 0x9dc7f026, 0x257d8b7f, 0x782bc512, 0x42fa808b, + 0x48d3273d, 0x7ca0371d, 0x06409254, 0xb77ce3ba, 0x28aac174, + 0xd2e4cdbf); + VLOAD_32(v16, 0xab3b5969, 0xe91aa966, 0x336c2f4c, 0xfcc75a99, 0x1854180c, + 0xeec0354b, 0x8b4595bf, 0x9200fb5c, 0x0d627fcf, 0xdf0a8280, + 0x4b5733be, 0x4f3bd496, 0x10f5d788, 0x3499c99d, 0xdeee29dd, + 0x7e8643a4); + asm volatile("vwmaccu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 0xdcfc2b3ca5c34640, 0x52260af0834ff780, 0x5dd0d00a7e576638, + 0x1f3e3a8021c0226c, 0x37a86e0412f255b1, 0x7a39dcd00fe1b56c, + 0x6b728942b1a24190, 0x283f5f30c90c26b6, 0x7f5820d2b91f8e44, + 0x92f7bdf9f97b71c5, 0x47a4ceb4fab8af14, 0x83757ab9b866ab5b, + 0x6ed5464e986dd540, 0x2ab6573b8294b3ce, 0x8360dddc474a95c0, + 0xa91223c6b56bf471); +} + +void TEST_CASE2() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x519d, 0x7122, 0x672c, 0x4d97, 0x436e, 0x3f1f, 0x423d, 0x44e8, + 0x3d7b, 0x5570, 0x1e90, 0x79f4, 0x456b, 0x0283, 0x02b5, 0x6865); + VSET(16, e8, m1); + VLOAD_8(v2, 0xce, 0x96, 0x33, 0x88, 0xf8, 0x3f, 0x0c, 0xde, 0x1e, 0x9d, 0x5a, + 0x75, 0x73, 0x43, 0xd9, 0x43); + VLOAD_8(v4, 0x51, 0x88, 0x16, 0xf6, 0x57, 0xab, 0xd8, 0x26, 0x2e, 0x35, 0x94, + 0xd1, 0xf0, 0xb9, 0x09, 0x8a); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0x519d, 0xc0d2, 0x672c, 0xd047, 0x436e, 0x6934, 0x423d, + 0x65dc, 0x3d7b, 0x75f1, 0x1e90, 0xd979, 0x456b, 0x32ee, 0x02b5, + 0x8c83); + + VSET(16, e32, m4); + VLOAD_32(v12, 0x1f50b763, 0x6d1a7f46, 0x17b8b2b5, 0x6b69c966, 0x25d945cb, + 0x3e6c375b, 0x314db8d3, 0x35ade27d, 0x74fa2d58, 0x735f513d, + 0x3cad4e4d, 0x628eb81a, 0x1c48c2f9, 0x14f08921, 0x77de05bf, + 0x528c354b); + VSET(16, e16, m2); + VLOAD_16(v4, 0x4ed5, 0xcf74, 0x3442, 0x280f, 0x795e, 0x3007, 0xdf3e, 0xb348, + 0x3865, 0xcb59, 0x1291, 0xa04b, 0xc5bd, 0x957f, 0xefe4, 0xe75d); + VLOAD_16(v8, 0x7d39, 0xddd8, 0x17d7, 0x0574, 0x251a, 0x4ce4, 
0x4817, 0x9de1, + 0xd773, 0xdcc8, 0xeb92, 0x8fa8, 0x9382, 0x4369, 0xb1c7, 0x9185); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0x1f50b763, 0x20e0ad26, 0x17b8b2b5, 0x6c443b32, 0x25d945cb, + 0x4cd91197, 0x314db8d3, 0xa43e9cc5, 0x74fa2d58, 0x22beaac5, + 0x3cad4e4d, 0xbc81ce52, 0x1c48c2f9, 0x3c4e1738, 0x77de05bf, + 0xd610159c); + + VSET(16, e64, m8); + VLOAD_64(v24, 0x5118504f9237ea08, 0x6b71d4ee88073dde, 0x4420559f61e6927c, + 0x24eea54c6530475c, 0x289211cb16ebbbc9, 0x1a0b2b7644ecd474, + 0x159c16af3e71f736, 0x12dda0d2ca288012, 0x790fab107c1346b6, + 0x589cc8132c869645, 0x323623bba87568ce, 0x5ce2e94d5e335c5d, + 0x6e6b3c19c0d78ca0, 0x0502bed40a0600bc, 0x5ff6f4b3610e009c, + 0x40d6eb0605052915); + VSET(16, e32, m4); + VLOAD_32(v8, 0xd1247b78, 0xfd5d326b, 0x7fe40cf5, 0xfd802d90, 0x9ec23b7e, + 0x67219fe8, 0x9dc7f026, 0x257d8b7f, 0x782bc512, 0x42fa808b, + 0x48d3273d, 0x7ca0371d, 0x06409254, 0xb77ce3ba, 0x28aac174, + 0xd2e4cdbf); + VLOAD_32(v16, 0xab3b5969, 0xe91aa966, 0x336c2f4c, 0xfcc75a99, 0x1854180c, + 0xeec0354b, 0x8b4595bf, 0x9200fb5c, 0x0d627fcf, 0xdf0a8280, + 0x4b5733be, 0x4f3bd496, 0x10f5d788, 0x3499c99d, 0xdeee29dd, + 0x7e8643a4); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0x5118504f9237ea08, 0x52260af0834ff780, 0x4420559f61e6927c, + 0x1f3e3a8021c0226c, 0x289211cb16ebbbc9, 0x7a39dcd00fe1b56c, + 0x159c16af3e71f736, 0x283f5f30c90c26b6, 0x790fab107c1346b6, + 0x92f7bdf9f97b71c5, 0x323623bba87568ce, 0x83757ab9b866ab5b, + 0x6e6b3c19c0d78ca0, 0x2ab6573b8294b3ce, 0x5ff6f4b3610e009c, + 0xa91223c6b56bf471); +} + +void TEST_CASE3() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, + 0x3f2d, 0x47bc, 0x4468, 0x616d, 0x5844, 0x3a7d, 0x32fe, 0x7813); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x01, 0xd6, 0x1e, 0x57, 0xcc, 0x31, 0x29, 0x06, 0x5a, 0xab, 0x1e, + 0x0a, 0x97, 0x6f, 
0xe0, 0xfc); + asm volatile("vwmaccu.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, 0x6f55, 0x06c7, 0x360e, 0x0ff8, 0x7927, 0x61bc, 0x7dd9, + 0x0961, 0x40ef, 0x4b13, 0x44fe, 0x619f, 0x5b37, 0x3ca8, 0x375e, + 0x7cff); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, + 0x3498177e, 0x551df71d, 0x29656468, 0x12550807, 0x7dc95cbd, + 0x2167ff36, 0x194b0d6c, 0x79119a1d, 0x6d77fab6, 0x3e32c755, + 0x6e479bf4); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0x9752, 0x45a4, 0xfde9, 0xa659, 0x957b, 0x1a3f, 0x2212, 0x5d43, + 0xdc08, 0x1fb8, 0x5e15, 0x08da, 0x0468, 0x4458, 0xe1e2, 0x4ef7); + asm volatile("vwmaccu.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 0x7484ff07, 0x61bfedc6, 0x6622229d, 0x228430f4, 0x3ceb6b7d, + 0x36bffa37, 0x57ea5f9b, 0x310e703d, 0x2467b43f, 0x806452c5, + 0x29224ac9, 0x1a052d62, 0x796e40f5, 0x7315111e, 0x50c07e83, + 0x74c407b5); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, + 0x11661c8ece6711ac, 0x1e5ffff32ed851dd, 0x0698334d63d206a9, + 0x79598c88fd85995f, 0x2fa78b4b7d90a222, 0x7d65cbfdfc7f2e1d, + 0x6c0101ef46924df6, 0x59ff3d4e018b50f4, 0x2c8ec8409f219401, + 0x20b183b4bb89c200, 0x28bee831261ca372, 0x5b9d142326bcef0a, + 0x1c2ad051e4e7281e); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x307dc235, 0x92187481, 0xa69319d1, 0x682b9abe, 0x8bdce4be, + 0x95ec65ce, 0x46915d6f, 0xd59243e6, 0x1d0943e5, 0x3ae27787, + 0x33c32e03, 0x8be66da2, 0x0fc78147, 0x2ce8d421, 0x9c9bc2fb, + 0x10c8c9f7); + asm volatile("vwmaccu.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 0x27b3569317ce3b7b, 0x80301be52297620b, 0x0a12a5e3bda6ead8, + 0x118e4f69e7a94c16, 0x1e95f8d392f37a47, 0x06d20e0c989e38c3, + 0x7974c7d7abd08544, 0x2ff9f5bd60308c44, 0x7d710070e7f94e54, + 0x6c17bb12e5fd05e3, 0x5a1336d961fce8b5, 0x2cc4c4ceefc069b7, + 0x20b79a86595c2d2d, 0x28d03cbe630746bd, 
0x5bd98349d9e5fe73, + 0x1c314a6c44597cdb); +} + +void TEST_CASE4() { + VSET(16, e16, m2); + VLOAD_16(v6, 0x6f50, 0x0299, 0x3578, 0x0e45, 0x752b, 0x60c7, 0x7d0c, 0x0943, + 0x3f2d, 0x47bc, 0x4468, 0x616d, 0x5844, 0x3a7d, 0x32fe, 0x7813); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x01, 0xd6, 0x1e, 0x57, 0xcc, 0x31, 0x29, 0x06, 0x5a, 0xab, 0x1e, + 0x0a, 0x97, 0x6f, 0xe0, 0xfc); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0x6f50, 0x06c7, 0x3578, 0x0ff8, 0x752b, 0x61bc, 0x7d0c, + 0x0961, 0x3f2d, 0x4b13, 0x4468, 0x619f, 0x5844, 0x3ca8, 0x32fe, + 0x7cff); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x681721c9, 0x5c07924a, 0x5147143e, 0x14da5785, 0x30a43e20, + 0x3498177e, 0x551df71d, 0x29656468, 0x12550807, 0x7dc95cbd, + 0x2167ff36, 0x194b0d6c, 0x79119a1d, 0x6d77fab6, 0x3e32c755, + 0x6e479bf4); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0x9752, 0x45a4, 0xfde9, 0xa659, 0x957b, 0x1a3f, 0x2212, 0x5d43, + 0xdc08, 0x1fb8, 0x5e15, 0x08da, 0x0468, 0x4458, 0xe1e2, 0x4ef7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0x681721c9, 0x61bfedc6, 0x5147143e, 0x228430f4, 0x30a43e20, + 0x36bffa37, 0x551df71d, 0x310e703d, 0x12550807, 0x806452c5, + 0x2167ff36, 0x1a052d62, 0x79119a1d, 0x7315111e, 0x3e32c755, + 0x74c407b5); + + VSET(16, e64, m8); + VLOAD_64(v16, 0x27a0a02f7e9757d4, 0x7ff7bb4d394926a0, 0x09d25e3173571efd, + 0x11661c8ece6711ac, 0x1e5ffff32ed851dd, 0x0698334d63d206a9, + 0x79598c88fd85995f, 0x2fa78b4b7d90a222, 0x7d65cbfdfc7f2e1d, + 0x6c0101ef46924df6, 0x59ff3d4e018b50f4, 0x2c8ec8409f219401, + 0x20b183b4bb89c200, 0x28bee831261ca372, 0x5b9d142326bcef0a, + 0x1c2ad051e4e7281e); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x307dc235, 0x92187481, 0xa69319d1, 0x682b9abe, 0x8bdce4be, + 0x95ec65ce, 0x46915d6f, 0xd59243e6, 0x1d0943e5, 0x3ae27787, + 0x33c32e03, 0x8be66da2, 0x0fc78147, 
0x2ce8d421, 0x9c9bc2fb, + 0x10c8c9f7); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccu.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0x27a0a02f7e9757d4, 0x80301be52297620b, 0x09d25e3173571efd, + 0x118e4f69e7a94c16, 0x1e5ffff32ed851dd, 0x06d20e0c989e38c3, + 0x79598c88fd85995f, 0x2ff9f5bd60308c44, 0x7d65cbfdfc7f2e1d, + 0x6c17bb12e5fd05e3, 0x59ff3d4e018b50f4, 0x2cc4c4ceefc069b7, + 0x20b183b4bb89c200, 0x28d03cbe630746bd, 0x5b9d142326bcef0a, + 0x1c314a6c44597cdb); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c new file mode 100644 index 000000000..0a403bc0b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmaccus.c @@ -0,0 +1,127 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { /* Unmasked vwmaccus.vx with source element widths e8, e16 and e32; every accumulator is preloaded and then compared at twice the source width. */ + VSET(16, e16, m2); + VLOAD_16(v6, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, + 0x8787, 0x2ce4, 0x1083, 0xaa0c, 0x9fdf, 0x3e42, 0x98e7, 0xe33b); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x83, 0xfe, 0xa2, 0xc3, 0xa6, 0x18, 0xd9, 0x4c, 0x6e, 0xeb, 0x43, + 0xb7, 0xec, 0x48, 0xb7, 0xe5); + asm volatile("vwmaccus.vx v6, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x4a1d, 0xd43f, 0xe090, 0xb5a0, 0xf0c8, 0x16cd, 0x304e, + 0x4d5a, 0x89ad, 0x2c7b, 0x11d2, 0xa89f, 0x9f7b, 0x3faa, 0x977a, + 0xe2b4); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, + 0xcdfeb4fe, 0x22d24149, 0x26962240, 0x5ef85b7e, 0x2f61a9e8, + 0x373dc202, 0x1567a6b5, 0x763c5239, 0x60dd0502, 0xab178102, + 0x753e0a11); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0xce02, 0x6935, 0xc803, 0x75bc, 0x80b7, 0x19d2, 0x3b7c, 0xc269, + 0xb639, 0x66f1, 0x678b, 0xc83e, 0x5a5c, 0x389e, 0x9e46, 0xfae9); + asm volatile("vwmaccus.vx v8, %[A], v4" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(2, v8, 0xdefd9888, 0x513c4f1b, 0x69d67768, 0x97200c2b, 0xb1c9ea45, + 0xd01da3bc, 0x27b50dad, 0x2187101f, 0x58e9040d, 0x37d63f7f, + 0x3fbefdcf, 0x10d33667, 0x7da856bd, 0x65838754, 0xa31092ec, + 0x74d30370); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, + 0x91d74be946352cae, 0x524c6db6c58f9da6, 0x39185a920f7787e8, + 0x4080fbf0fdcc64ec, 0x9ed1fb83f53270fc, 0xff0661a19269f0c0, + 0x47d26c599193fe0b, 0xd8cc0342dc3104ce, 0xc51f802bc93381cd, + 0xe7d6522aa1c51245, 0x6fa0a9d3f57bc667, 0xd140731478a147a8, + 0x5d716379591922f4); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x09377def, 0x99ee4d30, 0x8b8452d1, 0xc9e17667, 0x2254aa29, + 0xb56ca9f5, 0xa1276371, 0x32ac1413, 0x59ff6af3, 0x6b61bf57, + 0xc0eb37b3, 0x26f06be7, 
0x0e9b21b2, 0x22898a93, 0xe3646841, + 0xdd301fdc); + asm volatile("vwmaccus.vx v16, %[A], v8" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(3, v16, 0xb6a940470425dc20, 0x3c430136a5932551, 0xf096f84fcc847134, + 0x91c26988f191bc3b, 0x5259ad33e6940249, 0x38fb9358fb72e8cf, + 0x405c6235eb4a66a7, 0x9ee5895b4431c96d, 0xff291c64abdfe8d1, + 0x47fbdc7722d7f1e8, 0xd8b3ab8cf55eb11f, 0xc52e86eb9b0cedda, + 0xe7dbf512bdc5c2ab, 0x6fadfdb8b3d16658, 0xd13568f3f48a0453, + 0x5d63f4705f821de8); +} + +void TEST_CASE2() { /* Repeats the TEST_CASE1 vectors under mask v0 = 0xAA, 0xAA: expected values at even indices are the preloaded accumulator values, odd indices match the TEST_CASE1 results. */ + VSET(16, e16, m2); + VLOAD_16(v6, 0x4c8e, 0xd449, 0xe266, 0xb6d1, 0xf28a, 0x1655, 0x3111, 0x4bde, + 0x8787, 0x2ce4, 0x1083, 0xaa0c, 0x9fdf, 0x3e42, 0x98e7, 0xe33b); + VSET(16, e8, m1); + uint64_t scalar = 5; + VLOAD_8(v4, 0x83, 0xfe, 0xa2, 0xc3, 0xa6, 0x18, 0xd9, 0x4c, 0x6e, 0xeb, 0x43, + 0xb7, 0xec, 0x48, 0xb7, 0xe5); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccus.vx v6, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0x4c8e, 0xd43f, 0xe266, 0xb5a0, 0xf28a, 0x16cd, 0x3111, + 0x4d5a, 0x8787, 0x2c7b, 0x1083, 0xa89f, 0x9fdf, 0x3faa, 0x98e7, + 0xe2b4); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xe318cc7a, 0x489815a8, 0x6e6fc053, 0x8d746807, 0xbc3e6244, + 0xcdfeb4fe, 0x22d24149, 0x26962240, 0x5ef85b7e, 0x2f61a9e8, + 0x373dc202, 0x1567a6b5, 0x763c5239, 0x60dd0502, 0xab178102, + 0x753e0a11); + VSET(16, e16, m2); + scalar = 5383; + VLOAD_16(v4, 0xce02, 0x6935, 0xc803, 0x75bc, 0x80b7, 0x19d2, 0x3b7c, 0xc269, + 0xb639, 0x66f1, 0x678b, 0xc83e, 0x5a5c, 0x389e, 0x9e46, 0xfae9); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccus.vx v8, %[A], v4, v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(5, v8, 0xe318cc7a, 0x513c4f1b, 0x6e6fc053, 0x97200c2b, 0xbc3e6244, + 0xd01da3bc, 0x22d24149, 0x2187101f, 0x5ef85b7e, 0x37d63f7f, + 0x373dc202, 0x10d33667, 0x763c5239, 0x65838754, 0xab178102, + 0x74d30370); + + VSET(16, e64, m8); + VLOAD_64(v16, 0xb6a5b1c3c6d69abb, 0x3c6a647eb0d79a41, 0xf0c3eb8821045259, + 0x91d74be946352cae, 0x524c6db6c58f9da6, 
0x39185a920f7787e8, + 0x4080fbf0fdcc64ec, 0x9ed1fb83f53270fc, 0xff0661a19269f0c0, + 0x47d26c599193fe0b, 0xd8cc0342dc3104ce, 0xc51f802bc93381cd, + 0xe7d6522aa1c51245, 0x6fa0a9d3f57bc667, 0xd140731478a147a8, + 0x5d716379591922f4); + VSET(16, e32, m4); + scalar = 6474219; + VLOAD_32(v8, 0x09377def, 0x99ee4d30, 0x8b8452d1, 0xc9e17667, 0x2254aa29, + 0xb56ca9f5, 0xa1276371, 0x32ac1413, 0x59ff6af3, 0x6b61bf57, + 0xc0eb37b3, 0x26f06be7, 0x0e9b21b2, 0x22898a93, 0xe3646841, + 0xdd301fdc); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vwmaccus.vx v16, %[A], v8, v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(6, v16, 0xb6a5b1c3c6d69abb, 0x3c430136a5932551, 0xf0c3eb8821045259, + 0x91c26988f191bc3b, 0x524c6db6c58f9da6, 0x38fb9358fb72e8cf, + 0x4080fbf0fdcc64ec, 0x9ee5895b4431c96d, 0xff0661a19269f0c0, + 0x47fbdc7722d7f1e8, 0xd8cc0342dc3104ce, 0xc52e86eb9b0cedda, + 0xe7d6522aa1c51245, 0x6fadfdb8b3d16658, 0xd140731478a147a8, + 0x5d63f4705f821de8); +} + +int main(void) { /* Test entry point: runs TEST_CASE1 and TEST_CASE2 between INIT_CHECK() and EXIT_CHECK(); those helpers are presumably provided by vector_macros.h (not shown here). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c new file mode 100644 index 000000000..351d87766 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmul.c @@ -0,0 +1,188 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vwmul.vv on e8, e16 and e32 sources; the products are compared at e16, e32 and e64. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xee, 0xfe, 0xbd, 0xc2, 0x02, 0xa4, 0x34, 0x33, 0x2b, 0x35, 0x16, + 0x9b, 0x3b, 0x5f, 0xfc, 0x8b); + VLOAD_8(v4, 0xcb, 0x24, 0xe8, 0xb2, 0xeb, 0x24, 0x80, 0x67, 0x43, 0x11, 0x7c, + 0x94, 0x22, 0x71, 0xca, 0x80); + asm volatile("vwmul.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x03ba, 0xffb8, 0x0648, 0x12e4, 0xffd6, 0xf310, 0xe600, + 0x1485, 0x0b41, 0x0385, 0x0aa8, 0x2a9c, 0x07d6, 0x29ef, 0x00d8, + 0x3a80); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8aed, 0x2153, 0x5377, 0xc19c, 0x1051, 0x1b75, 0xbafd, 0xb200, + 0xb209, 0xa9a2, 0xbdc4, 0x1653, 0x5965, 0x145e, 0xb626, 0xd79c); + VLOAD_16(v8, 0x778d, 0xc104, 0x6eac, 0x78e8, 0xacd2, 0x698b, 0xc7d3, 0x1e29, + 0x0a58, 0x58b5, 0x29f9, 0x2fb0, 0x2166, 0x0ac4, 0x44e5, 0xbc40); + asm volatile("vwmul.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v12, 0xc953af89, 0xf7cd184c, 0x241535f4, 0xe2889560, 0xfab2ce72, + 0x0b51e587, 0x0f24c987, 0xf6cf8200, 0xfcd98d18, 0xe2129f8a, + 0xf523f7a4, 0x04289610, 0x0ba9a33e, 0x00db43f8, 0xec2007fe, + 0x0ab07700); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbbc467cb, 0xfbb3efda, 0x652f8490, 0x5e7ea848, 0x21fbc400, + 0xbb409fde, 0x98413836, 0x14652ba4, 0xc3d3c86f, 0xc84d3ae3, + 0x3df53027, 0xbda29a2c, 0xa1d7d949, 0x60a3d06e, 0xa91e405d, + 0x7eea498f); + VLOAD_32(v16, 0x80407791, 0x3e51b6e0, 0xd0be8bc1, 0x683f33bd, 0xeddda6c8, + 0x34e351f2, 0xa6a93ab2, 0xc8893cb8, 0xcb61ddc1, 0x341a4cdc, + 0xd377dc52, 0x2f3f3dbf, 0xa97f8c35, 0x38a44020, 0x28e1cc8b, + 0x52d0e17a); + asm volatile("vwmul.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x220c9d56193e26fb, 0xfef434f3d9f0dac0, 0xed5267a678ad2090, + 0x267ac8a1a8c09528, 0xfd97bcf001c92000, 0xf1cc14c54f865ddc, + 0x24347d81c3bb518c, 0xfb94cd6ed9b5cde0, 0x0c5e26a06dc0eeaf, + 0xf4a9f71526e7ff14, 0xf538e8d5150bf07e, 0xf3c075503fe182d4, + 
0x1fd0bbab88bae81d, 0x1561d6be9f0d8dc0, 0xf220183a08740e7f, + 0x290e99b3f87dbd26); +}; + +void TEST_CASE2(void) { /* TEST_CASE1 vectors under mask v0 = 0xAA, 0xAA with the destination cleared first: expected values are 0 at even indices and the TEST_CASE1 products at odd indices. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xee, 0xfe, 0xbd, 0xc2, 0x02, 0xa4, 0x34, 0x33, 0x2b, 0x35, 0x16, + 0x9b, 0x3b, 0x5f, 0xfc, 0x8b); + VLOAD_8(v4, 0xcb, 0x24, 0xe8, 0xb2, 0xeb, 0x24, 0x80, 0x67, 0x43, 0x11, 0x7c, + 0x94, 0x22, 0x71, 0xca, 0x80); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmul.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0, 0xffb8, 0, 0x12e4, 0, 0xf310, 0, 0x1485, 0, 0x0385, 0, + 0x2a9c, 0, 0x29ef, 0, 0x3a80); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x8aed, 0x2153, 0x5377, 0xc19c, 0x1051, 0x1b75, 0xbafd, 0xb200, + 0xb209, 0xa9a2, 0xbdc4, 0x1653, 0x5965, 0x145e, 0xb626, 0xd79c); + VLOAD_16(v8, 0x778d, 0xc104, 0x6eac, 0x78e8, 0xacd2, 0x698b, 0xc7d3, 0x1e29, + 0x0a58, 0x58b5, 0x29f9, 0x2fb0, 0x2166, 0x0ac4, 0x44e5, 0xbc40); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwmul.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0, 0xf7cd184c, 0, 0xe2889560, 0, 0x0b51e587, 0, 0xf6cf8200, + 0, 0xe2129f8a, 0, 0x04289610, 0, 0x00db43f8, 0, 0x0ab07700); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xbbc467cb, 0xfbb3efda, 0x652f8490, 0x5e7ea848, 0x21fbc400, + 0xbb409fde, 0x98413836, 0x14652ba4, 0xc3d3c86f, 0xc84d3ae3, + 0x3df53027, 0xbda29a2c, 0xa1d7d949, 0x60a3d06e, 0xa91e405d, + 0x7eea498f); + VLOAD_32(v16, 0x80407791, 0x3e51b6e0, 0xd0be8bc1, 0x683f33bd, 0xeddda6c8, + 0x34e351f2, 0xa6a93ab2, 0xc8893cb8, 0xcb61ddc1, 0x341a4cdc, + 0xd377dc52, 0x2f3f3dbf, 0xa97f8c35, 0x38a44020, 0x28e1cc8b, + 0x52d0e17a); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwmul.vv v24, v16, v8, v0.t"); /* NOTE(review): the sources are given as v16, v8 here but as v8, v16 in TEST_CASE1; the expected odd-index products are the same values. */ + VSET(16, e64, m8); + VCMP_I64(6, v24, 0, 0xfef434f3d9f0dac0, 0, 0x267ac8a1a8c09528, 0, + 0xf1cc14c54f865ddc, 0, 0xfb94cd6ed9b5cde0, 0, 0xf4a9f71526e7ff14, 0, + 0xf3c075503fe182d4, 0, 0x1561d6be9f0d8dc0, 0, 0x290e99b3f87dbd26); +}; + +void TEST_CASE3(void) { /* Unmasked vwmul.vx with int64_t scalar operands 5, -5383 and 6474219 on e8, e16 and e32 sources. */ + VSET(16, 
e8, m1); + VLOAD_8(v2, 0x86, 0x79, 0xa0, 0x8a, 0x3e, 0xc3, 0x3e, 0x0c, 0x1b, 0xca, 0x80, + 0x41, 0x0e, 0xee, 0x94, 0xdf); + int64_t scalar = 5; + asm volatile("vwmul.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0xfd9e, 0x025d, 0xfe20, 0xfdb2, 0x0136, 0xfecf, 0x0136, + 0x003c, 0x0087, 0xfef2, 0xfd80, 0x0145, 0x0046, 0xffa6, 0xfde4, + 0xff5b); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xb0ab, 0xcccb, 0x5fad, 0x9e24, 0x1496, 0xd4a0, 0x2552, 0xcef6, + 0x34b8, 0xef22, 0x69c3, 0xbb05, 0xbe72, 0x315b, 0x3f03, 0xf58b); + scalar = -5383; + asm volatile("vwmul.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0x06842453, 0x0434bf73, 0xf8243145, 0x0809b904, 0xfe4f21e6, + 0x03900fa0, 0xfcef40c2, 0x04072946, 0xfbab76f8, 0x0162ac12, + 0xf7501cab, 0x05aa79dd, 0x056270e2, 0xfbf22f83, 0xfad307eb, + 0x00dbe233); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x71c6753d, 0x66646cc2, 0x23065c23, 0xde594cad, 0xa2f87c53, + 0xaebb2bcb, 0xc53688b8, 0xf0c161dd, 0x2d856780, 0xa520cce5, + 0x677c5e13, 0x83d288f4, 0x78b6acdc, 0x5b635dd1, 0x97dc75c8, + 0x1a1aa6d4); + scalar = 6474219; + asm volatile("vwmul.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0x002be7b6249483ff, 0x0027833479d82816, 0x000d840f6a940f21, + 0xfff303a4936737cf, 0xffdc19bf4b7d4b31, 0xffe0a38d1ee99659, + 0xffe9508230d7f8e8, 0xfffa1e014df55adf, 0x001190f926b98280, + 0xffdceee1c5a5e337, 0x0027ef3ba84d4671, 0xffd014a6bd334bfc, + 0x002e952469a169f4, 0x0023441ed2e237db, 0xffd7d041966a2698, + 0x000a12cac09b989c); +}; + +void TEST_CASE4(void) { /* Masked counterpart of TEST_CASE3 (v0 = 0xAA, 0xAA, destination cleared first): 0 is expected at even indices, the TEST_CASE3 products at odd indices. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x86, 0x79, 0xa0, 0x8a, 0x3e, 0xc3, 0x3e, 0x0c, 0x1b, 0xca, 0x80, + 0x41, 0x0e, 0xee, 0x94, 0xdf); + int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmul.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0, 0x025d, 0, 0xfdb2, 0, 0xfecf, 0, 0x003c, 0, 0xfef2, 0, + 0x0145, 0, 0xffa6, 0, 0xff5b); + + VSET(16, 
e16, m2); + VLOAD_16(v4, 0xb0ab, 0xcccb, 0x5fad, 0x9e24, 0x1496, 0xd4a0, 0x2552, 0xcef6, + 0x34b8, 0xef22, 0x69c3, 0xbb05, 0xbe72, 0x315b, 0x3f03, 0xf58b); + scalar = -5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwmul.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0, 0x0434bf73, 0, 0x0809b904, 0, 0x03900fa0, 0, 0x04072946, + 0, 0x0162ac12, 0, 0x05aa79dd, 0, 0xfbf22f83, 0, 0x00dbe233); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x71c6753d, 0x66646cc2, 0x23065c23, 0xde594cad, 0xa2f87c53, + 0xaebb2bcb, 0xc53688b8, 0xf0c161dd, 0x2d856780, 0xa520cce5, + 0x677c5e13, 0x83d288f4, 0x78b6acdc, 0x5b635dd1, 0x97dc75c8, + 0x1a1aa6d4); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwmul.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0, 0x0027833479d82816, 0, 0xfff303a4936737cf, 0, + 0xffe0a38d1ee99659, 0, 0xfffa1e014df55adf, 0, 0xffdceee1c5a5e337, 0, + 0xffd014a6bd334bfc, 0, 0x0023441ed2e237db, 0, 0x000a12cac09b989c); +}; + +int main(void) { /* Test entry point: runs TEST_CASE1 to TEST_CASE4 in order between INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c new file mode 100644 index 000000000..3831748d9 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulsu.c @@ -0,0 +1,188 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vwmulsu.vv on e8, e16 and e32 sources; the products are compared at e16, e32 and e64. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x37, 0x4e, 0x9a, 0x08, 0x12, 0xfd, 0xa4, 0x21, 0x44, 0x58, 0x5a, + 0xa9, 0x1d, 0x5e, 0xd4, 0x8e); + VLOAD_8(v4, 0x60, 0x5b, 0x0e, 0x78, 0x67, 0xf4, 0xd3, 0x0f, 0x75, 0x34, 0xc3, + 0xb1, 0x62, 0x42, 0xa9, 0x75); + asm volatile("vwmulsu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_I16(1, v6, 0x14a0, 0x1bba, 0xfa6c, 0x03c0, 0x073e, 0xfd24, 0xb42c, + 0x01ef, 0x1f14, 0x11e0, 0x448e, 0xc3d9, 0x0b1a, 0x183c, 0xe2f4, + 0xcbe6); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xba33, 0x3a22, 0x9f52, 0x0c6a, 0xcb67, 0x790c, 0x1a85, 0x958e, + 0xe967, 0x52b6, 0xa453, 0xe306, 0x3c91, 0x0309, 0xcbad, 0x9b78); + VLOAD_16(v8, 0x84ef, 0xf522, 0x6224, 0x6e02, 0xeedb, 0x5a1f, 0x98d7, 0xa498, + 0x7a66, 0xdc69, 0xd88b, 0xa611, 0x5a08, 0x6836, 0x9130, 0x85be); + asm volatile("vwmulsu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_I32(2, v12, 0xdbc1219d, 0x37aa4284, 0xdaefcb88, 0x0555a4d4, 0xceecc31d, + 0x2a9ce074, 0x0fd53db3, 0xbb8fc450, 0xf532150a, 0x473654a6, + 0xb2744111, 0xed33f766, 0x154cde88, 0x013c4be6, 0xe2532d70, + 0xcb7abb10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bc45f0a, 0x8a60bc63, 0xf3fcddb9, 0xa810b1c8, 0x8cb59934, + 0x8d3334b0, 0x4387cb58, 0xc59d7a46, 0x939dd006, 0xfbd1dfc1, + 0x75307321, 0xb46b5a27, 0xfd2fbdda, 0xec141137, 0x3a1bc8a7, + 0xf0b1eb21); + VLOAD_32(v16, 0xe193ae9a, 0x57eccb97, 0x41d9eeff, 0xe8d58ddd, 0x2057ccc2, + 0x3122b84c, 0x003bb317, 0xcdfc6918, 0xb5883636, 0x52788c1f, + 0xab90d0a1, 0x6d890387, 0xf9bf35e7, 0xf88259a4, 0x79d9e6f0, + 0x8203a804); + asm volatile("vwmulsu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_I64(3, v24, 0x99948a03e977f804, 0xd79a13a8a4b59f65, 0xfce8fa6d09d3d947, + 0xb005c64673bca1a8, 0xf16f28a6e85f8968, 0xe9f73b3d68722440, + 0x000fbf89e1a9cce8, 0xd10589f688d22c90, 0xb324e8f72e5b2544, + 0xfea741c2f93fa45f, 0x4e89a7a8de9337c1, 0xdfa941cb4089ff91, + 
0xfd4155b0080871b6, 0xeca94be43cc5263c, 0x1ba897cf74e12690, + 0xf83a217108785484); +}; + +void TEST_CASE2(void) { /* TEST_CASE1 vectors under mask v0 = 0xAA, 0xAA with the destination cleared first: expected values are 0 at even indices and the TEST_CASE1 products at odd indices. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x37, 0x4e, 0x9a, 0x08, 0x12, 0xfd, 0xa4, 0x21, 0x44, 0x58, 0x5a, + 0xa9, 0x1d, 0x5e, 0xd4, 0x8e); + VLOAD_8(v4, 0x60, 0x5b, 0x0e, 0x78, 0x67, 0xf4, 0xd3, 0x0f, 0x75, 0x34, 0xc3, + 0xb1, 0x62, 0x42, 0xa9, 0x75); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulsu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_I16(4, v6, 0, 0x1bba, 0, 0x03c0, 0, 0xfd24, 0, 0x01ef, 0, 0x11e0, 0, + 0xc3d9, 0, 0x183c, 0, 0xcbe6); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xba33, 0x3a22, 0x9f52, 0x0c6a, 0xcb67, 0x790c, 0x1a85, 0x958e, + 0xe967, 0x52b6, 0xa453, 0xe306, 0x3c91, 0x0309, 0xcbad, 0x9b78); + VLOAD_16(v8, 0x84ef, 0xf522, 0x6224, 0x6e02, 0xeedb, 0x5a1f, 0x98d7, 0xa498, + 0x7a66, 0xdc69, 0xd88b, 0xa611, 0x5a08, 0x6836, 0x9130, 0x85be); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwmulsu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_I32(5, v12, 0, 0x37aa4284, 0, 0x0555a4d4, 0, 0x2a9ce074, 0, 0xbb8fc450, + 0, 0x473654a6, 0, 0xed33f766, 0, 0x013c4be6, 0, 0xcb7abb10); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x8bc45f0a, 0x8a60bc63, 0xf3fcddb9, 0xa810b1c8, 0x8cb59934, + 0x8d3334b0, 0x4387cb58, 0xc59d7a46, 0x939dd006, 0xfbd1dfc1, + 0x75307321, 0xb46b5a27, 0xfd2fbdda, 0xec141137, 0x3a1bc8a7, + 0xf0b1eb21); + VLOAD_32(v16, 0xe193ae9a, 0x57eccb97, 0x41d9eeff, 0xe8d58ddd, 0x2057ccc2, + 0x3122b84c, 0x003bb317, 0xcdfc6918, 0xb5883636, 0x52788c1f, + 0xab90d0a1, 0x6d890387, 0xf9bf35e7, 0xf88259a4, 0x79d9e6f0, + 0x8203a804); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwmulsu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_I64(6, v24, 0, 0xd79a13a8a4b59f65, 0, 0xb005c64673bca1a8, 0, + 0xe9f73b3d68722440, 0, 0xd10589f688d22c90, 0, 0xfea741c2f93fa45f, 0, + 0xdfa941cb4089ff91, 0, 0xeca94be43cc5263c, 0, 0xf83a217108785484); +}; + +void TEST_CASE3(void) { /* Unmasked vwmulsu.vx with uint64_t scalar operands 5, 5383 and 6474219 on e8, e16 and e32 sources. */ + 
VSET(16, e8, m1); + VLOAD_8(v2, 0x16, 0x39, 0x0d, 0xcb, 0x02, 0x2b, 0xcd, 0x30, 0xec, 0x03, 0x18, + 0x78, 0xec, 0xba, 0xf8, 0x49); + uint64_t scalar = 5; + asm volatile("vwmulsu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(7, v6, 0x006e, 0x011d, 0x0041, 0xfef7, 0x000a, 0x00d7, 0xff01, + 0x00f0, 0xff9c, 0x000f, 0x0078, 0x0258, 0xff9c, 0xfea2, 0xffd8, + 0x016d); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x602f, 0x1b08, 0xfdd6, 0x7e53, 0x59f7, 0x70f1, 0x8a33, 0x5d93, + 0x02a3, 0x9f70, 0x3919, 0x8f2b, 0xc9d3, 0x1b65, 0x15bd, 0xf8be); + scalar = 5383; + asm volatile("vwmulsu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(8, v8, 0x07e67c49, 0x02386538, 0xffd27eda, 0x0a604345, 0x0763b8c1, + 0x0946db97, 0xf652f665, 0x07af9e05, 0x00377175, 0xf8118c10, + 0x04b09caf, 0xf6bb712d, 0xfb8cd3c5, 0x024008c3, 0x01c9192b, + 0xff676332); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x77036e08, 0xeb7bd8dc, 0x9c0d7f6a, 0x19d320f6, 0xb0c0d792, + 0x3f02203c, 0xf72a9ea9, 0x392f2986, 0x85b78fe8, 0xdc6a281b, + 0x146ffa52, 0x61f96c3c, 0x876cda10, 0x24d22032, 0xb6ffba3d, + 0xbbd29543); + scalar = 6474219; + asm volatile("vwmulsu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(9, v16, 0x002ded2f109a4958, 0xfff8153a6c2ccdf4, 0xffd96e55a002304e, + 0x0009f73117dc67d2, 0xffe16b51302b8506, 0x00185082d93ab314, + 0xfffc975917645623, 0x001611286b315402, 0xffd0cfd1f64e41f8, + 0xfff24492094603c9, 0x0007e2fc81b92b46, 0x0025cec234e97714, + 0xffd178914242bcb0, 0x000e357b1b4ecfe6, 0xffe3d4511153daff, + 0xffe5b0d6e5269f81); +}; + +void TEST_CASE4(void) { /* Masked counterpart of TEST_CASE3 (v0 = 0xAA, 0xAA, destination cleared first): 0 is expected at even indices, the TEST_CASE3 products at odd indices. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x16, 0x39, 0x0d, 0xcb, 0x02, 0x2b, 0xcd, 0x30, 0xec, 0x03, 0x18, + 0x78, 0xec, 0xba, 0xf8, 0x49); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulsu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_I16(10, v6, 0, 0x011d, 0, 0xfef7, 0, 0x00d7, 0, 0x00f0, 0, 0x000f, 0, + 0x0258, 0, 0xfea2, 0, 
0x016d); + + VSET(16, e16, m2); + VLOAD_16(v4, 0x602f, 0x1b08, 0xfdd6, 0x7e53, 0x59f7, 0x70f1, 0x8a33, 0x5d93, + 0x02a3, 0x9f70, 0x3919, 0x8f2b, 0xc9d3, 0x1b65, 0x15bd, 0xf8be); + scalar = 5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwmulsu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_I32(11, v8, 0, 0x02386538, 0, 0x0a604345, 0, 0x0946db97, 0, 0x07af9e05, + 0, 0xf8118c10, 0, 0xf6bb712d, 0, 0x024008c3, 0, 0xff676332); + + VSET(16, e32, m4); + VLOAD_32(v8, 0x77036e08, 0xeb7bd8dc, 0x9c0d7f6a, 0x19d320f6, 0xb0c0d792, + 0x3f02203c, 0xf72a9ea9, 0x392f2986, 0x85b78fe8, 0xdc6a281b, + 0x146ffa52, 0x61f96c3c, 0x876cda10, 0x24d22032, 0xb6ffba3d, + 0xbbd29543); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwmulsu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_I64(12, v16, 0, 0xfff8153a6c2ccdf4, 0, 0x0009f73117dc67d2, 0, + 0x00185082d93ab314, 0, 0x001611286b315402, 0, 0xfff24492094603c9, 0, + 0x0025cec234e97714, 0, 0x000e357b1b4ecfe6, 0, 0xffe5b0d6e5269f81); +}; + +int main(void) { /* Test entry point: runs TEST_CASE1 to TEST_CASE4 in order between INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c new file mode 100644 index 000000000..67ea9375f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwmulu.c @@ -0,0 +1,188 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* Unmasked vwmulu.vv on e8, e16 and e32 sources; the products are compared at e16, e32 and e64. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x74, 0xfb, 0xf4, 0xe9, 0xe5, 0x4e, 0x02, 0x27, 0xe9, 0x83, 0xfe, + 0x03, 0xb2, 0xb9, 0x9a, 0x71); + VLOAD_8(v4, 0x67, 0xa9, 0x07, 0x0f, 0xe3, 0x0d, 0xce, 0x81, 0xa2, 0xa5, 0x59, + 0x18, 0x0d, 0xac, 0x80, 0x31); + asm volatile("vwmulu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, 0x2eac, 0xa5b3, 0x06ac, 0x0da7, 0xcb0f, 0x03f6, 0x019c, + 0x13a7, 0x9372, 0x546f, 0x584e, 0x0048, 0x090a, 0x7c4c, 0x4d00, + 0x15a1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf44, 0x249f, 0x3b1d, 0xea59, 0x0c47, 0xd24b, 0xce3e, 0xdb61, + 0x3506, 0xcee2, 0x3c7e, 0xc169, 0x05fd, 0x7fe6, 0xf7db, 0xb7cd); + VLOAD_16(v8, 0xaa0b, 0x2176, 0x34bc, 0x4aa6, 0x221e, 0x9f98, 0x63f5, 0x8da7, + 0x001d, 0x18d7, 0x1dbb, 0x5f2d, 0x0783, 0xd756, 0xa08d, 0x9c49); + asm volatile("vwmulu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, 0x89ac0fec, 0x04c9604a, 0x0c2d4d4c, 0x4455afb6, 0x01a2de52, + 0x83197188, 0x50875b56, 0x79638947, 0x000601ae, 0x1412efce, + 0x0706760a, 0x47e7f675, 0x002cfb77, 0x6b952144, 0x9b71639f, + 0x70355575); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd6f59ab7, 0x3b760112, 0x185928a9, 0x344f2f98, 0x07084e45, + 0x0af492c5, 0x5de6f51a, 0x76783522, 0x36835490, 0x043d016f, + 0xf583b765, 0xd8796652, 0x1bd09e8f, 0xeecf0026, 0xdb725a7d, + 0x3a4c3ab3); + VLOAD_32(v16, 0x19f9f18b, 0x801fde9f, 0xaf759e4c, 0x9206cfd4, 0x2dc70e82, + 0xb57cb666, 0xc4ab14ac, 0xbf231e21, 0xdc6caaf4, 0x5bbc4031, + 0x2021a0db, 0x4e68ad25, 0xb090da86, 0xe32bd2c2, 0xf45e06d0, + 0xa5320284); + asm volatile("vwmulu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, 0x15cfdbd14900485d, 0x1dc267886466462e, 0x10b022995dcd602c, + 0x1dd68d77269f51e0, 0x0141ed9cff22850a, 0x07c4420a9e36887e, + 0x48239482c2b0b578, 0x5873f004fd5ed562, 0x2ef0007a98143940, + 0x0184cd5f928d063f, 0x1ed0b72b9e520367, 0x424d80e7e10133da, + 
0x132f2a19b6a8c4da, 0xd3ea6e817f5f48cc, 0xd179981358ee7390, + 0x259e854b7db9aa4c); +}; + +void TEST_CASE2(void) { /* TEST_CASE1 vectors under mask v0 = 0xAA, 0xAA with the destination cleared first: expected values are 0 at even indices and the TEST_CASE1 products at odd indices. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0x74, 0xfb, 0xf4, 0xe9, 0xe5, 0x4e, 0x02, 0x27, 0xe9, 0x83, 0xfe, + 0x03, 0xb2, 0xb9, 0x9a, 0x71); + VLOAD_8(v4, 0x67, 0xa9, 0x07, 0x0f, 0xe3, 0x0d, 0xce, 0x81, 0xa2, 0xa5, 0x59, + 0x18, 0x0d, 0xac, 0x80, 0x31); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, 0xa5b3, 0, 0x0da7, 0, 0x03f6, 0, 0x13a7, 0, 0x546f, 0, + 0x0048, 0, 0x7c4c, 0, 0x15a1); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xcf44, 0x249f, 0x3b1d, 0xea59, 0x0c47, 0xd24b, 0xce3e, 0xdb61, + 0x3506, 0xcee2, 0x3c7e, 0xc169, 0x05fd, 0x7fe6, 0xf7db, 0xb7cd); + VLOAD_16(v8, 0xaa0b, 0x2176, 0x34bc, 0x4aa6, 0x221e, 0x9f98, 0x63f5, 0x8da7, + 0x001d, 0x18d7, 0x1dbb, 0x5f2d, 0x0783, 0xd756, 0xa08d, 0x9c49); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwmulu.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, 0x04c9604a, 0, 0x4455afb6, 0, 0x83197188, 0, 0x79638947, + 0, 0x1412efce, 0, 0x47e7f675, 0, 0x6b952144, 0, 0x70355575); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xd6f59ab7, 0x3b760112, 0x185928a9, 0x344f2f98, 0x07084e45, + 0x0af492c5, 0x5de6f51a, 0x76783522, 0x36835490, 0x043d016f, + 0xf583b765, 0xd8796652, 0x1bd09e8f, 0xeecf0026, 0xdb725a7d, + 0x3a4c3ab3); + VLOAD_32(v16, 0x19f9f18b, 0x801fde9f, 0xaf759e4c, 0x9206cfd4, 0x2dc70e82, + 0xb57cb666, 0xc4ab14ac, 0xbf231e21, 0xdc6caaf4, 0x5bbc4031, + 0x2021a0db, 0x4e68ad25, 0xb090da86, 0xe32bd2c2, 0xf45e06d0, + 0xa5320284); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwmulu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, 0x1dc267886466462e, 0, 0x1dd68d77269f51e0, 0, + 0x07c4420a9e36887e, 0, 0x5873f004fd5ed562, 0, 0x0184cd5f928d063f, 0, + 0x424d80e7e10133da, 0, 0xd3ea6e817f5f48cc, 0, 0x259e854b7db9aa4c); +}; + +void TEST_CASE3(void) { /* Unmasked vwmulu.vx with uint64_t scalar operands 5, 5383 and 6474219 on e8, e16 and e32 sources. */ + 
VSET(16, e8, m1); + VLOAD_8(v2, 0xf8, 0x11, 0x12, 0xf1, 0x63, 0x21, 0x88, 0x3b, 0x01, 0xf5, 0x6d, + 0xf5, 0xb1, 0x54, 0xcd, 0xb0); + uint64_t scalar = 5; + asm volatile("vwmulu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, 0x04d8, 0x0055, 0x005a, 0x04b5, 0x01ef, 0x00a5, 0x02a8, + 0x0127, 0x0005, 0x04c9, 0x0221, 0x04c9, 0x0375, 0x01a4, 0x0401, + 0x0370); + + VSET(16, e16, m2); + VLOAD_16(v4, 0xf8e5, 0x6a23, 0xb52f, 0x8838, 0xb6d4, 0x5279, 0xf80e, 0xa450, + 0x13ec, 0x916f, 0x8edd, 0x0162, 0x9350, 0x9f74, 0xe1e7, 0x2719); + scalar = 5383; + asm volatile("vwmulu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, 0x14719743, 0x08b7c5f5, 0x0ee1cf49, 0x0b305188, 0x0f0463cc, + 0x06c62e4f, 0x145fee62, 0x0d7f0e30, 0x01a2e774, 0x0bf21509, + 0x0bbc090b, 0x001d13ae, 0x0c199730, 0x0d18e02c, 0x128e2051, + 0x03361eaf); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc41bf344, 0xad5aef4c, 0xf2b11789, 0xeb0d7526, 0xd6c67427, + 0x73724130, 0x440f954a, 0x0661455f, 0x450070ca, 0xc258c90c, + 0xf095d838, 0x358b0916, 0x6e1f1918, 0x4ebf2685, 0x3805d683, + 0x73715886); + scalar = 6474219; + asm volatile("vwmulu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, 0x004bad6117b0b36c, 0x0042e58b237456c4, 0x005da736d67d2bc3, + 0x005ab48c2ee15fe2, 0x0052e15f704d3ecd, 0x002c8ccba3708710, + 0x001a43a5dcd924ee, 0x00027644c9204535, 0x001aa097dd4a236e, + 0x004aff4713f2fa04, 0x005cd71f45c17368, 0x0014a974cb2f9d32, + 0x002a7ec31c6fe108, 0x001e63491da0c917, 0x00159e6c20eec541, + 0x002c8c71dad97902); +}; + +void TEST_CASE4(void) { /* Masked counterpart of TEST_CASE3 (v0 = 0xAA, 0xAA, destination cleared first): 0 is expected at even indices, the TEST_CASE3 products at odd indices. */ + VSET(16, e8, m1); + VLOAD_8(v2, 0xf8, 0x11, 0x12, 0xf1, 0x63, 0x21, 0x88, 0x3b, 0x01, 0xf5, 0x6d, + 0xf5, 0xb1, 0x54, 0xcd, 0xb0); + uint64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwmulu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0, 0x0055, 0, 0x04b5, 0, 0x00a5, 0, 0x0127, 0, 0x04c9, 0, + 0x04c9, 0, 0x01a4, 0, 0x0370); + 
+ VSET(16, e16, m2); + VLOAD_16(v4, 0xf8e5, 0x6a23, 0xb52f, 0x8838, 0xb6d4, 0x5279, 0xf80e, 0xa450, + 0x13ec, 0x916f, 0x8edd, 0x0162, 0x9350, 0x9f74, 0xe1e7, 0x2719); + scalar = 5383; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwmulu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, 0x08b7c5f5, 0, 0x0b305188, 0, 0x06c62e4f, 0, 0x0d7f0e30, + 0, 0x0bf21509, 0, 0x001d13ae, 0, 0x0d18e02c, 0, 0x03361eaf); + + VSET(16, e32, m4); + VLOAD_32(v8, 0xc41bf344, 0xad5aef4c, 0xf2b11789, 0xeb0d7526, 0xd6c67427, + 0x73724130, 0x440f954a, 0x0661455f, 0x450070ca, 0xc258c90c, + 0xf095d838, 0x358b0916, 0x6e1f1918, 0x4ebf2685, 0x3805d683, + 0x73715886); + scalar = 6474219; + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwmulu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, 0x0042e58b237456c4, 0, 0x005ab48c2ee15fe2, 0, + 0x002c8ccba3708710, 0, 0x00027644c9204535, 0, 0x004aff4713f2fa04, 0, + 0x0014a974cb2f9d32, 0, 0x001e63491da0c917, 0, 0x002c8c71dad97902); +}; + +int main(void) { /* Test entry point: runs TEST_CASE1 to TEST_CASE4 in order between INIT_CHECK() and EXIT_CHECK(). */ + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c new file mode 100644 index 000000000..0c57215a1 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsum.c @@ -0,0 +1,153 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test: unmasked vwredsum.vs at SEW = 8, 16 and 32 +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(1, v4, 327); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + asm volatile("vwredsum.vs v8, v12, v4"); + VCMP_U32(2, v8, 73); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + asm volatile("vwredsum.vs v16, v24, v8"); + VCMP_U64(3, v16, 73); +} + +// Masked naive test: vwredsum.vs under v0.t with mask bytes 0xaa, 0x55 +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + VLOAD_16(v4, 1); + asm volatile("vwredsum.vs v4, v6, v2, v0.t"); + VCMP_U16(4, v4, 291); + + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + VLOAD_32(v8, 1); + asm volatile("vwredsum.vs v8, v12, v4, v0.t"); + VCMP_U32(5, v8, 37); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + VLOAD_64(v16, 1); + asm volatile("vwredsum.vs v16, v24, v8, v0.t"); + VCMP_U64(6, v16, 37); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(7, v4, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v8, v12, v4"); + VCMP_U32(8, v8, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v16, v24, v8"); + VCMP_U64(9, v16, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd vl (15, 1 and 3 elements), undisturbed tail policy +void TEST_CASE4(void) { + VSET(15, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(10, v4, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v8, v12, v4"); + VCMP_U32(11, v8, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v16, v24, v8"); + 
VCMP_U64(12, v16, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd vl, undisturbed tail policy, and mask (e32 case is commented out) +void TEST_CASE5(void) { + VSET(15, e8, m1); + VLOAD_8(v0, 0x00, 0x40); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 100, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v4, v6, v2, v0.t"); + VCMP_U16(13, v4, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsum.vs v8, v12, v4, v0.t"); + VCMP_U32(14, v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + // VSET(3, e32, m4); + // VLOAD_8(v0, 0xaa, 0x55); + // VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + // VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + // VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + // asm volatile("vwredsum.vs v16, v24, v8, v0.t"); + // VCMP_U64(15, v16, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Test difference from vwredsumu: 255 (0xff) is sign-extended to -1 +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 255, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsum.vs v4, v6, v2"); + VCMP_U16(16, v4, 325); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c new file mode 100644 index 000000000..421acdb6f --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwredsumu.c @@ -0,0 +1,153 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matteo Perotti + +#include "vector_macros.h" + +// Naive test: unmasked vwredsumu.vs at SEW = 8, 16 and 32 +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(1, v4, 327); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + asm volatile("vwredsumu.vs v8, v12, v4"); + VCMP_U32(2, v8, 73); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + asm volatile("vwredsumu.vs v16, v24, v8"); + VCMP_U64(3, v16, 73); +} + +// Masked naive test: vwredsumu.vs under v0.t with mask bytes 0xaa, 0x55 +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + VLOAD_16(v4, 1); + asm volatile("vwredsumu.vs v4, v6, v2, v0.t"); + VCMP_U16(4, v4, 291); + + VSET(16, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1); + VLOAD_32(v8, 1); + asm volatile("vwredsumu.vs v8, v12, v4, v0.t"); + VCMP_U32(5, v8, 37); + + VSET(16, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1); + VLOAD_64(v16, 1); + asm volatile("vwredsumu.vs v16, v24, v8, v0.t"); + VCMP_U64(6, v16, 37); +} + +// Are we respecting the undisturbed tail policy? 
+void TEST_CASE3(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(7, v4, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v8, v12, v4"); + VCMP_U32(8, v8, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(16, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v16, v24, v8"); + VCMP_U64(9, v16, 73, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd vl (15, 1 and 3 elements), undisturbed tail policy +void TEST_CASE4(void) { + VSET(15, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(10, v4, 65, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v8, v12, v4"); + VCMP_U32(11, v8, 2, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v16, v24, v8"); + 
VCMP_U64(12, v16, 7, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Odd vl (15, 1 and 3 elements), undisturbed tail policy, and mask +void TEST_CASE5(void) { + VSET(15, e8, m1); + VLOAD_8(v0, 0x00, 0x40); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 100, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v4, v6, v2, v0.t"); + VCMP_U16(13, v4, 107, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(1, e16, m2); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_16(v12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v8, v12, v4, v0.t"); + VCMP_U32(14, v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + + VSET(3, e32, m4); + VLOAD_8(v0, 0xaa, 0x55); + VLOAD_32(v24, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + asm volatile("vwredsumu.vs v16, v24, v8, v0.t"); + VCMP_U64(15, v16, 3, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +} + +// Test difference from vwredsum: 255 (0xff) is zero-extended, not -1 +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 255, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v2, 255); + asm volatile("vwredsumu.vs v4, v6, v2"); + VCMP_U16(16, v4, 581); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c new file mode 100644 index 000000000..200d28a4b --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsub.c @@ -0,0 +1,246 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { /* vwsub.vv, unmasked */ + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE2(void) { /* vwsub.vv, masked by v0.t */ + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsub.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsub.vv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + 
VCLEAR(v28); + asm volatile("vwsub.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE3(void) { /* vwsub.vx, unmasked */ + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.vx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v4, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); +} + +void TEST_CASE4(void) { /* vwsub.vx, masked by v0.t */ + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwsub.vx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v4, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsub.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwsub.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +void 
TEST_CASE5(void) { /* vwsub.wv, unmasked */ + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsub.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE6(void) { /* vwsub.wv, masked by v0.t */ + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsub.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsub.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwsub.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE7(void) { /* vwsub.wx, unmasked */ + const uint32_t 
scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v4, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsub.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); +} + +void TEST_CASE8(void) { /* vwsub.wx, masked by v0.t */ + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwsub.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v4, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsub.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwsub.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c new file mode 100644 index 000000000..12e6dc22c --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vwsubu.c @@ -0,0 +1,246 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.vv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(1, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.vv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(2, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.vv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(3, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsubu.vv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(4, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsubu.vv v12, v4, v8, 
v0.t"); + VSET(16, e32, m4); + VCMP_U32(5, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwsubu.vv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(6, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE3(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.vx v6, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(7, v6, -4, 249, -2, 247, 0, 245, 2, 243, 4, 241, 6, 239, 8, 237, 10, + 235); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.vx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(8, v8, -4, 65529, -2, 65527, 0, 65525, 2, 65523, 4, 65521, 6, 65519, + 8, 65517, 10, 65515); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.vx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(9, v16, -4, 4294967289, -2, 4294967287, 0, 4294967285, 2, 4294967283, + 4, 4294967281, 6, 4294967279, 8, 4294967277, 10, 4294967275); +} + +void TEST_CASE4(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_8(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsubu.vx v6, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(10, v6, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_16(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsubu.vx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(11, 
v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + VCLEAR(v20); + asm volatile("vwsubu.vx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(12, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +void TEST_CASE5(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.wv v6, v2, v4"); + VSET(16, e16, m2); + VCMP_U16(13, v6, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.wv v12, v4, v8"); + VSET(16, e32, m4); + VCMP_U32(14, v12, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + asm volatile("vwsubu.wv v24, v8, v16"); + VSET(16, e64, m8); + VCMP_U64(15, v24, -7, -5, -3, -1, 1, 3, 5, 7, -7, -5, -3, -1, 1, 3, 5, 7); +} + +void TEST_CASE6(void) { + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v4, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v6); + VCLEAR(v7); + asm volatile("vwsubu.wv v6, v2, v4, v0.t"); + VSET(16, e16, m2); + VCMP_U16(16, v6, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_16(v8, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v12); + VCLEAR(v14); + asm volatile("vwsubu.wv v12, v4, v8, v0.t"); + VSET(16, e32, m4); + VCMP_U32(17, v12, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); + + VSET(16, e32, 
m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_32(v16, 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v24); + VCLEAR(v28); + asm volatile("vwsubu.wv v24, v8, v16, v0.t"); + VSET(16, e64, m8); + VCMP_U64(18, v24, 0, -5, 0, -1, 0, 3, 0, 7, 0, -5, 0, -1, 0, 3, 0, 7); +} + +void TEST_CASE7(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.wx v4, v2, %[A]" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(19, v4, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.wx v8, v4, %[A]" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(20, v8, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15, -16); + asm volatile("vwsubu.wx v16, v8, %[A]" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(21, v16, -4, -7, -2, -9, 0, -11, 2, -13, 4, -15, 6, -17, 8, -19, 10, + -21); +} + +void TEST_CASE8(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m1); + VLOAD_16(v2, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v4); + VCLEAR(v5); + asm volatile("vwsubu.wx v4, v2, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e16, m2); + VCMP_U16(22, v4, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e16, m2); + VLOAD_32(v4, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v8); + VCLEAR(v10); + asm volatile("vwsubu.wx v8, v4, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e32, m4); + VCMP_U32(23, v8, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); + + VSET(16, e32, m4); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAA); + VCLEAR(v16); + 
VCLEAR(v20); + asm volatile("vwsubu.wx v16, v8, %[A], v0.t" ::[A] "r"(scalar)); + VSET(16, e64, m8); + VCMP_U64(24, v16, 0, -3, 0, -1, 0, 1, 0, 3, 0, -3, 0, -1, 0, 1, 0, 3); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c new file mode 100644 index 000000000..0c7574874 --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vxor.c @@ -0,0 +1,309 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + asm volatile("vxor.vv v1, v2, v3"); + VCMP_U8(1, v1, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, 0x02, 0x00, 0x0f, + 0x02, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v6, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + asm volatile("vxor.vv v2, v4, v6"); + VCMP_U16(2, v2, 0x00ff, 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000, 0x00ff, + 0x0002, 0x0000, 0x00ff, 0x0002, 0x0000); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + asm 
volatile("vxor.vv v4, v8, v12"); + VCMP_U32(3, v4, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, 0x00000002, + 0x00000000, 0x0000ffff, 0x00000002, 0x00000000, 0x0000ffff, + 0x00000002, 0x00000000); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + asm volatile("vxor.vv v8, v16, v24"); + VCMP_U64(4, v8, 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, + 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, + 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000, + 0x00000000ffffffff, 0x0000000000000002, 0x0000000000000000); +} + +void TEST_CASE2() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v3, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, 0xf0, 0xf0, 0x03, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vxor.vv v1, v2, v3, v0.t"); + VCMP_U8(5, v1, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + 0xef, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_16(v8, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, 0xf0f0, 0xff00, 0x0003, + 0xf0f0, 0xff00, 0x0003, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vxor.vv v2, v4, v8, v0.t"); + VCMP_U16(6, v2, 0x00ff, 0xbeef, 
0x0000, 0x00ff, 0xbeef, 0x0000, 0x00ff, + 0xbeef, 0x0000, 0x00ff, 0xbeef, 0x0000); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_32(v12, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, 0x00000003, + 0xf0f0f0f0, 0xffff0000, 0x00000003, 0xf0f0f0f0, 0xffff0000, + 0x00000003, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vxor.vv v4, v8, v12, v0.t"); + VCMP_U32(7, v4, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, 0xdeadbeef, + 0x00000000, 0x0000ffff, 0xdeadbeef, 0x00000000, 0x0000ffff, + 0xdeadbeef, 0x00000000); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_64(v24, 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0, + 0xffffffff00000000, 0x0000000000000003, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vxor.vv v8, v16, v24, v0.t"); + VCMP_U64(8, v8, 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000, + 0x00000000ffffffff, 0xdeadbeefdeadbeef, 0x0000000000000000); +} + +void 
TEST_CASE3() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vxor.vx v1, v2, %[A]" ::[A] "r"(scalar)); + VCMP_U8(9, v1, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, 0xf1, 0x00, 0x0f, + 0xf1, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vxor.vx v2, v4, %[A]" ::[A] "r"(scalar)); + VCMP_U16(10, v2, 0xf00f, 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00, 0xf00f, + 0x0ff1, 0xff00, 0xf00f, 0x0ff1, 0xff00); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vxor.vx v4, v8, %[A]" ::[A] "r"(scalar)); + VCMP_U32(11, v4, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, + 0xff00ff00, 0xf00ff00f, 0x0ff00ff1, 0xff00ff00, 0xf00ff00f, + 0x0ff00ff1, 0xff00ff00); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vxor.vx v8, v16, %[A]" ::[A] "r"(scalar)); + VCMP_U64(12, v8, 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0x0ff00ff00ff00ff1, 0xff00ff00ff00ff00); +} + +void TEST_CASE4() { + const uint64_t scalar = 0x0ff00ff00ff00ff0; + + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vxor.vx v1, v2, %[A], 
v0.t" ::[A] "r"(scalar)); + VCMP_U8(13, v1, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, 0xef, 0x00, 0x0f, + 0xef, 0x00); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vxor.vx v2, v4, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(14, v2, 0xf00f, 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00, 0xf00f, + 0xbeef, 0xff00, 0xf00f, 0xbeef, 0xff00); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vxor.vx v4, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(15, v4, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, 0xdeadbeef, + 0xff00ff00, 0xf00ff00f, 0xdeadbeef, 0xff00ff00, 0xf00ff00f, + 0xdeadbeef, 0xff00ff00); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vxor.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(16, v8, 0xf00ff00ff00ff00f, 0xdeadbeefdeadbeef, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0xdeadbeefdeadbeef, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 
0xdeadbeefdeadbeef, 0xff00ff00ff00ff00, + 0xf00ff00ff00ff00f, 0xdeadbeefdeadbeef, 0xff00ff00ff00ff00); +} + +void TEST_CASE5() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + asm volatile("vxor.vi v1, v2, 15"); + VCMP_U8(17, v1, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, 0x0e, 0xff, 0xf0, + 0x0e, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + asm volatile("vxor.vi v2, v4, 15"); + VCMP_U16(18, v2, 0xfff0, 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff, 0xfff0, + 0x000e, 0xf0ff, 0xfff0, 0x000e, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + asm volatile("vxor.vi v4, v8, 15"); + VCMP_U32(19, v4, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, 0x0000000e, + 0xf0f0f0ff, 0xfffffff0, 0x0000000e, 0xf0f0f0ff, 0xfffffff0, + 0x0000000e, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + asm volatile("vxor.vi v8, v16, 15"); + VCMP_U64(20, v8, 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0x000000000000000e, 0xf0f0f0f0f0f0f0ff); +} + +void TEST_CASE6() { + VSET(12, e8, m1); + VLOAD_8(v2, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, + 0xf0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_8(v1, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, + 0xef); + asm volatile("vxor.vi v1, v2, 15, v0.t"); + VCMP_U8(21, v1, 0xf0, 0xef, 0xff, 0xf0, 0xef, 0xff, 
0xf0, 0xef, 0xff, 0xf0, + 0xef, 0xff); + + VSET(12, e16, m2); + VLOAD_16(v4, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, 0xf0f0, 0xffff, 0x0001, + 0xf0f0, 0xffff, 0x0001, 0xf0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_16(v2, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, 0xbeef, + 0xbeef, 0xbeef, 0xbeef, 0xbeef); + asm volatile("vxor.vi v2, v4, 15, v0.t"); + VCMP_U16(22, v2, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff, 0xfff0, + 0xbeef, 0xf0ff, 0xfff0, 0xbeef, 0xf0ff); + + VSET(12, e32, m4); + VLOAD_32(v8, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, 0x00000001, + 0xf0f0f0f0, 0xffffffff, 0x00000001, 0xf0f0f0f0, 0xffffffff, + 0x00000001, 0xf0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_32(v4, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef); + asm volatile("vxor.vi v4, v8, 15, v0.t"); + VCMP_U32(23, v4, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, + 0xf0f0f0ff, 0xfffffff0, 0xdeadbeef, 0xf0f0f0ff, 0xfffffff0, + 0xdeadbeef, 0xf0f0f0ff); + + VSET(12, e64, m8); + VLOAD_64(v16, 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0, + 0xffffffffffffffff, 0x0000000000000001, 0xf0f0f0f0f0f0f0f0); + VLOAD_8(v0, 0x6D, 0x0B); + VLOAD_64(v8, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, + 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); + asm volatile("vxor.vi v8, v16, 15, v0.t"); + VCMP_U64(24, v8, 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff, + 0xfffffffffffffff0, 0xdeadbeefdeadbeef, 0xf0f0f0f0f0f0f0ff); +} + +int main(void) { + INIT_CHECK(); + 
enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c new file mode 100644 index 000000000..0d24c220d --- /dev/null +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vzext.c @@ -0,0 +1,106 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1(void) { + VSET(16, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + asm volatile("vzext.vf2 v2, v1"); + VCMP_U16(1, v2, 1, 2, 253, 252, 5, 6, 249, 248); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + asm volatile("vzext.vf2 v2, v1"); + VCMP_U32(2, v2, 1, 2, 65533, 65532); + + VSET(16, e64, m1); + VLOAD_32(v8, 1, 2); + asm volatile("vzext.vf2 v0, v8"); + VCMP_U64(3, v0, 1, 2); +} + +void TEST_CASE2(void) { + VSET(16, e16, m1); + VLOAD_8(v1, 1, 2, -3, -4, 5, 6, -7, -8); + VLOAD_8(v0, 0xAA); + VCLEAR(v2); + asm volatile("vzext.vf2 v2, v1, v0.t"); + VCMP_U16(4, v2, 0, 2, 0, 252, 0, 6, 0, 248); + + VSET(16, e32, m1); + VLOAD_16(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vzext.vf2 v2, v1, v0.t"); + VCMP_U32(5, v2, 0, 2, 0, 65532); + + VSET(16, e64, m1); + VLOAD_32(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vzext.vf2 v2, v1, v0.t"); + VCMP_U64(6, v2, 0, 2); +} + +void TEST_CASE3(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + asm volatile("vzext.vf4 v2, v1"); + VCMP_U32(7, v2, 1, 2, 253, 252); + + VSET(16, e64, m1); + VLOAD_16(v1, 1, 2); + asm volatile("vzext.vf4 v2, v1"); + VCMP_U64(8, v2, 1, 2); +} + +void TEST_CASE4(void) { + VSET(16, e32, m1); + VLOAD_8(v1, 1, 2, -3, -4); + VLOAD_8(v0, 0x0A); + VCLEAR(v2); + asm volatile("vzext.vf4 v2, v1, v0.t"); + 
VCMP_U32(9, v2, 0, 2, 0, 252); + + VSET(16, e64, m1); + VLOAD_16(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vzext.vf4 v2, v1, v0.t"); + VCMP_U64(10, v2, 0, 2); +} + +void TEST_CASE5(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + asm volatile("vzext.vf8 v2, v1"); + VCMP_U64(11, v2, 1, 2); +} + +void TEST_CASE6(void) { + VSET(16, e64, m1); + VLOAD_8(v1, 1, 2); + VLOAD_8(v0, 0x02); + VCLEAR(v2); + asm volatile("vzext.vf8 v2, v1, v0.t"); + VCMP_U64(12, v2, 0, 2); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} From 5f571d114444c8b8a05ccbb63525fb1d740dca52 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Fri, 23 Dec 2022 18:42:03 +0500 Subject: [PATCH 13/16] [makefile] Update makefile to run the separate tests for single lane --- apps/Makefile | 8 + apps/common/riscv_tests.mk | 6 +- .../isa/rv64uv/1_lane_tests/Makefrag | 208 +++++++++++++++--- 3 files changed, 186 insertions(+), 36 deletions(-) diff --git a/apps/Makefile b/apps/Makefile index 6bb74f304..57f00247a 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -109,7 +109,11 @@ riscv_tests: $(CVA6_BINARIES) $(ARA_BINARIES) define rvtest_compile_template TESTS_$(1) := $(addprefix bin/, $($(addsuffix _ara_tests, $1))) +ifeq ($(nr_lanes), 1) +bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/1_lane_tests/%.$(2) $(RUNTIME_GCC) linker_script +else bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_GCC) linker_script +endif mkdir -p bin/ $$(RISCV_CC_GCC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS_GCC) $$(RISCV_LDFLAGS_GCC) -o $$@ $$< $(RUNTIME_GCC) -T$$(CURDIR)/common/link.ld $$(RISCV_OBJDUMP) $$(RISCV_OBJDUMP_FLAGS) -D $$@ > $$@.dump @@ -119,7 +123,11 @@ endef define rvtest_compile_template_c TESTS_$(1) := $(addprefix bin/, $($(addsuffix _ara_tests, $1))) +ifeq ($(nr_lanes), 1) +bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/1_lane_tests/%.$(2)
$(RUNTIME_LLVM) linker_script +else bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_LLVM) linker_script +endif mkdir -p bin/ $$(RISCV_CC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS) $$(RISCV_LDFLAGS) -o $$@ $$< $(RUNTIME_LLVM) -T$$(CURDIR)/common/link.ld $$(RISCV_OBJDUMP) $$(RISCV_OBJDUMP_FLAGS) -D $$@ > $$@.dump diff --git a/apps/common/riscv_tests.mk b/apps/common/riscv_tests.mk index adf0ada89..b1ca38e1b 100644 --- a/apps/common/riscv_tests.mk +++ b/apps/common/riscv_tests.mk @@ -8,7 +8,11 @@ include $(TESTS_DIR)/rv64um/Makefrag include $(TESTS_DIR)/rv64ua/Makefrag include $(TESTS_DIR)/rv64uf/Makefrag include $(TESTS_DIR)/rv64ud/Makefrag -include $(TESTS_DIR)/rv64uv/Makefrag +ifeq ($(nr_lanes), 1) + include $(TESTS_DIR)/rv64uv/1_lane_tests/Makefrag +else + include $(TESTS_DIR)/rv64uv/Makefrag +endif include $(TESTS_DIR)/rv64si/Makefrag rv64ui_ara_tests := $(addprefix rv64ui-ara-, $(rv64ui_sc_tests)) diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag index caba44c02..a83d881dd 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag @@ -1,40 +1,178 @@ -#Copyright 2021 ETH Zurich and University of Bologna. -#Solderpad Hardware License, Version 0.51, see LICENSE for details. -#SPDX - License - Identifier : SHL - 0.51 +# Copyright 2021 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+# SPDX-License-Identifier: SHL-0.51 # -#Author : Matheus Cavalcante < matheusd @iis.ee.ethz.ch> -#Basile Bougenot < bbougenot @student.ethz.ch> +# Author: Matheus Cavalcante +# Basile Bougenot -rv64uv_sc_tests = vaadd vaaddu vsadd vsaddu vsmul vssra vssrl vnclip vnclipu vadd - vsub vrsub vwaddu vwsubu vwadd vwsub vsext vzext vadc vmadc vsbc vmsbc vand vor - vxor vsll vsrl vsra vnsrl vnsra vmseq vmsne vmsltu vmslt vmsleu vmsle vmsgtu - vmsgt vminu vmin vmaxu vmax vmul vmulh vmulhu vmulhsu vdivu vdiv vremu - vrem vwmul vwmulu vwmulsu vmacc vnmsac vmadd vnmsub vwmaccu vwmacc - vwmaccsu vwmaccus vmerge vmv vmvxs vmvsx vfmvfs vfmvsf vmvnrr - vredsum vredmaxu vredmax vredminu vredmin vredand vredor - vredxor vwredsumu vwredsum vfadd vfsub vfrsub vfwadd - vfwsub vfmul vfdiv vfrdiv vfwmul vfmacc vfnmacc vfmsac - vfnmsac vfmadd vfnmadd vfmsub vfnmsub vfwmacc - vfwnmacc vfwmsac vfwnmsac vfsqrt vfmin vfmax vfredusum vfredosum vfredmin vfredmax - vfwredusum vfwredosum vfclass vfsgnj vfsgnjn vfsgnjx vfmerge - vfmv vmfeq vmfne vmflt vmfle vmfgt vmfge vfcvt vfwcvt vfncvt - vmand vmnand vmandnot vmor vmnor vmornot vmxor vmxnor vslideup vslidedown - vslide1up vfslide1up vslide1down vfslide1down vl - vl1r vle1 vls vluxei vs - vs1r vse1 vss vsuxei vsetivli vsetvli - vsetvl vmsbf vmsof vmsif viota vid vcpop vfirst vle8 - vse8 vle16 vse16 vle32 vse32 vle64 vse64 +rv64uv_sc_tests = vaadd \ + vaaddu\ + vsadd \ + vsaddu \ + vsmul \ + vssra \ + vssrl \ + vnclip \ + vnclipu \ + vadd \ + vsub \ + vrsub \ + vwaddu \ + vwsubu \ + vwadd \ + vwsub \ + vsext \ + vzext \ + vadc \ + vmadc \ + vsbc \ + vmsbc \ + vand \ + vor \ + vxor \ + vsll \ + vsrl \ + vsra \ + vnsrl \ + vnsra \ + vmseq \ + vmsne \ + vmsltu \ + vmslt \ + vmsleu \ + vmsle \ + vmsgtu \ + vmsgt \ + vminu \ + vmin \ + vmaxu \ + vmax \ + vmul \ + vmulh \ + vmulhu \ + vmulhsu \ + vdivu \ + vdiv \ + vremu \ + vrem \ + vwmul \ + vwmulu \ + vwmulsu \ + vmacc \ + vnmsac \ + vmadd \ + vnmsub \ + vwmaccu \ + vwmacc \ + vwmaccsu \ + vwmaccus 
\ + vmerge \ + vmv \ + vmvxs \ + vmvsx \ + vfmvfs \ + vfmvsf \ + vmvnrr \ + vredsum \ + vredmaxu \ + vredmax \ + vredminu \ + vredmin \ + vredand \ + vredor \ + vredxor \ + vwredsumu \ + vwredsum \ + vfadd \ + vfsub \ + vfrsub \ + vfwadd \ + vfwsub \ + vfmul \ + vfdiv \ + vfrdiv \ + vfwmul \ + vfmacc \ + vfnmacc \ + vfmsac \ + vfnmsac \ + vfmadd \ + vfnmadd \ + vfmsub \ + vfnmsub \ + vfwmacc \ + vfwnmacc \ + vfwmsac \ + vfwnmsac \ + vfsqrt \ + vfmin \ + vfmax \ + vfredusum \ + vfredosum \ + vfredmin \ + vfredmax \ + vfwredusum \ + vfwredosum \ + vfclass \ + vfsgnj \ + vfsgnjn \ + vfsgnjx \ + vfmerge \ + vfmv \ + vmfeq \ + vmfne \ + vmflt \ + vmfle \ + vmfgt \ + vmfge \ + vfcvt \ + vfwcvt \ + vfncvt \ + vmand \ + vmnand \ + vmandnot \ + vmor \ + vmnor \ + vmornot \ + vmxor \ + vmxnor \ + vslideup \ + vslidedown \ + vslide1up \ + vfslide1up \ + vslide1down \ + vfslide1down \ + vl \ + vl1r \ + vle1 \ + vls \ + vluxei \ + vs \ + vs1r \ + vse1 \ + vss \ + vsuxei \ + vsetivli\ + vsetvli\ + vsetvl\ + vmsbf \ + vmsof \ + vmsif \ + viota \ + vid \ + vcpop \ + vfirst \ + vle8 \ + vse8 \ + vle16 \ + vse16 \ + vle32 \ + vse32 \ + vle64 \ + vse64 -#rv64uv_sc_tests = vaadd vaaddu vadc vasub vasubu vcompress vfirst vid viota \ - vl vlff vl_nocheck vlx vmsbf vmsif vmsof vpopc_m vrgather vsadd vsaddu \ - vsetvl vsetivli vsetvli vsmul vssra vssrl vssub vssubu vsux vsx +#rv64uv_sc_tests = vaadd vaaddu vadc vasub vasubu vcompress vfirst vid viota vl vlff vl_nocheck vlx vmsbf vmsif vmsof vpopc_m vrgather vsadd vsaddu vsetvl vsetivli vsetvli vsmul vssra vssrl vssub vssubu vsux vsx - rv64uv_p_tests = $( - addprefix - rv64uv - - p - - , - $(rv64uv_sc_tests)) +rv64uv_p_tests = $(addprefix rv64uv-p-, $(rv64uv_sc_tests)) - spike_ctests += - $(rv64uv_p_tests) +spike_ctests += $(rv64uv_p_tests) \ No newline at end of file From 1734e04913b353c14a5161b58b3e4a395b0f5f1e Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Wed, 28 Dec 2022 10:37:01 +0500 Subject: [PATCH 14/16] :droplet: Fixed 
clang-format --- .../isa/rv64uv/1_lane_tests/Makefrag | 2 +- .../isa/rv64uv/1_lane_tests/vaadd.c | 8 +++--- .../isa/rv64uv/1_lane_tests/vaaddu.c | 8 +++--- .../isa/rv64uv/1_lane_tests/vcpop.c | 18 ++++++------- .../isa/rv64uv/1_lane_tests/vfirst.c | 27 ++++++++----------- .../isa/rv64uv/1_lane_tests/vl_nocheck.c | 8 +++--- .../isa/rv64uv/1_lane_tests/vle16.c | 2 +- .../isa/rv64uv/1_lane_tests/vle32.c | 2 +- .../isa/rv64uv/1_lane_tests/vle64.c | 2 +- .../isa/rv64uv/1_lane_tests/vle8.c | 8 +++--- .../isa/rv64uv/1_lane_tests/vpopc_m.c | 9 +++---- .../isa/rv64uv/1_lane_tests/vse16.c | 5 ++-- .../isa/rv64uv/1_lane_tests/vse32.c | 5 ++-- .../isa/rv64uv/1_lane_tests/vse64.c | 7 ++--- .../isa/rv64uv/1_lane_tests/vse8.c | 5 ++-- .../isa/rv64uv/1_lane_tests/vsetivli.c | 24 ++++++++--------- .../isa/rv64uv/1_lane_tests/vsetvl.c | 26 +++++++++--------- .../isa/rv64uv/1_lane_tests/vsetvli.c | 26 +++++++++--------- .../isa/rv64uv/1_lane_tests/vsuxei.c | 12 ++++++--- 19 files changed, 102 insertions(+), 102 deletions(-) diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag index a83d881dd..e5a2acaf6 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/Makefrag @@ -175,4 +175,4 @@ rv64uv_sc_tests = vaadd \ rv64uv_p_tests = $(addprefix rv64uv-p-, $(rv64uv_sc_tests)) -spike_ctests += $(rv64uv_p_tests) \ No newline at end of file +spike_ctests += $(rv64uv_p_tests) diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c index 513b4f4f3..40cd9d484 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaadd.c @@ -8,7 +8,7 @@ #include "vector_macros.h" void TEST_CASE1(void) { - set_vxrm(0); // setting vxrm to rnu rounding mode + set_vxrm(0); // setting vxrm to rnu rounding mode VSET(4, e8, m1); VLOAD_8(v1, 1, -2, -3, 4); VLOAD_8(v2, 1, 2, -3, 3); @@ -17,7 +17,7 
@@ void TEST_CASE1(void) { } void TEST_CASE2(void) { - set_vxrm(1); // setting vxrm to rne rounding mode + set_vxrm(1); // setting vxrm to rne rounding mode VSET(4, e8, m1); VLOAD_8(v1, 1, -2, -3, 4); VLOAD_8(v2, 1, 9, -3, 5); @@ -28,7 +28,7 @@ void TEST_CASE2(void) { } void TEST_CASE3(void) { - set_vxrm(2); // setting vxrm to rdn rounding mode + set_vxrm(2); // setting vxrm to rdn rounding mode VSET(4, e32, m1); VLOAD_32(v1, 1, -2, 3, -4); const uint32_t scalar = 5; @@ -38,7 +38,7 @@ void TEST_CASE3(void) { // Dont use VCLEAR here, it results in a glitch where are values are off by 1 void TEST_CASE4(void) { - set_vxrm(3); // setting vxrm to rod rounding mode + set_vxrm(3); // setting vxrm to rod rounding mode VSET(4, e32, m1); VLOAD_32(v1, 1, 2, 3, 4); const uint32_t scalar = 5; diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c index ff50b894c..44bea01f1 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vaaddu.c @@ -8,7 +8,7 @@ #include "vector_macros.h" void TEST_CASE1(void) { - set_vxrm(0); // setting vxrm to rnu rounding mode + set_vxrm(0); // setting vxrm to rnu rounding mode VSET(4, e8, m1); VLOAD_8(v1, 1, 2, 3, 5); VLOAD_8(v2, 1, 3, 8, 4); @@ -17,7 +17,7 @@ void TEST_CASE1(void) { } void TEST_CASE2(void) { - set_vxrm(1); // setting vxrm to rne rounding mode + set_vxrm(1); // setting vxrm to rne rounding mode VSET(4, e8, m1); VLOAD_8(v1, 5, 8, 3, 7); VLOAD_8(v2, 7, 5, 3, 5); @@ -28,7 +28,7 @@ void TEST_CASE2(void) { } void TEST_CASE3(void) { - set_vxrm(2); // setting vxrm to rdn rounding mode + set_vxrm(2); // setting vxrm to rdn rounding mode VSET(4, e32, m1); VLOAD_32(v1, 1, 2, 3, 4); const uint32_t scalar = 5; @@ -38,7 +38,7 @@ void TEST_CASE3(void) { // Dont use VCLEAR here, it results in a glitch where are values are off by 1 void TEST_CASE4(void) { - set_vxrm(3); // setting vxrm to rod rounding mode + set_vxrm(3); // setting vxrm 
to rod rounding mode VSET(4, e32, m1); VLOAD_32(v1, 1, 2, 3, 4); const uint32_t scalar = 5; diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c index a9b828e31..0d0794db9 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vcpop.c @@ -15,11 +15,10 @@ void TEST_CASE1(void) { VLOAD_32(v0, 5, 0, 0, 0); volatile uint32_t scalar = 1337; volatile uint32_t OUP[] = {0, 0, 0, 0}; - __asm__ volatile( - "vpopc.m %[A], v2, v0.t \n" - "sw %[A], (%1) \n" - : - : [A] "r"(scalar), "r"(OUP)); + __asm__ volatile("vpopc.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); XCMP(1, OUP[0], 2); } @@ -29,11 +28,10 @@ void TEST_CASE2(void) { VLOAD_32(v2, 0xF, 0, 0, 0); volatile uint32_t scalar = 1337; volatile uint32_t OUP[] = {0, 0, 0, 0}; - __asm__ volatile( - "vpopc.m %[A], v2 \n" - "sw %[A], (%1) \n" - : - : [A] "r"(scalar), "r"(OUP)); + __asm__ volatile("vpopc.m %[A], v2 \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); XCMP(2, OUP[0], 4); } diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c index fd9615af4..c7657b4ec 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vfirst.c @@ -7,38 +7,33 @@ #include "vector_macros.h" -void TEST_CASE1() -{ +void TEST_CASE1() { VSET(4, e32, m1); VLOAD_32(v2, 3); VLOAD_32(v0, 2, 0, 0, 0); volatile uint32_t scalar = 1337; volatile uint32_t OUP[] = {0}; - __asm__ volatile( - "vfirst.m %[A], v2, v0.t \n" - "sw %[A], (%1) \n" - : - : [A] "r"(scalar), "r"(OUP)); + __asm__ volatile("vfirst.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); XCMP(1, OUP[0], 1); } -void TEST_CASE2() -{ +void TEST_CASE2() { VSET(4, e32, m1); VLOAD_32(v2, 1, 2, 3, 4); VLOAD_32(v0, 0, 0, 0, 0); volatile int32_t scalar = 1337; volatile int32_t OUP[] = {0}; - __asm__ volatile( - 
"vfirst.m %[A], v2, v0.t \n" - "sw %[A], (%1) \n" - : - : [A] "r"(scalar), "r"(OUP)); + __asm__ volatile("vfirst.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); XCMP(2, OUP[0], -1); } -int main(void) -{ +int main(void) { INIT_CHECK(); enable_vec(); enable_fp(); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c index 7260e19f8..aadc37285 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vl_nocheck.c @@ -10,7 +10,7 @@ // or add inp here void TEST_CASE1(void) { VSET(4, e8, m1); - volatile int8_t INP1[] = {0xff, 0x00, 0x0f, 0xf0}; // flush + volatile int8_t INP1[] = {0xff, 0x00, 0x0f, 0xf0}; // flush __asm__ volatile("fence"); __asm__ volatile("vle8.v v1, (%0)" ::"r"(INP1)); // VEC_CMP_8(1,v1,0xff, 0x00, 0x0f,0xf0); @@ -19,7 +19,7 @@ void TEST_CASE1(void) { void TEST_CASE2(void) { VSET(4, e16, m1); - volatile int16_t INP1[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; // flush + volatile int16_t INP1[] = {0xffff, 0x0000, 0x0f0f, 0xf0f0}; // flush __asm__ volatile("fence"); __asm__ volatile("vle16.v v1, (%0)" ::"r"(INP1)); // VEC_CMP_16(2,v1,0xffff, 0x0000, 0x0f0f,0xf0f0); @@ -29,7 +29,7 @@ void TEST_CASE2(void) { void TEST_CASE3(void) { VSET(4, e32, m1); volatile int32_t INP3[] = {0xffffffff, 0x00000000, 0x0f0f0f0f, - 0xf0f0f0f0}; // flush + 0xf0f0f0f0}; // flush __asm__ volatile("fence"); __asm__ volatile("vle32.v v1, (%0)" ::"r"(INP3)); // VEC_CMP_32(3,v1,0xffffffff, 0x00000000, 0x0f0f0f0f,0xf0f0f0f0); @@ -39,7 +39,7 @@ void TEST_CASE3(void) { void TEST_CASE4(void) { VSET(4, e64, m1); volatile int64_t INP1[] = {0xffffffffffffffff, 0x0000000000000000, - 0x0f0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f0}; // flush + 0x0f0f0f0f0f0f0f0f, 0xf0f0f0f0f0f0f0f0}; // flush __asm__ volatile("fence"); __asm__ volatile("vle64.v v1, (%0)" ::"r"(INP1)); // VEC_CMP_64(4,v1,0xffffffffffffffff, 0x00000000000000000, diff --git 
a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c index 893d0e379..32b5b2192 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle16.c @@ -9,7 +9,7 @@ // Exception Handler for rtl void mtvec_handler(void) { - asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c index 0e4f1c1c5..ec0bda965 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle32.c @@ -8,7 +8,7 @@ // Exception Handler for rtl void mtvec_handler(void) { - asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c index 282fd11b4..8bbe8673b 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle64.c @@ -9,7 +9,7 @@ // Exception Handler for rtl void mtvec_handler(void) { - asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c index b4e1d84ee..1e4f58db4 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vle8.c @@ -10,7 +10,7 @@ // Exception Handler for rtl void mtvec_handler(void) { - asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); @@ -193,7 +193,7 @@ void TEST_CASE10(void) { void TEST_CASE11(void) { VSET(16, e8, m1); VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16); - VSET(16, e8, m1); // Setting vl=16 + VSET(16, e8, m1); // Setting vl=16 asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); VSET(16, e8, m1); VCMP_U8(11, v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, @@ -205,7 +205,7 @@ void TEST_CASE12(void) { VSET(16, e8, m1); VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); __asm__ volatile("vsetivli %[A], 0, e8, m1, ta, ma" - : [A] "=r"(avl)); // Setting vl=0 + : [A] "=r"(avl)); // Setting vl=0 asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); VSET(16, e8, m1); VCMP_U8(12, v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); @@ -214,7 +214,7 @@ void TEST_CASE12(void) { void TEST_CASE13(void) { VSET(16, e8, m1); VLOAD_8(v6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - VSET(13, e8, m1); // Setting vl =13 + VSET(13, e8, m1); // Setting vl =13 asm volatile("vle8.v v6, (%0)" ::"r"(&ALIGNED_I8[0])); VSET(16, e8, m1); VCMP_U8(13, v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c index 14fa78e6b..d66b9b887 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vpopc_m.c @@ -13,11 +13,10 @@ void TEST_CASE1() { VLOAD_U32(v0, 5, 0, 0, 0); volatile uint32_t scalar = 1337; volatile uint32_t OUP[] = {0, 0, 0, 0}; - __asm__ volatile( - "vpopc.m %[A], v2, v0.t \n" - "sw %[A], (%1) \n" - : - : [A] "r"(scalar), "r"(OUP)); + __asm__ volatile("vpopc.m %[A], v2, v0.t \n" + "sw %[A], (%1) \n" + : + : [A] "r"(scalar), "r"(OUP)); XCMP(1, OUP[0], 2); } diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c index c5d9b06a7..efd8d218c 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse16.c @@ -6,7 +6,7 @@ #define AXI_DWIDTH 128 void mtvec_handler(void) { 
- asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); @@ -73,7 +73,8 @@ void handle_trap(void) { } void reset_vec16(volatile uint16_t *vec) { - for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; + for (uint64_t i = 0; i < 1024; ++i) + vec[i] = 0; } static volatile uint16_t ALIGNED_I16[1024] __attribute__((aligned(AXI_DWIDTH))); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c index afd7c6427..9b45af723 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse32.c @@ -5,7 +5,7 @@ #include "vector_macros.h" #define AXI_DWIDTH 128 void mtvec_handler(void) { - asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); @@ -72,7 +72,8 @@ void handle_trap(void) { } void reset_vec32(volatile uint32_t *vec) { - for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; + for (uint64_t i = 0; i < 1024; ++i) + vec[i] = 0; } static volatile uint32_t ALIGNED_I32[1024] __attribute__((aligned(AXI_DWIDTH))); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c index da857b854..603e95442 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse64.c @@ -6,7 +6,7 @@ #define AXI_DWIDTH 128 void mtvec_handler(void) { - asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); @@ -15,7 +15,7 @@ void mtvec_handler(void) { asm volatile("addi t1, t1, 4"); asm volatile("csrw mepc, t1"); - // Filter with mcause and handle here + // Filter with mcause and handle here asm volatile("mret"); } @@ -73,7 +73,8 @@ void handle_trap(void) { } void reset_vec64(volatile uint64_t *vec) { - for (uint64_t i = 0; i < 1024; ++i) vec[i] =
0; + for (uint64_t i = 0; i < 1024; ++i) + vec[i] = 0; } static volatile uint64_t ALIGNED_I64[1024] __attribute__((aligned(AXI_DWIDTH))); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c index 0d74bde5d..5eb936ee9 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vse8.c @@ -6,7 +6,7 @@ #define AXI_DWIDTH 128 void mtvec_handler(void) { - asm volatile("csrr t0, mcause"); // Read mcause + asm volatile("csrr t0, mcause"); // Read mcause // Read mepc asm volatile("csrr t1, mepc"); @@ -73,7 +73,8 @@ void handle_trap(void) { } void reset_vec8(volatile uint8_t *vec) { - for (uint64_t i = 0; i < 1024; ++i) vec[i] = 0; + for (uint64_t i = 0; i < 1024; ++i) + vec[i] = 0; } static volatile uint8_t ALIGNED_I8[1024] __attribute__((aligned(AXI_DWIDTH))); diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c index 017feaacf..4fb9d4581 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetivli.c @@ -9,22 +9,22 @@ //***********LMUL = 1**********// void TEST_CASE1(void) { uint64_t avl, vtype, - vl; // Declaring avl,vtype and vl variables to pass for comparison - uint64_t vlmul = 0; // Setting value of vlmul - uint64_t vsew = 0; // Setting value of vsew - uint64_t vta = 1; // Setting value of vta - uint64_t vma = 1; // Setting value of vma - uint64_t golden_vtype; // Declaring variable to use as a reference value + vl; // Declaring avl,vtype and vl variables to pass for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value vtype(golden_vtype, vlmul, vsew, vta, - vma); // Setting up reference variable golden_vtype by assigning - // 
different fields of configurations + vma); // Setting up reference variable golden_vtype by assigning + // different fields of configurations __asm__ volatile("vsetivli %[A], 30, e8, m1, ta, ma" - : [A] "=r"(avl)); // Executing vsetivli instruction - read_vtype(vtype); // Reading vtype CSR - read_vl(vl); // Reading vl CSR + : [A] "=r"(avl)); // Executing vsetivli instruction + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR check_vtype_vl( 1, vtype, golden_vtype, avl, vl, vsew, - vlmul); // Passsing actual values and reference values for comparison + vlmul); // Passing actual values and reference values for comparison } void TEST_CASE2(void) { diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c index b238ecfc6..f3cca56eb 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvl.c @@ -12,24 +12,24 @@ #define VLEN 128 //***********LMUL = 1**********// void TEST_CASE1(void) { - uint64_t vtype, vl; // Setting avl and declaring vtype and vl - // variables to pass for comparison - uint64_t vlmul = 0; // Setting value of vlmul - uint64_t vsew = 0; // Setting value of vsew - uint64_t vta = 1; // Setting value of vta - uint64_t vma = 1; // Setting value of vma - uint64_t golden_vtype; // Declaring variable to use as a reference value + uint64_t vtype, vl; // Setting avl and declaring vtype and vl + // variables to pass for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; vtype(golden_vtype, vlmul, vsew, vta, - vma); // Setting up reference variable golden_vtype by assigning - // different fields of configurations + vma); // Setting up reference variable 
golden_vtype by assigning + // different fields of configurations __asm__ volatile("vsetvl t0, %[A], %[B]" ::[A] "r"(avl), - [B] "r"(golden_vtype)); // Executing vsetvl instruction - read_vtype(vtype); // Reading vtype CSR - read_vl(vl); // Reading vl CSR + [B] "r"(golden_vtype)); // Executing vsetvl instruction + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR check_vtype_vl( 1, vtype, golden_vtype, avl, vl, vsew, - vlmul); // Passsing actual values and reference values for comparison + vlmul); // Passing actual values and reference values for comparison } void TEST_CASE2(void) { diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c index 27689fcd8..bc5b2fd1b 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsetvli.c @@ -15,24 +15,24 @@ //****** SEW = 8 void TEST_CASE1(void) { - uint64_t vtype, vl; // Setting avl and declaring vtype and vl - // variables to pass for comparison - uint64_t vlmul = 0; // Setting value of vlmul - uint64_t vsew = 0; // Setting value of vsew - uint64_t vta = 1; // Setting value of vta - uint64_t vma = 1; // Setting value of vma - uint64_t golden_vtype; // Declaring variable to use as a reference value + uint64_t vtype, vl; // Setting avl and declaring vtype and vl + // variables to pass for comparison + uint64_t vlmul = 0; // Setting value of vlmul + uint64_t vsew = 0; // Setting value of vsew + uint64_t vta = 1; // Setting value of vta + uint64_t vma = 1; // Setting value of vma + uint64_t golden_vtype; // Declaring variable to use as a reference value uint64_t avl = ((VLEN / (8 << vsew)) * (1 << vlmul)) - 1; vtype(golden_vtype, vlmul, vsew, vta, - vma); // Setting up reference variable golden_vtype by assigning - // different fields of configurations + vma); // Setting up reference variable golden_vtype by assigning + // different fields of configurations __asm__ volatile("vsetvli t0, 
%[A], e8, m1,ta,ma" ::[A] "r"( - avl)); // Executing vsetvli instruction - read_vtype(vtype); // Reading vtype CSR - read_vl(vl); // Reading vl CSR + avl)); // Executing vsetvli instruction + read_vtype(vtype); // Reading vtype CSR + read_vl(vl); // Reading vl CSR check_vtype_vl( 1, vtype, golden_vtype, avl, vl, vsew, - vlmul); // Passsing actual values and reference values for comparison + vlmul); // Passing actual values and reference values for comparison } //****** SEW = 16 diff --git a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c index 2f0686722..7b806b9f2 100644 --- a/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c +++ b/apps/riscv-tests/isa/rv64uv/1_lane_tests/vsuxei.c @@ -11,16 +11,20 @@ #define INIT 98 void reset_vec8(volatile uint8_t *vec, int rst_val, uint64_t len) { - for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; + for (uint64_t i = 0; i < len; ++i) + vec[i] = rst_val; } void reset_vec16(volatile uint16_t *vec, int rst_val, uint64_t len) { - for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; + for (uint64_t i = 0; i < len; ++i) + vec[i] = rst_val; } void reset_vec32(volatile uint32_t *vec, int rst_val, uint64_t len) { - for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; + for (uint64_t i = 0; i < len; ++i) + vec[i] = rst_val; } void reset_vec64(volatile uint64_t *vec, int rst_val, uint64_t len) { - for (uint64_t i = 0; i < len; ++i) vec[i] = rst_val; + for (uint64_t i = 0; i < len; ++i) + vec[i] = rst_val; } static volatile uint8_t BUFFER_O8[16] __attribute__((aligned(AXI_DWIDTH))) = { INIT, INIT, INIT, INIT, INIT, INIT, INIT, INIT, From 933832661de48c574ff7b9e1107be3288bc8f8d6 Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Wed, 28 Dec 2022 14:56:44 +0500 Subject: [PATCH 15/16] [makefile] Update Makefile --- apps/Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/apps/Makefile b/apps/Makefile index 57f00247a..bfc19a198 100644 --- 
a/apps/Makefile +++ b/apps/Makefile @@ -110,9 +110,13 @@ define rvtest_compile_template TESTS_$(1) := $(addprefix bin/, $($(addsuffix _ara_tests, $1))) ifeq ($(nr_lanes), 1) +ifeq ($(1), rv64uv) bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/1_lane_tests/%.$(2) $(RUNTIME_GCC) linker_script else bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_GCC) linker_script +endif +else +bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_GCC) linker_script endif mkdir -p bin/ $$(RISCV_CC_GCC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS_GCC) $$(RISCV_LDFLAGS_GCC) -o $$@ $$< $(RUNTIME_GCC) -T$$(CURDIR)/common/link.ld @@ -124,9 +128,13 @@ define rvtest_compile_template_c TESTS_$(1) := $(addprefix bin/, $($(addsuffix _ara_tests, $1))) ifeq ($(nr_lanes), 1) +ifeq ($(1), rv64uv) bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/1_lane_tests/%.$(2) $(RUNTIME_LLVM) linker_script else bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_LLVM) linker_script +endif +else +bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_LLVM) linker_script endif mkdir -p bin/ $$(RISCV_CC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS) $$(RISCV_LDFLAGS) -o $$@ $$< $(RUNTIME_LLVM) -T$$(CURDIR)/common/link.ld @@ -189,4 +197,4 @@ clean: riscv_tests_spike_clean benchmarks_clean rm -vf $(RUNTIME_SPIKE) for app in $(APPS); do cd $(APPS_DIR)/$${app} && rm -f $$(find . 
-name "*.c.o*" -o -name "*.S.o*") && cd ..; done -.INTERMEDIATE: $(addsuffix /main.c.o,$(APPS)) +.INTERMEDIATE: $(addsuffix /main.c.o,$(APPS)) \ No newline at end of file From c1c96c10a6ace9e0829c2099f3a83a1ff5f81bee Mon Sep 17 00:00:00 2001 From: sharafat hussain Date: Wed, 28 Dec 2022 14:24:36 +0500 Subject: [PATCH 16/16] [config] update vlen to 128 for 1_lane.mk --- config/1_lane.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/1_lane.mk b/config/1_lane.mk index 1df56e541..ada19794b 100644 --- a/config/1_lane.mk +++ b/config/1_lane.mk @@ -22,4 +22,4 @@ nr_lanes ?= 1 # Length of each vector register (in bits) # Constraints: VLEN >= 512 -vlen ?= 4096 \ No newline at end of file +vlen ?= 128 \ No newline at end of file