-
Notifications
You must be signed in to change notification settings - Fork 698
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The third optimization for Altera FPGAs is to move the register file to LUTRAM. As in the earlier optimizations, the approach previously implemented for Xilinx does not work here because it relies on asynchronous RAM primitives, and Altera does not support asynchronous RAM. Therefore, this optimization uses synchronous RAM for the register file. The main changes to the existing code are: Changes in the ariane_regfile_fpga.sv file: The idea is the same as before: since synchronous RAM takes one clock cycle to read, we need to store the data when it is written, in case it is read right afterwards. For this, an auxiliary register stores the last written data. On the read side, we need to identify whether the data to be read is available in the RAM or is still in the auxiliary register (read after write). To compensate for the synchronous RAM delay, the read address is presented one clock cycle in advance. Because there is a multiplexer at the output that selects the block from which data is read, we need to keep the read address for one clock cycle so that the right block is selected when the data becomes available. Changes in the issue_read_operands.sv file: Adjust the address used to read from the register file (when synchronous RAM is used, reads take one cycle, so we advance the address). Since this address is an input, we need a new input port, “issue_instr_i_prev”, that brings the address in advance. Changes in the issue_stage.sv file: Connect the new input port, “decoded_instr_i_prev”, that brings the address in advance. Changes in the id_stage.sv file: Output the instruction to be issued before registering it (one clock cycle in advance). A new output port, “issue_entry_o_prev”, is needed for this. Changes in the cva6.sv file: Connect the new output of the id_stage to the issue_stage to bring the address in advance to the register file (issue_entry_id_issue_prev).
- Loading branch information
1 parent
dd649f2
commit b718824
Showing
5 changed files
with
47 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
// Copyright 2018 ETH Zurich and University of Bologna. | ||
// Copyright 2024 - PlanV Technologies for additionnal contribution. | ||
// Copyright and related rights are licensed under the Solderpad Hardware | ||
// License, Version 0.51 (the "License"); you may not use this file except in | ||
// compliance with the License. You may obtain a copy of the License at | ||
|
@@ -15,7 +16,7 @@ | |
// Noam Gallmann - [email protected] | ||
// Felipe Lisboa Malaquias | ||
// Henry Suzukawa | ||
// | ||
// Angela Gonzalez - PlanV Technologies | ||
// | ||
// Description: This register file is optimized for implementation on | ||
// FPGAs. The register file features one distributed RAM block per implemented | ||
|
@@ -50,11 +51,16 @@ module ariane_regfile_fpga #( | |
localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts); | ||
|
||
// Distributed RAM usually supports one write port per block - duplicate for each write port. | ||
logic [ NUM_WORDS-1:0][ DATA_WIDTH-1:0] mem [CVA6Cfg.NrCommitPorts]; | ||
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem[CVA6Cfg.NrCommitPorts]; | ||
|
||
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec; | ||
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel; | ||
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q; | ||
logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_reg; | ||
logic [NR_READ_PORTS-1:0] read_after_write; | ||
|
||
logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec; | ||
logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel; | ||
logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q; | ||
logic [NR_READ_PORTS-1:0][4:0] raddr_q; | ||
logic [NR_READ_PORTS-1:0][4:0] raddr; | ||
|
||
// write adress decoder (for block selector) | ||
always_comb begin | ||
|
@@ -88,36 +94,55 @@ module ariane_regfile_fpga #( | |
always_ff @(posedge clk_i or negedge rst_ni) begin | ||
if (!rst_ni) begin | ||
mem_block_sel_q <= '0; | ||
raddr_q <= '0; | ||
end else begin | ||
mem_block_sel_q <= mem_block_sel; | ||
if (CVA6Cfg.FpgaAlteraEn) raddr_q <= raddr_i; | ||
else raddr_q <= '0; | ||
end | ||
end | ||
|
||
// distributed RAM blocks | ||
logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts]; | ||
logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read_sync[CVA6Cfg.NrCommitPorts]; | ||
for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block | ||
always_ff @(posedge clk_i) begin | ||
if (we_i[j] && ~waddr_i[j] != 0) begin | ||
mem[j][waddr_i[j]] <= wdata_i[j]; | ||
if (CVA6Cfg.FpgaAlteraEn) | ||
wdata_reg[j] <= wdata_i[j]; // register data written in case is needed to read next cycle | ||
else wdata_reg[j] <= '0; | ||
end | ||
if (CVA6Cfg.FpgaAlteraEn) begin | ||
for (int k = 0; k < NR_READ_PORTS; k++) begin : block_read | ||
mem_read_sync[j][k] = mem[j][raddr_i[k]]; // synchronous RAM | ||
read_after_write[k] <= '0; | ||
if (waddr_i[j] == raddr_i[k]) | ||
read_after_write[k] <= we_i[j] && ~waddr_i[j] != 0; // Identify if we need to read the content that was written | ||
end | ||
end | ||
end | ||
for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read | ||
assign mem_read[j][k] = mem[j][raddr_i[k]]; | ||
assign mem_read[j][k] = CVA6Cfg.FpgaAlteraEn ? ( read_after_write[k] ? wdata_reg[j]: mem_read_sync[j][k]) : mem[j][raddr_i[k]]; | ||
end | ||
end | ||
//with synchronous ram there is the need to adjust which address is used at the output MUX | ||
assign raddr = CVA6Cfg.FpgaAlteraEn ? raddr_q : raddr_i; | ||
|
||
// output MUX | ||
logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr; | ||
for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port | ||
assign block_addr[k] = mem_block_sel_q[raddr_i[k]]; | ||
assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k]; | ||
assign block_addr[k] = mem_block_sel_q[raddr[k]]; | ||
assign rdata_o[k] = (ZERO_REG_ZERO && raddr[k] == '0) ? '0 : mem_read[block_addr[k]][k]; | ||
end | ||
|
||
// random initialization of the memory to suppress assert warnings on Questa. | ||
initial begin | ||
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin | ||
for (int j = 0; j < NUM_WORDS; j++) begin | ||
mem[i][j] = $random(); | ||
if (!CVA6Cfg.FpgaAlteraEn) | ||
mem[i][j] = $random(); //quartus does not support this random statement on synthesis | ||
else mem[i][j] = '0; | ||
end | ||
end | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters