Skip to content

Commit

Permalink
Altera opt 1 (#2592)
Browse files Browse the repository at this point in the history
The first optimization for Altera FPGAs is to move the instruction queue to LUTRAM. The optimization previously done for Xilinx does not work here because it relies on asynchronous RAM primitives, which Altera does not support. Therefore, this optimization consists of using synchronous RAM for the instruction queue and for the FIFOs inside wt_axi_adapter.

The main changes to the existing code are:

New RAM module to infer synchronous RAM in altera with independent read and write ports (SyncDpRam_ind_r_w.sv)

Changes inside cva6_fifo_v3 to adapt to the use of synchronous RAM instead of asynchronous:

When the FIFO is not empty, next data is always read and available at the output hiding the reading latency introduced by synchronous RAM (similar to fall-through approach). This is a simplification that is possible because in a FIFO we always know what is the next address to be read.

When data is read right after it is written, we can't use the previous method, because there is a latency to first write the data into the FIFO and then to read it back. For this reason, the new design uses an auxiliary register to hide this latency. It is used only if the FIFO is empty: we detect when the word being written is the first word and keep it in this register. If a read arrives in the next cycle, the output data is taken from the auxiliary register. Afterwards the data is already available in the RAM and can be read continuously, as in the first case.

All of this is only used if the FpgaAlteraEn parameter is enabled; otherwise the previous implementation with asynchronous RAM applies (when FpgaEn is set), or the register-based implementation (when FpgaEn is not set).
  • Loading branch information
AngelaGonzalezMarino authored Nov 15, 2024
1 parent f54b9d4 commit 33c5d77
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 25 deletions.
10 changes: 6 additions & 4 deletions core/cache_subsystem/wt_axi_adapter.sv
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ module wt_axi_adapter
end

cva6_fifo_v3 #(
.dtype (icache_req_t),
.DEPTH (ReqFifoDepth),
.FPGA_ALTERA(CVA6Cfg.FpgaAlteraEn),
.dtype(icache_req_t),
.DEPTH(ReqFifoDepth),
.FPGA_EN(CVA6Cfg.FpgaEn)
) i_icache_data_fifo (
.clk_i (clk_i),
Expand All @@ -329,8 +330,9 @@ module wt_axi_adapter
);

cva6_fifo_v3 #(
.dtype (dcache_req_t),
.DEPTH (ReqFifoDepth),
.FPGA_ALTERA(CVA6Cfg.FpgaAlteraEn),
.dtype(dcache_req_t),
.DEPTH(ReqFifoDepth),
.FPGA_EN(CVA6Cfg.FpgaEn)
) i_dcache_data_fifo (
.clk_i (clk_i),
Expand Down
72 changes: 56 additions & 16 deletions core/cva6_fifo_v3.sv
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright 2024 - PlanV Technologies for additional contribution.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
Expand All @@ -9,9 +10,12 @@
// specific language governing permissions and limitations under the License.

// Author: Florian Zaruba <[email protected]>
// Additional contributions by:
// Angela Gonzalez - PlanV Technologies

module cva6_fifo_v3 #(
parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode
parameter bit FPGA_ALTERA = 1'b0, // FPGA Altera optimizations enabled
parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic
parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32
parameter type dtype = logic [DATA_WIDTH-1:0],
Expand Down Expand Up @@ -46,6 +50,8 @@ module cva6_fifo_v3 #(
logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q;
// actual memory
dtype [FifoDepth - 1:0] mem_n, mem_q;
dtype data_ft_n, data_ft_q;
logic first_word_n, first_word_q;

// fifo ram signals for fpga target
logic fifo_ram_we;
Expand All @@ -71,12 +77,13 @@ module cva6_fifo_v3 #(
read_pointer_n = read_pointer_q;
write_pointer_n = write_pointer_q;
status_cnt_n = status_cnt_q;
data_ft_n = data_ft_q;
first_word_n = first_word_q;
if (FPGA_EN) begin
fifo_ram_we = '0;
fifo_ram_read_address = read_pointer_q;
fifo_ram_write_address = '0;
fifo_ram_wdata = '0;
data_o = (DEPTH == 0) ? data_i : fifo_ram_rdata;
data_o = (DEPTH == 0) ? data_i : (first_word_q ? data_ft_q : fifo_ram_rdata);
end else begin
data_o = (DEPTH == 0) ? data_i : mem_q[read_pointer_q];
mem_n = mem_q;
Expand All @@ -89,6 +96,7 @@ module cva6_fifo_v3 #(
fifo_ram_we = 1'b1;
fifo_ram_write_address = write_pointer_q;
fifo_ram_wdata = data_i;
first_word_n = FPGA_ALTERA && first_word_q && pop_i;
end else begin
// push the data onto the queue
mem_n[write_pointer_q] = data_i;
Expand All @@ -104,6 +112,8 @@ module cva6_fifo_v3 #(
end

if (pop_i && ~empty_o) begin
data_ft_n = data_i;
first_word_n = FPGA_EN && FPGA_ALTERA && first_word_q && push_i;
// read from the queue is a default assignment
// but increment the read pointer...
if (read_pointer_n == FifoDepth[ADDR_DEPTH-1:0] - 1) read_pointer_n = '0;
Expand All @@ -116,14 +126,23 @@ module cva6_fifo_v3 #(
if (push_i && pop_i && ~full_o && ~empty_o) status_cnt_n = status_cnt_q;

// FIFO is in pass through mode -> do not change the pointers
if (FALL_THROUGH && (status_cnt_q == 0) && push_i) begin
data_o = data_i;
if ((FALL_THROUGH || (FPGA_EN && FPGA_ALTERA)) && (status_cnt_q == 0) && push_i) begin
if (FALL_THROUGH) data_o = data_i;
if (FPGA_EN && FPGA_ALTERA) begin
data_ft_n = data_i;
first_word_n = '1;
end
if (pop_i) begin
first_word_n = '0;
status_cnt_n = status_cnt_q;
read_pointer_n = read_pointer_q;
write_pointer_n = write_pointer_q;
end
end

if (FPGA_EN) fifo_ram_read_address = (FPGA_ALTERA == 1) ? read_pointer_n : read_pointer_q;
else fifo_ram_read_address = '0;

end

// sequential process
Expand All @@ -132,32 +151,53 @@ module cva6_fifo_v3 #(
read_pointer_q <= '0;
write_pointer_q <= '0;
status_cnt_q <= '0;
first_word_q <= '0;
data_ft_q <= '0;
end else begin
if (flush_i) begin
read_pointer_q <= '0;
write_pointer_q <= '0;
status_cnt_q <= '0;
if (FPGA_ALTERA) first_word_q <= '0;
if (FPGA_ALTERA) data_ft_q <= '0;
end else begin
read_pointer_q <= read_pointer_n;
write_pointer_q <= write_pointer_n;
status_cnt_q <= status_cnt_n;
if (FPGA_ALTERA) data_ft_q <= data_ft_n;
if (FPGA_ALTERA) first_word_q <= first_word_n;
end
end
end

if (FPGA_EN) begin : gen_fpga_queue
AsyncDpRam #(
.ADDR_WIDTH(ADDR_DEPTH),
.DATA_DEPTH(DEPTH),
.DATA_WIDTH($bits(dtype))
) fifo_ram (
.Clk_CI (clk_i),
.WrEn_SI (fifo_ram_we),
.RdAddr_DI(fifo_ram_read_address),
.WrAddr_DI(fifo_ram_write_address),
.WrData_DI(fifo_ram_wdata),
.RdData_DO(fifo_ram_rdata)
);
if (FPGA_ALTERA) begin
SyncDpRam_ind_r_w #(
.ADDR_WIDTH(ADDR_DEPTH),
.DATA_DEPTH(DEPTH),
.DATA_WIDTH($bits(dtype))
) fifo_ram (
.Clk_CI (clk_i),
.WrEn_SI (fifo_ram_we),
.RdAddr_DI(fifo_ram_read_address),
.WrAddr_DI(fifo_ram_write_address),
.WrData_DI(fifo_ram_wdata),
.RdData_DO(fifo_ram_rdata)
);
end else begin
AsyncDpRam #(
.ADDR_WIDTH(ADDR_DEPTH),
.DATA_DEPTH(DEPTH),
.DATA_WIDTH($bits(dtype))
) fifo_ram (
.Clk_CI (clk_i),
.WrEn_SI (fifo_ram_we),
.RdAddr_DI(fifo_ram_read_address),
.WrAddr_DI(fifo_ram_write_address),
.WrData_DI(fifo_ram_wdata),
.RdData_DO(fifo_ram_rdata)
);
end
end else begin : gen_asic_queue
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
Expand Down
12 changes: 7 additions & 5 deletions core/frontend/instr_queue.sv
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,9 @@ module instr_queue
// Make sure we don't save any instructions if we couldn't save the address
assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
cva6_fifo_v3 #(
.DEPTH (ariane_pkg::FETCH_FIFO_DEPTH),
.dtype (instr_data_t),
.FPGA_ALTERA(CVA6Cfg.FpgaAlteraEn),
.DEPTH(ariane_pkg::FETCH_FIFO_DEPTH),
.dtype(instr_data_t),
.FPGA_EN(CVA6Cfg.FpgaEn)
) i_fifo_instr_data (
.clk_i (clk_i),
Expand All @@ -489,9 +490,10 @@ module instr_queue
end

cva6_fifo_v3 #(
.DEPTH (ariane_pkg::FETCH_ADDR_FIFO_DEPTH),
.DATA_WIDTH(CVA6Cfg.VLEN),
.FPGA_EN (CVA6Cfg.FpgaEn)
.FPGA_ALTERA(CVA6Cfg.FpgaAlteraEn),
.DEPTH (ariane_pkg::FETCH_ADDR_FIFO_DEPTH),
.DATA_WIDTH (CVA6Cfg.VLEN),
.FPGA_EN (CVA6Cfg.FpgaEn)
) i_fifo_address (
.clk_i (clk_i),
.rst_ni (rst_ni),
Expand Down
59 changes: 59 additions & 0 deletions vendor/pulp-platform/fpga-support/rtl/SyncDpRam_ind_r_w.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2024 PlanV Technologies
//
// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
// You may obtain a copy of the License at https://solderpad.org/licenses
//
// Inferable, synchronous dual-port RAM with fully independent write and read ports
//
//
// This module is designed to work with Xilinx, Microchip and Altera FPGA tools by following the respective
// guidelines:
// - Xilinx UG901 Vivado Design Suite User Guide: Synthesis
// - Inferring Microchip PolarFire RAM Blocks
// - Altera Quartus II Handbook Volume 1: Design and Synthesis (p. 768)
//
// Current Maintainers: Angela Gonzalez - PlanV Technologies

// Simple dual-port synchronous RAM: one write port and one read port that
// share a single clock but use independent addresses.
//
// Parameters:
//   ADDR_WIDTH - width of the read and write address buses
//   DATA_DEPTH - number of words in the memory (usually 2**ADDR_WIDTH, may be lower)
//   DATA_WIDTH - width of one memory word
//
// Ports:
//   Clk_CI    - clock for both ports
//   WrEn_SI   - write enable; when high, WrData_DI is stored at WrAddr_DI
//   WrAddr_DI - write address
//   WrData_DI - write data
//   RdAddr_DI - read address, sampled on the rising clock edge
//   RdData_DO - registered read data, valid one cycle after RdAddr_DI is sampled
//
// Read-during-write: a read of the address that is being written in the same
// cycle returns the word stored before the write ("old data").
module SyncDpRam_ind_r_w
#(
  parameter ADDR_WIDTH = 10,
  parameter DATA_DEPTH = 1024, // usually 2**ADDR_WIDTH, but can be lower
  parameter DATA_WIDTH = 32
)(
  input  logic                    Clk_CI,

  // Write port
  input  logic                    WrEn_SI,
  input  logic [ADDR_WIDTH-1:0]   WrAddr_DI,
  input  logic [DATA_WIDTH-1:0]   WrData_DI,

  // Read port
  input  logic [ADDR_WIDTH-1:0]   RdAddr_DI,
  output logic [DATA_WIDTH-1:0]   RdData_DO
);

  // Storage array, zero-initialised. The ramstyle attribute is a synthesis hint
  // asking Quartus to map the array onto MLAB (LUT-based) memory.
  (* ramstyle = "mlab" *) logic [DATA_WIDTH-1:0] mem [DATA_DEPTH-1:0]= '{default:0};

  // WRITE and registered READ
  always_ff @(posedge Clk_CI)
  begin
    if (WrEn_SI) begin
      mem[WrAddr_DI] <= WrData_DI;
    end
    // Non-blocking on purpose: a blocking assignment here would update
    // RdData_DO in the active region and race with any other clocked process
    // that samples it on the same edge. The right-hand side still sees the
    // pre-write contents of mem, so read-during-write keeps returning old data.
    RdData_DO <= mem[RdAddr_DI];
  end

  ////////////////////////////
  // assertions
  ////////////////////////////

  // pragma translate_off
  // The address bus must be wide enough to reach every word of the memory.
  assert property
    (@(posedge Clk_CI) (longint'(2)**longint'(ADDR_WIDTH) >= longint'(DATA_DEPTH)))
    else $error("depth out of bounds");
  // pragma translate_on

endmodule

0 comments on commit 33c5d77

Please sign in to comment.