code release on LeNet datapath verilog implementation

hipersys-team · Aug 22, 2023 · cdf8567 · cdf8567
1 parent 6bbe02c
commit cdf8567
Show file tree

Hide file tree

Showing 42 changed files with 88,941 additions and 10 deletions.
diff --git a/.github/workflows/dnn_single_core.yml b/.github/workflows/dnn_single_core.yml
@@ -0,0 +1,37 @@
+# CI workflow: on the triggers below, installs Verilator and runs `make accuracy` inside rtl/.
+on:
+  # NOTE(review): `branches` is not a documented filter for the `issue_comment` event;
+  # confirm this trigger behaves as intended.
+  issue_comment:
+    branches:
+      - master
+
+  push:
+    paths:
+      # `*` does not match `/`, so `**` is needed to also pick up RTL that lives in
+      # subdirectories such as rtl/datapath/analog_interfaces/.
+      - 'rtl/**/*.v'
+      - 'rtl/tb/*.py'
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Submodules
+      run: git submodule update --init
+
+    - name: Install dependencies
+      run: |
+        # refresh the package index first so the install does not fail on a stale index
+        sudo apt-get update
+        sudo apt-get install -y verilator python3 python3-pip python3-venv
+        verilator --version
+
+    - name: Run accuracy testing
+      run: |
+        cd rtl
+        make accuracy
diff --git a/README.md b/README.md
@@ -1,5 +1,8 @@
 # Lightning: A Reconfigurable Photonic-Electronic SmartNIC for Fast and Energy-Efficient Inference
 
+[![DOI:10.1145/3603269.3604821](http://img.shields.io/badge/DOI-10.1145/3603269.3604821-69B7DB.svg)](https://doi.org/10.1145/3603269.3604821)
+[![DNN build](https://github.com/hipersys-team/lightning/actions/workflows/dnn_single_core.yml/badge.svg)](https://github.com/hipersys-team/lightning/actions/workflows/dnn_single_core.yml)
+
 Welcome to Lightning, a reconfigurable photonic-electronic neural network inference system integrated with a 100 Gbps smartNIC.
 
 ## 1. Overview
@@ -29,16 +32,15 @@ git clone --recursive
 
 This part of artifact contains Lightning's RTL-based datapath design and implementation (Sections 4, 5, and 6 of the Lightning SIGCOMM paper). We also include an emulated photonic MAC core to build a cycle-accurate testbench using Verilator.
 
-|  Source Files      |  Description                                                                                               |
-|  -----             |  -----                                                                                                     |
-|  `rtl/compute/`    |  This folder contains the code of digital computational modules (e.g., adder tree, ReLU, exponential, etc).|
-|  `rtl/emulate/`    |  This folder contains the code of emulated photonic multiplier modules                                     |
-|  `rtl/glue_logics/`|  This folder contains the code of glue-logic modules for several DNNs                                      |
-|  `rtl/srom/`       |  This folder contains the code of SROM modules                                                             |
-|  `rtl/tb/`         |  This folder contains the code of Verilator-based testbench modules                                        |
-|  `rtl/utils/`      |  This folder contains the code of customized AXI-related modules and third-party AXI libraries             |
-|  `rtl/Makefile`    |  This folder contains the Makefile for running the Verilator-based testbench                               |
-|  `rtl/README.md`   |  This README file explains the dependencies and steps to run the RTL cycle-accurate testbench              |
+|  Source Files      |  Description                                                                                                             |
+|  -----             |  -----                                                                                                                   |
+|  `rtl/datapath/`   |  This folder contains the code of Lightning's datapath modules (packet I/O, memory controller, count-action logic, etc.) |
+|  `rtl/emulate/`    |  This folder contains the code of emulated photonic multiplier modules                                                   |
+|  `rtl/sram/`       |  This folder contains the code of SRAM modules                                                                           |
+|  `rtl/tb/`         |  This folder contains the code of Verilator-based testbench modules                                                      |
+|  `rtl/utils/`      |  This folder contains the code of customized AXI-related modules and third-party AXI libraries                           |
+|  `rtl/Makefile`    |  This file is the Makefile for running the Verilator-based testbench                                                     |
+|  `rtl/README.md`   |  This README file explains the dependencies and steps to run the RTL cycle-accurate testbench                            |
 
 ### 2.2 FPGA firmware and library code for Lightning's Python API
 

diff --git a/rtl/Makefile b/rtl/Makefile
@@ -0,0 +1,24 @@
+# Top-level entry points for the Verilator testbench; every target with a recipe
+# simply delegates to the Makefile in tb/.
+
+# none of these targets produce a file of the same name
+.PHONY: all build-sw-lenet-single-core run-sw-lenet-single-core accuracy clean-sw
+
+# default goal: has no prerequisites or recipe, so a bare `make` does nothing
+all:
+
+# build the verilator for cycle-accurate simulation on the LeNet DNN
+build-sw-lenet-single-core:
+	$(MAKE) -C tb build-sim-lenet-single-core
+
+# run the verilator for cycle-accurate simulation on the LeNet DNN
+run-sw-lenet-single-core:
+	$(MAKE) -C tb run-sim-lenet-single-core
+
+# perform accuracy checking for all models
+accuracy:
+	$(MAKE) -C tb accuracy
+
+# clean the verilator files
+clean-sw:
+	$(MAKE) -C tb clean-sims
diff --git a/rtl/datapath/analog_interfaces/calibration.v b/rtl/datapath/analog_interfaces/calibration.v
@@ -0,0 +1,229 @@
+/*
+
+Project: [Lightning] A Reconfigurable Photonic-Electronic SmartNIC for Fast and Energy-Efficient Inference 
+File: calibration.v
+File Explanation: this module describes the calibration process for optical loss in the system
+File Start Time: December 2022
+Authors: Zhizhen Zhong ([email protected])
+Language: Verilog 2001
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+
+// calibration: while calibration_start is high, loads a selectable reference waveform
+// for output_tdata, feeds the samples returned on input_tdata through preamble_detect,
+// averages them per 16-bit lane, and registers the averager_tree result as `loss`.
+//
+// Parameter:
+//   CALIBRATION_DATA_WIDTH - bus width in bits, handled as 16-bit lanes. The waveform
+//                            literals below are fixed at 256 bits.
+module calibration # (
+    parameter CALIBRATION_DATA_WIDTH = 256
+)(
+    input  wire  clk,
+    input  wire  rst,  // synchronous, active high
+    input  wire  [15:0]  estimate_photonic_slack_cycle_length,  // added to calibration_length for the cycle lengths passed to the sub-modules
+    input  wire  calibration_start,
+    input  wire  [15:0]  calibration_length,
+    input  wire  [15:0]  calibration_wave_type,  // select different types of calibration waveform
+
+    input  wire [CALIBRATION_DATA_WIDTH-1:0] input_tdata,
+    input  wire input_tvalid,
+
+    output reg [CALIBRATION_DATA_WIDTH-1:0] output_tdata,
+    output reg output_tvalid,
+
+    output reg [15:0] loss,
+    output reg loss_valid
+);
+    // Reference waveforms, each written as sixteen 16-bit words.
+    // NOTE(review): sine_wave_dc and sine_wave_positive carry the same literal; confirm
+    // whether one of them was meant to be a different waveform.
+    wire [CALIBRATION_DATA_WIDTH-1:0] sine_wave_dc = 256'hCF07_A57F_89C3_8003_89C3_A57F_CF07_FFFF_30F8_5A80_763C_7FFC_763C_5A80_30F8_0000;
+    wire [CALIBRATION_DATA_WIDTH-1:0] sine_wave_positive = 256'hCF07_A57F_89C3_8003_89C3_A57F_CF07_FFFF_30F8_5A80_763C_7FFC_763C_5A80_30F8_0000;
+    wire [CALIBRATION_DATA_WIDTH-1:0] square_wave_positive = 256'h0000_0000_0000_0000_0000_0000_0000_0000_7FFC_7FFC_7FFC_7FFC_7FFC_7FFC_7FFC_7FFC;
+
+    integer i;
+
+    reg [CALIBRATION_DATA_WIDTH-1:0] output_tdata_buffer;
+    reg output_tvalid_buffer;
+
+    // NOTE(review): these two registers are written below but never read in this module.
+    reg [15:0] photonic_slack_cycle_count;
+    reg [15:0] counter;
+
+    wire [CALIBRATION_DATA_WIDTH-1:0] post_preamble_tdata;
+    wire post_preamble_tvalid;
+
+    reg calibration_start_reg;    // one-cycle pulse handed to preamble_detect.state_changed
+    reg calibration_started_reg;  // set together with the pulse, cleared only by rst
+
+    wire [15:0] matched_pattern;  // driven by preamble_detect, not used further here
+
+    // Start-pulse generator: the first cycle in which calibration_start is seen after
+    // reset raises calibration_start_reg for exactly one clock and sets the started latch.
+    // The guard tests the calibration_start input; testing calibration_start_reg itself
+    // (which resets to 0 and is only ever set inside the guarded branch) meant the pulse
+    // could never fire.
+    always @ (posedge clk)
+        if (rst) begin
+            calibration_start_reg <= 1'b0;
+            calibration_started_reg <= 1'b0;
+        end else begin
+            if (!calibration_started_reg && calibration_start) begin
+                calibration_start_reg <= 1'b1;
+                calibration_started_reg <= 1'b1;
+            end else begin
+                calibration_start_reg <= 1'b0;
+            end
+        end
+
+    // free-running cycle counter; photonic_slack_cycle_count is only ever reset
+    always @ (posedge clk)
+        if (rst) begin
+            photonic_slack_cycle_count <= 0;
+            counter <= 0;
+        end else begin
+            counter <= counter + 1;
+        end
+
+    // send out the selected waveform: output_* is the buffer delayed by one clock
+    always @ (posedge clk)
+        if (rst) begin
+            output_tdata <= {CALIBRATION_DATA_WIDTH{1'b0}};
+            output_tvalid <= 1'b0;
+
+        end else begin
+            output_tdata <= output_tdata_buffer;
+            output_tvalid <= output_tvalid_buffer;
+        end
+
+    // Waveform selection while calibration_start is high; calibration_wave_type bit 0 has
+    // priority over bit 1, which has priority over bit 2. With no final else, the buffer
+    // and its valid flag keep their last value when none of the branches is taken.
+    // NOTE(review): the trailing comments below describe the signal as lasting only while
+    // calibration_start is asserted, yet nothing clears output_tvalid_buffer - confirm intent.
+    always @ (posedge clk)
+        if (rst) begin
+            output_tdata_buffer <= {CALIBRATION_DATA_WIDTH{1'b0}};
+            output_tvalid_buffer <= 1'b0;
+
+        end else if (calibration_start && calibration_wave_type[0]) begin
+            output_tdata_buffer <= sine_wave_dc;
+            output_tvalid_buffer <= 1'b1;
+
+        end else if (calibration_start && calibration_wave_type[1]) begin
+            output_tdata_buffer <= sine_wave_positive;  // the length of the signal is until calibration_start lasts
+            output_tvalid_buffer <= 1'b1;
+
+        end else if (calibration_start && calibration_wave_type[2]) begin
+            output_tdata_buffer <= square_wave_positive;  // the length of the signal is until calibration_start lasts
+            output_tvalid_buffer <= 1'b1;
+        end
+
+    reg [CALIBRATION_DATA_WIDTH-1:0] accumulated_tdata;
+    reg accumulated_tvalid;
+    reg [15:0] accumulated_times;
+
+    reg [CALIBRATION_DATA_WIDTH-1:0] ratio;
+    reg ratio_valid;
+    reg [CALIBRATION_DATA_WIDTH-1:0] ratio_relay;
+    reg ratio_valid_relay;
+
+    // one-clock delay of ratio / ratio_valid (this stage has no reset)
+    always @ (posedge clk) begin
+        ratio_relay <= ratio;
+        ratio_valid_relay <= ratio_valid;
+    end
+
+    // analyze the received waveform (only advances on cycles where input_tvalid is high)
+    always @ (posedge clk)
+        if (rst) begin
+            accumulated_tdata <= {CALIBRATION_DATA_WIDTH{1'b0}};
+            accumulated_tvalid <= 1'b0;
+            accumulated_times <= 16'd0;
+            ratio_valid <= 1'b0;
+
+        end else if (input_tvalid) begin
+            accumulated_times <= accumulated_times + 16'd1;
+            if (!accumulated_tvalid) begin
+                // nothing accumulated yet: take the preamble_detect output as the starting value
+                accumulated_tdata <= post_preamble_tdata;
+                accumulated_tvalid <= post_preamble_tvalid;
+
+            end else begin
+                // per 16-bit lane: new value = half the old accumulation + half the new sample
+                for (i=0; i<CALIBRATION_DATA_WIDTH/16; i=i+1) begin
+                    accumulated_tdata[i*16 +: 16] <= accumulated_tdata[i*16 +: 16]/2 + post_preamble_tdata[i*16 +: 16]/2;
+                end
+                accumulated_tvalid <= post_preamble_tvalid;
+                if (accumulated_times > 16'd0) begin
+                    for (i=0; i<CALIBRATION_DATA_WIDTH/16; i=i+1) begin
+                        // lanes whose reference bits [14:7] are all zero report a ratio of 0;
+                        // otherwise the (pre-update) accumulation is shifted left by 8 in-lane
+                        if (output_tdata_buffer[i*16+7 +: 8] == 8'd0) begin
+                            ratio[i*16 +: 16] <= 16'd0;
+                        end else begin
+                            ratio[i*16 +: 16] <= accumulated_tdata[i*16 +: 16] << 8;
+                        end
+                    end
+                    ratio_valid <= 1'b1;
+                end
+            end
+        end
+
+    wire [15:0] loss_wire;
+    wire loss_valid_wire;
+
+    // register the averager_tree outputs as the module's loss result
+    always @ (posedge clk)
+        if (rst) begin
+            loss <= 16'd0;
+            loss_valid <= 1'b0;
+        end else begin
+            loss <= loss_wire;
+            loss_valid <= loss_valid_wire;
+        end
+
+    // averager_tree is defined outside this file; it receives the delayed ratio lanes and
+    // drives loss_wire / loss_valid_wire.
+    generate
+        averager_tree # (
+        ) averager_tree_calibration_inst(
+            .clk(clk),
+            .rst(rst),
+            .start_signal(accumulated_tvalid ^ ratio_valid_relay),
+            .persist_cycle_length(calibration_length + estimate_photonic_slack_cycle_length),
+            .s_tdata(ratio_relay),
+            .s_tvalid(ratio_valid_relay),
+            .m_tdata(loss_wire),
+            .m_tvalid(loss_valid_wire)
+        );
+    endgenerate
+
+    // preamble_detect is defined outside this file; it receives the raw input stream plus the
+    // start pulse and drives post_preamble_tdata / post_preamble_tvalid.
+    generate 
+        preamble_detect # (
+        ) preamble_detect_inst (
+            .clk(clk),
+            .rst(rst),
+            .state_changed(calibration_start_reg),
+            .input_adc_tdata(input_tdata),
+            .input_adc_tvalid(input_tvalid),
+            .monitor_cycle_length(calibration_length + estimate_photonic_slack_cycle_length + 100),
+            .preamble_cycle_length(calibration_length),  // let us say we use first half calibration cycles for detection
+            .pattern_match_agg(),
+            .matched_pattern(matched_pattern),
+            .output_detected_tdata(post_preamble_tdata),
+            .output_detected_tvalid(post_preamble_tvalid)
+        );
+    endgenerate
+
+endmodule
+
+`resetall
diff --git a/rtl/datapath/analog_interfaces/loss_compensator.v b/rtl/datapath/analog_interfaces/loss_compensator.v
@@ -0,0 +1,80 @@
+/*
+
+Project: [Lightning] A Reconfigurable Photonic-Electronic SmartNIC for Fast and Energy-Efficient Inference 
+File: loss_compensator.v
+File Explanation: this module describes the optical loss compensator logic after receiving the data from ADC
+File Start Time: December 2022
+Authors: Zhizhen Zhong ([email protected])
+Language: Verilog 2001
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+
+// loss_compensator: scales every WORD_WIDTH-bit lane of the incoming stream by `multiply`.
+// Per lane, the 8 bits starting at lane bit 7 are multiplied by `multiply`; the product is
+// stored in that lane and appears on post_mul_tdata two clocks after the input beat.
+module loss_compensator # (
+    parameter DATA_WIDTH = 256,
+    parameter WORD_WIDTH = 16
+)(
+    input wire clk,
+    input wire rst,
+
+    input wire [DATA_WIDTH-1:0] pre_mul_tdata,
+    input wire pre_mul_tvalid,
+    output reg pre_mul_tready,
+
+    input wire [WORD_WIDTH-1:0] multiply,
+
+    output reg [DATA_WIDTH-1:0] post_mul_tdata,
+    output reg post_mul_tvalid,
+    input wire post_mul_tready // ignored, to match RFSOC ADC behavior
+);
+
+    localparam LANES = DATA_WIDTH / WORD_WIDTH;
+
+    integer lane;
+
+    // product stage registers (one clock ahead of the post_mul_* outputs)
+    reg [DATA_WIDTH-1:0] tdata;
+    reg tvalid;
+
+    // declared but neither driven nor read in this module
+    wire [DATA_WIDTH-1:0] shifted_tdata;
+    wire shifted_tvalid;
+
+    always @ (posedge clk) begin
+        // the input side is always ready, in and out of reset
+        pre_mul_tready <= 1'b1;
+
+        if (rst) begin
+            tdata <= 0;
+            tvalid <= 0;
+            post_mul_tdata <= 0;
+            post_mul_tvalid <= 1'b0;
+        end else begin
+            // output stage: forward what the product stage held before this edge
+            post_mul_tdata <= tdata;
+            post_mul_tvalid <= tvalid;
+
+            // product stage: multiply on a valid beat, otherwise clear data and valid
+            if (pre_mul_tvalid) begin
+                for (lane = 0; lane < LANES; lane = lane + 1) begin
+                    tdata[lane*WORD_WIDTH +: WORD_WIDTH] <= pre_mul_tdata[lane*WORD_WIDTH+7 +: 8] * multiply;
+                end
+                tvalid <= 1'b1;
+            end else begin
+                tdata <= {DATA_WIDTH{1'b0}};
+                tvalid <= 1'b0;
+            end
+        end
+    end
+
+endmodule
+
+
+`resetall