From b066882f6a912c6eb9cd19f4c6487a7d254dd7db Mon Sep 17 00:00:00 2001
From: auphelia <jakobapk@web.de>
Date: Fri, 19 Jul 2024 14:39:47 +0100
Subject: [PATCH 1/2] [RTL Thresh] Enable workaround for unsigned narrow
 quantization

---
 .../fpgadataflow/rtl/thresholding_rtl.py      | 31 +++++++++++++++----
 .../test_fpgadataflow_thresholding.py         |  8 ++---
 .../test_fpgadataflow_thresholding_runtime.py | 26 ++++++++++------
 3 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py
index c31f90af0b..230d2879f5 100644
--- a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py
+++ b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py
@@ -186,9 +186,19 @@ def prepare_codegen_rtl_values(self, model):
         n_thres_steps = self.get_nodeattr("numSteps")
         wdt = self.get_weight_datatype()
         if expected_thresholds != n_thres_steps:
-            min_val = wdt.min()
-            thresholds = np.insert(thresholds, 0, min_val, axis=1)
-            bias = bias - 1
+            if DataType[output_data_type].signed():
+                min_val = wdt.min()
+                thresholds = np.insert(thresholds, 0, min_val, axis=1)
+                bias = bias - 1
+            # TODO: temporary fix for unsigned narrow quantization (append max-valued threshold)
+            else:
+                max_val = wdt.max()
+                if max_val > DataType[input_data_type].max():
+                    thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
+                else:
+                    max_val = max_val + 1
+                    wdt = DataType.get_smallest_possible(max_val)
+                    thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
             n_thres_steps += 1
 
         # add dummy dimension as final dimension (that's what gets packed with next call)
@@ -528,8 +538,18 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
         n_thres_steps = self.get_nodeattr("numSteps")
         wdt = self.get_weight_datatype()
         if expected_thresholds != n_thres_steps:
-            min_val = wdt.min()
-            thresholds = np.insert(thresholds, 0, min_val, axis=1)
+            if DataType[output_data_type].signed():
+                min_val = wdt.min()
+                thresholds = np.insert(thresholds, 0, min_val, axis=1)
+            # TODO: temporary fix for unsigned narrow quantization (append max-valued threshold)
+            else:
+                max_val = wdt.max()
+                if max_val > self.get_input_datatype().max():
+                    thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
+                else:
+                    max_val = max_val + 1
+                    wdt = DataType.get_smallest_possible(max_val)
+                    thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
             n_thres_steps += 1
 
         # If a single threshold value is found, broadcast the value
@@ -541,7 +561,6 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
         thresh_padded = np.zeros((thresholds.shape[0], width_padded))
         thresh_padded[: thresholds.shape[0], :n_thres_steps] = thresholds
         thresh_stream = []
-        wdt = self.get_weight_datatype()
         bw_hexdigit = roundup_to_integer_multiple(wdt.bitwidth(), 32)
         padding = np.zeros(width_padded, dtype=np.int32)
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
index e4dd49fc7f..fe7ba3d9fb 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
@@ -129,14 +129,14 @@ def make_single_multithresholding_modelwrapper(
         [1, 2, 2],
     ],
 )
-@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]])
+@pytest.mark.parametrize("activation", [DataType["UINT4"], DataType["INT4"], DataType["BIPOLAR"]])
 @pytest.mark.parametrize(
     "idt_tdt_cfg",
     [
         (DataType["INT8"], DataType["INT8"]),
         (DataType["INT8"], DataType["INT9"]),
-        (DataType["UINT8"], DataType["UINT8"]),
-        (DataType["UINT8"], DataType["UINT9"]),
+        (DataType["UINT5"], DataType["UINT5"]),
+        (DataType["UINT5"], DataType["UINT6"]),
     ],
 )
 @pytest.mark.parametrize("fold", [-1, 1, 2])
@@ -184,7 +184,7 @@ def test_fpgadataflow_thresholding(
         activation_bias = 0
     else:
         activation_bias = activation.min()
-        if narrow:
+        if narrow and activation.signed():
             activation_bias += 1
 
     # Generate random thresholds and sort in ascending order
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py
index 1ad695bb94..e6175ac58b 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py
@@ -122,13 +122,16 @@ def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp
 
 
 @pytest.mark.parametrize("impl_style", ["rtl", "hls"])
+@pytest.mark.parametrize(
+    "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])]
+)
 # configuration (ch, pe)
-@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 3)])
+@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 6)])
 @pytest.mark.parametrize("narrow", [True, False])
 @pytest.mark.parametrize("per_tensor", [True, False])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
-def test_runtime_thresholds_read(impl_style, cfg, narrow, per_tensor):
+def test_runtime_thresholds_read(impl_style, idt_act_cfg, cfg, narrow, per_tensor):
     """Read back threshold weights during runtime
 
     1. Create random initial weights T
@@ -140,8 +143,8 @@ def test_runtime_thresholds_read(impl_style, cfg, narrow, per_tensor):
     pe = cfg[1]
     n_inp_vecs = [1, 2, 2]
     hls_mem_mode = "internal_decoupled"
-    act = DataType["INT4"]
-    idt = DataType["INT16"]
+    act = idt_act_cfg[1]
+    idt = idt_act_cfg[0]
     odt = act
     n_steps = act.get_num_possible_values() - 1
     # Generate random thresholds and sort in ascending order
@@ -151,7 +154,7 @@ def test_runtime_thresholds_read(impl_style, cfg, narrow, per_tensor):
     T = sort_thresholds_increasing(T)
 
     actval = act.min()
-    if narrow:
+    if narrow and act.signed():
         actval += 1
 
     model = make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp_vecs, ch)
@@ -219,13 +222,16 @@ def read_weights(sim):
 
 
 @pytest.mark.parametrize("impl_style", ["rtl", "hls"])
+@pytest.mark.parametrize(
+    "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])]
+)
 # configuration (ch, pe)
-@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 3)])
+@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 6)])
 @pytest.mark.parametrize("narrow", [True, False])
 @pytest.mark.parametrize("per_tensor", [True, False])
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
-def test_runtime_thresholds_write(impl_style, cfg, narrow, per_tensor):
+def test_runtime_thresholds_write(impl_style, idt_act_cfg, cfg, narrow, per_tensor):
     """Write threshold weights during runtime
 
     1. Create random initial weights T_init
@@ -241,8 +247,8 @@ def test_runtime_thresholds_write(impl_style, cfg, narrow, per_tensor):
 
     n_inp_vecs = [1, 2, 2]
     hls_mem_mode = "internal_decoupled"
-    act = DataType["INT4"]
-    idt = DataType["INT16"]
+    act = idt_act_cfg[1]
+    idt = idt_act_cfg[0]
 
     odt = act
     n_steps = act.get_num_possible_values() - 1
@@ -253,7 +259,7 @@ def test_runtime_thresholds_write(impl_style, cfg, narrow, per_tensor):
     T_init = sort_thresholds_increasing(T_init)
 
     actval = act.min()
-    if narrow:
+    if narrow and act.signed():
         actval += 1
 
     model = make_single_thresholding_modelwrapper(

From 9d95b1b3c34bfabcf4160e4a39f7cc9bc26a363e Mon Sep 17 00:00:00 2001
From: auphelia <jakobapk@web.de>
Date: Mon, 22 Jul 2024 11:30:31 +0100
Subject: [PATCH 2/2] [RTL thresh] Fix datatype extension for unsigned narrow
 quantization

---
 .../custom_op/fpgadataflow/rtl/thresholding_rtl.py   | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py
index 230d2879f5..d1e9387b1b 100644
--- a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py
+++ b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py
@@ -197,7 +197,11 @@ def prepare_codegen_rtl_values(self, model):
                     thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
                 else:
                     max_val = max_val + 1
-                    wdt = DataType.get_smallest_possible(max_val)
+                    # widen wdt so that it can hold the incremented max_val
+                    if not wdt.signed():
+                        wdt = DataType.get_smallest_possible(max_val)
+                    else:
+                        wdt = DataType.get_smallest_possible(-max_val - 1)
                     thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
             n_thres_steps += 1
 
@@ -548,7 +552,11 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name):
                     thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
                 else:
                     max_val = max_val + 1
-                    wdt = DataType.get_smallest_possible(max_val)
+                    # widen wdt so that it can hold the incremented max_val
+                    if not wdt.signed():
+                        wdt = DataType.get_smallest_possible(max_val)
+                    else:
+                        wdt = DataType.get_smallest_possible(-max_val - 1)
                     thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1)
             n_thres_steps += 1