From b5520dc15175bd91517b2dd3209cee9893fe63a0 Mon Sep 17 00:00:00 2001
From: Felix Jentzsch <fepaje@mail.upb.de>
Date: Tue, 25 May 2021 16:44:00 +0200
Subject: [PATCH] Further cleanup

Unify the square and 1D code paths of StreamingMaxPool_Batch by
normalizing [H, W] attributes to [1, D], simplify InferStreamingMaxPool
accordingly, bump HLSLIB_COMMIT and re-enable the cycle-count checks in
the StreamingMaxPool test.
---
 docker/finn_entrypoint.sh                     |   2 +-
 .../fpgadataflow/streamingmaxpool_batch.py    | 117 +++++++-----------
 .../fpgadataflow/convert_to_hls_layers.py     |  39 ++----
 .../test_layer_streaming_maxpool_batch.py     |  50 ++++----
 4 files changed, 81 insertions(+), 127 deletions(-)

diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 3933d8918..46a294cd7 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -16,7 +16,7 @@ FINN_BASE_COMMIT=ac0b86a63eb937b869bfa453a996a8a8b8506546
 FINN_EXP_COMMIT=e9f97dcdb4db2f889b0f36af079a6a1792b7d4de
 BREVITAS_COMMIT=14abbe1e7ef82485d79415871fcf5766b0a40a00
 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
-HLSLIB_COMMIT=4d74baefa79df48b5a0348d63f39a26df075de51
+HLSLIB_COMMIT=b37337c571b98f40423020bc79f97e189f2661d5
 PYVERILATOR_COMMIT=e2ff74030de3992dcac54bf1b6aad2915946e8cb
 OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada
 
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index a15c91af1..dba279878 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -58,6 +58,23 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         """Returns FINN DataType of output."""
         return DataType[self.get_nodeattr("dataType")]
 
+    def get_1d_attrs_normalized(self):
+        # support both (1, D) and (D, 1) cases transparently:
+        # assume the dummy ('1') dimension is the Y-dimension, i.e.
+        # images and kernels (and their attributes) of dimension
+        # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D]
+        ifm_dim = self.get_nodeattr("ImgDim")
+        k = self.get_nodeattr("PoolDim")
+        ifm_ch = self.get_nodeattr("NumChannels")
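+        # e.g. a (D, 1) layout such as ImgDim=(1024, 1) with PoolDim=(4, 1)
+        # is flipped to ImgDim=(1, 1024), PoolDim=(1, 4)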
+        if ifm_dim[1] == 1:
+            ifm_dim = ifm_dim[::-1]
+            k = k[::-1]
+        return (ifm_dim, k, ifm_ch)
+
+    def is_1d(self):
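+        # after normalization the dummy dimension (if any) is the
+        # Y-dimension, so a height-1 image and kernel indicate a 1D layer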
+        ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+        return (ifm_dim[0] == 1) and (k[0] == 1)
+
     def get_normal_input_shape(self):
         ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim")
         ifm_ch = self.get_nodeattr("NumChannels")
@@ -73,8 +90,8 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         return tuple(ret)
 
     def get_normal_output_shape(self):
-        k_h, k_w = self.get_nodeattr("PoolDim")
         ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim")
+        k_h, k_w = tuple(self.get_nodeattr("PoolDim"))
         ifm_ch = self.get_nodeattr("NumChannels")
         stride_h = k_h
         stride_w = k_w
@@ -100,13 +117,12 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
     def get_exp_cycles(self):
         # derived from StreamingMaxPool_Batch loop nest
-        k_h, k_w = self.get_nodeattr("PoolDim")
-        ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim")
-        # TODO: correct formula
-        if k_h == k_w:  # todo: better condition
-            return int(ifm_dim_h * (ifm_dim_h + (ifm_dim_h / k_h)))
+        ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+        if self.is_1d():
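+            # approximation: one cycle per pixel along the row plus
+            # roughly one pooling window of latency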
+            return int(ifm_dim[1] + k[1])
         else:
-            return int((ifm_dim_h / k_h) * (k_h + 1))
+            # TODO: adjust inaccurate formula
+            return int(ifm_dim[1] * (ifm_dim[1] + (ifm_dim[1] / k[1])))
 
     def get_instream_width(self):
         dt_bits = self.get_input_datatype().bitwidth()
@@ -173,51 +189,17 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
     def defines(self, var):
         numReps = 2
-        k = self.get_nodeattr("PoolDim")
-        ifm_dim = self.get_nodeattr("ImgDim")
-
-        if k[0] == k[1]:  # todo: better condition
-            self.code_gen_dict["$DEFINES$"] = [
-                """#define ImgDim {}\n #define PoolDim {}\n
-                #define NumChannels {}\n #define numReps {}""".format(
-                    ifm_dim[0],
-                    k[0],
-                    self.get_nodeattr("NumChannels"),
-                    numReps,
-                )
-            ]
-        else:
-            # TODO: use the same convention als convinpgen?:
-
-            # For the kernel, presenting the input data of size D as
-            # [H, W] = [Y, X] = [1, D] or [D, 1]
-            # effectively gives the same result. Because the
-            # ConvolutionInputGenerator_NonSquare_Dilated(_dws) kernel currently only
-            # supports dilation>1 along the X-axis and the
-            # ConvolutionInputGenerator_NonSquare only works for stride>1 along the
-            # X-axis, we are working with the following assumption:
-            # the dummy ('1') dimension is the Y-dimension, i.e.
-            # images and kernels (and their attributes) of dimension
-            # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D]
-            if ifm_dim[1] == 1:
-                ifm_dim = ifm_dim[::-1]
-                k = k[::-1]
-
-            ifm_dim_y, ifm_dim_x = ifm_dim
-            k_y, k_x = k
-
-            self.code_gen_dict["$DEFINES$"] = [
-                """#define ImgDim_x {}\n #define ImgDim_y {}\n
-                #define PoolDim_x {}\n #define PoolDim_y {}\n
-                #define NumChannels {}\n #define numReps {}""".format(
-                    ifm_dim_x,
-                    ifm_dim_y,
-                    k_x,
-                    k_y,
-                    self.get_nodeattr("NumChannels"),
-                    numReps,
-                )
-            ]
+        ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
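+        # index 1 (the X-dimension) is valid for both cases: it holds the
+        # edge length for square 2D pools and the data length D for 1D pools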
+
+        self.code_gen_dict["$DEFINES$"] = [
+            """#define ImgDim {}\n #define PoolDim {}\n
+            #define NumChannels {}\n #define numReps {}""".format(
+                ifm_dim[1],
+                k[1],
+                self.get_nodeattr("NumChannels"),
+                numReps,
+            )
+        ]
 
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -248,33 +230,24 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
     def docompute(self):
         dtype = self.get_input_datatype()
-
-        k = self.get_nodeattr("PoolDim")
-        # ifm_dim = self.get_nodeattr("ImgDim")
-        if k[0] == k[1]:  # todo: better condition
-            if dtype.bitwidth() == 1:
+        if dtype.bitwidth() == 1:
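+            # hlslib currently provides the plain (binary) StreamingMaxPool_Batch
+            # kernel only for the square 2D case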
+            if self.is_1d():
+                raise Exception("Binary 1d MaxPool not implemented on HLS backend")
+            else:
                 op = "StreamingMaxPool_Batch"
-                self.code_gen_dict["$DOCOMPUTE$"] = [
-                    "%s<ImgDim, PoolDim, NumChannels>(in0, out, numReps);" % (op)
-                ]
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                "%s<ImgDim, PoolDim, NumChannels>(in0, out, numReps);" % (op)
+            ]
+        else:
+            if self.is_1d():
+                op = "StreamingMaxPool_Precision_Batch_1d"
             else:
                 op = "StreamingMaxPool_Precision_Batch"
-                dtype = self.get_input_datatype()
-                dtype_hls = dtype.get_hls_datatype_str()
-                minval_str = str(int(dtype.min()))
-                self.code_gen_dict["$DOCOMPUTE$"] = [
-                    "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0, out, numReps);"
-                    % (op, dtype_hls, minval_str)
-                ]
-        else:
-            # todo: add binary op
-            op = "StreamingMaxPool_Precision_Batch_NonSquare"
             dtype = self.get_input_datatype()
             dtype_hls = dtype.get_hls_datatype_str()
             minval_str = str(int(dtype.min()))
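+            # the precision variants are additionally parametrized with the HLS
+            # datatype and its minimum value (presumably the initial value for
+            # the max reduction)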
             self.code_gen_dict["$DOCOMPUTE$"] = [
-                """%s<ImgDim_x, ImgDim_y, PoolDim_x, PoolDim_y,
-                NumChannels, %s, %s>(in0, out, numReps);"""
+                "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0, out, numReps);"
                 % (op, dtype_hls, minval_str)
             ]
 
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index d3989343f..e1c3eced7 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -235,40 +235,23 @@ class InferStreamingMaxPool(Transformation):
                 # mp_out_shape = model.get_tensor_shape(mp_output)
                 dt = model.get_tensor_datatype(mp_input)
                 mp_inst = getCustomOp(n)
-                # stride = mp_inst.get_nodeattr("strides")[0]
                 k_h, k_w = mp_inst.get_nodeattr("kernel_shape")
-                # pad = mp_inst.get_nodeattr("pads")[0]
                 ifm_ch = mp_in_shape[-1]
                 ifm_dim_h = mp_in_shape[1]
                 ifm_dim_w = mp_in_shape[2]
-                # ofm_dim = mp_out_shape[1]
                 if ifm_dim_h % k_h == 0 and ifm_dim_w % k_w == 0:
                     # create equivalent StreamingMaxPool_Batch node
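+                    # PoolDim and ImgDim are now always passed as (H, W)
+                    # tuples, covering square, rectangular and 1D pooling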
-                    # TODO support non-k strides
-                    if k_h == k_w:  # todo: better condition or none at all
-                        new_node = helper.make_node(
-                            "StreamingMaxPool_Batch",
-                            [mp_input],
-                            [mp_output],
-                            domain="finn.custom_op.fpgadataflow",
-                            backend="fpgadataflow",
-                            PoolDim=k_h,
-                            NumChannels=ifm_ch,
-                            ImgDim=ifm_dim_h,
-                            dataType=dt.name,
-                        )
-                    else:
-                        new_node = helper.make_node(
-                            "StreamingMaxPool_Batch",
-                            [mp_input],
-                            [mp_output],
-                            domain="finn.custom_op.fpgadataflow",
-                            backend="fpgadataflow",
-                            PoolDim=(k_h, k_w),
-                            NumChannels=ifm_ch,
-                            ImgDim=(ifm_dim_h, ifm_dim_w),
-                            dataType=dt.name,
-                        )
+                    new_node = helper.make_node(
+                        "StreamingMaxPool_Batch",
+                        [mp_input],
+                        [mp_output],
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                        PoolDim=(k_h, k_w),
+                        NumChannels=ifm_ch,
+                        ImgDim=(ifm_dim_h, ifm_dim_w),
+                        dataType=dt.name,
+                    )
                     graph.node.insert(node_ind, new_node)
                     # remove old nodes
                     graph.node.remove(n)
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index 790c0cb7e..2220bd29e 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -43,6 +43,7 @@ from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.custom_op.registry import getCustomOp
+import numpy as np
 
 
 def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
@@ -120,37 +121,38 @@ def prepare_inputs(input_tensor):
 
 
 # input datatype
-# @pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2])
-@pytest.mark.parametrize("idt", [DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4])
+# 1d maxpool
+@pytest.mark.parametrize("dim_1d", [False, True])
 # kernel size
-@pytest.mark.parametrize(
-    "k",
-    [
-        (2, 1),
-    ],
-)  # (4,4)])
+@pytest.mark.parametrize("k", [2, 4])
 # input dimension
-@pytest.mark.parametrize(
-    "ifm_dim",
-    [
-        (1024, 1),
-    ],
-)  # (6,6), (8,8)])
+@pytest.mark.parametrize("ifm_dim", [4, 8])
 # input channels
-@pytest.mark.parametrize("ifm_ch", [1, 3])
+@pytest.mark.parametrize("ifm_ch", [1, 3])  # 1,3
 # execution mode
-# @pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
-@pytest.mark.parametrize("exec_mode", ["rtlsim"])
+@pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
-    k_h, k_w = k
-    ifm_dim_h, ifm_dim_w = ifm_dim
+def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
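+    # expand the scalar test parameters into (H, W) attributes; the 1D
+    # case uses a dummy W-dimension of 1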
+    ifm_dim_h = ifm_dim
+    k_h = k
+    if dim_1d:
+        ifm_dim_w = 1
+        k_w = 1
+    else:
+        ifm_dim_w = ifm_dim_h
+        k_w = k_h
+    ifm_dim = (ifm_dim_h, ifm_dim_w)
+    k = (k_h, k_w)
+
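+    # stride equals the kernel size (non-overlapping windows), so
+    # ofm_dim is simply ifm_dim / k per axis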
     stride_h = k_h
     stride_w = k_w
     ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
     ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
     ofm_dim = (ofm_dim_h, ofm_dim_w)
+    if idt == DataType.BIPOLAR and dim_1d:
+        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
     if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
         pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")
 
@@ -186,9 +188,5 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
         cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        # DEBUG:
-        print("expected vs rtlsim cycles")
-        print(exp_cycles)
-        print(cycles_rtlsim)
-        # assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
-        # assert exp_cycles != 0
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
+        assert exp_cycles != 0
-- 
GitLab