Commit bda8ae7b authored by Yaman Umuroglu

Merge branch 'feature/convolutioninputgenerator_nonsquare' of https://github.com/mmrahorovic/finn into feature/1d_convs_and_finn_base_update
parents 0def4eb0 a41a6454
@@ -61,12 +61,13 @@ class ConvolutionInputGenerator(HLSCustomOp):
     def get_nodeattr_types(self):
         my_attrs = {
-            "ConvKernelDim": ("i", True, 0),
+            "ConvKernelDim": ("ints", True, []),  # [H, W] = [Y, X]
             "IFMChannels": ("i", True, 0),
-            "IFMDim": ("i", True, 0),
-            "OFMDim": ("i", True, 0),
+            "IFMDim": ("ints", True, []),  # [H, W] = [Y, X]
+            "OFMDim": ("ints", True, []),  # [H, W] = [Y, X]
             "SIMD": ("i", True, 0),
-            "Stride": ("i", True, 0),
+            "Stride": ("ints", True, [1, 1]),  # [H, W] = [Y, X]
+            "Dilation": ("ints", True, [1, 1]),  # [H, W] = [Y, X]
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
@@ -87,43 +88,45 @@ class ConvolutionInputGenerator(HLSCustomOp):
         return my_attrs

     def get_normal_input_shape(self):
-        ifm_dim = self.get_nodeattr("IFMDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ishape = (1, ifm_dim, ifm_dim, ifm_ch)
+        ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch)
         return ishape

     def get_folded_input_shape(self):
-        ifm_dim = self.get_nodeattr("IFMDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
         simd = self.get_nodeattr("SIMD")
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
         wf = int(ifm_ch / simd)
-        folded_ishape = (1, ifm_dim, ifm_dim, wf, simd)
+        folded_ishape = (1, ifm_dim_h, ifm_dim_w, wf, simd)
         return folded_ishape

     def get_normal_output_shape(self):
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        stride = self.get_nodeattr("Stride")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         pad = 0
-        ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
-        oshape = (1, ofm_dim, ofm_dim, k * k * ifm_ch)
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
+        oshape = (1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch)
         return oshape

     def get_folded_output_shape(self):
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        stride = self.get_nodeattr("Stride")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         simd = self.get_nodeattr("SIMD")
         pad = 0
-        ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
-        wf = int((k * k * ifm_ch) // simd)
-        folded_oshape = (1, ofm_dim, ofm_dim, wf, simd)
+        wf = int((k_h * k_w * ifm_ch) // simd)
+        folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
         return folded_oshape

     def make_shape_compatible_op(self, model):
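The shape methods above now compute separate H and W output sizes. As a sanity check, here is a small worked example; it assumes compute_conv_output_dim (from finn-base) follows the standard dilated-convolution output-size formula, which is an assumption on my part rather than something stated in this diff.

# Assumed formula: ofm = (ifm + 2*pad - dilation*(k - 1) - 1) // stride + 1
def conv_out_dim(ifm_dim, k, stride, pad=0, dilation=1):
    return (ifm_dim + 2 * pad - dilation * (k - 1) - 1) // stride + 1

# A 1D convolution laid out as [H, W] = [1, 30], kernel [1, 3], stride [1, 2]:
print(conv_out_dim(1, 1, 1))   # height stays 1
print(conv_out_dim(30, 3, 2))  # width becomes 14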
@@ -186,26 +189,31 @@ class ConvolutionInputGenerator(HLSCustomOp):
     def get_exp_cycles(self):
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        ofm_dim = self.get_nodeattr("OFMDim")
-        stride = self.get_nodeattr("Stride")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
+        ofm_dim_h, ofm_dim_w = self.get_nodeattr("OFMDim")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         # since mmv != 1 is not supported yet, we set mmv for now to 1
         mmv = 1
         # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
-        cycles_write_block = (ofm_dim * k * k * (ifm_ch / simd)) / mmv
-        cycles_read_block = stride * ifm_dim * (ifm_ch / simd)
+        cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
+        cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
         max_cycles = max(cycles_write_block, cycles_read_block)
-        exp_cycles = ifm_dim * k * (ifm_ch / simd) + ofm_dim * max_cycles
+        exp_cycles = (
+            ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
+        )
         return int(exp_cycles)

     def bram_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "block" or ram_style == "auto":
             ram_depth = ifm_dim * ifm_ch / simd
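To make the updated cycle estimate concrete, here is a standalone restatement of get_exp_cycles with example numbers; it is purely illustrative, and the in-class method above remains the authoritative version.

# Free-function restatement of the cycle estimate above for a quick check;
# parameter values are the illustrative [1, 4] kernel / [1, 32] input case.
def swg_exp_cycles(ifm_ch, simd, k_h, k_w, ifm_dim_w, ofm_dim_h, ofm_dim_w,
                   stride_w, dilation_h, mmv=1):
    cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
    cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
    max_cycles = max(cycles_write_block, cycles_read_block)
    return int(ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles)

# 64 read-in cycles + 1 * 232 write-block cycles = 296
print(swg_exp_cycles(ifm_ch=8, simd=4, k_h=1, k_w=4, ifm_dim_w=32,
                     ofm_dim_h=1, ofm_dim_w=29, stride_w=1, dilation_h=1))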
@@ -232,11 +240,12 @@ class ConvolutionInputGenerator(HLSCustomOp):
             return 0

     def lut_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "distributed":
             ram_luts = int(
@@ -252,11 +261,12 @@ class ConvolutionInputGenerator(HLSCustomOp):
         return 300 + ram_luts

     def uram_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "ultra":
             return int(
@@ -295,7 +305,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
         assert (
             inp.shape == exp_ishape
         ), """Input shape doesn't
-        match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
+        match expected shape (1, ifm_dim_h, ifm_dim_w, ifm_ch)."""
         if self.get_input_datatype() == DataType.BIPOLAR:
             # store bipolar activations as binary
             inp = (inp + 1) / 2
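The last context line above stores bipolar activations as binary, which is just the affine map from {-1, +1} to {0, 1}. A one-line check, illustrative only:

import numpy as np

x_bipolar = np.array([-1, 1, 1, -1])
print((x_bipolar + 1) / 2)  # [0. 1. 1. 0.]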
@@ -354,25 +364,33 @@ class ConvolutionInputGenerator(HLSCustomOp):
         assert (
             context[node.output[0]].shape == exp_oshape
         ), """Output
-        shape doesn't match expected shape (1, ofm_dim, ofm_dim, k*k*ifm_ch)."""
+        shape doesn't match expected shape (1, ofm_dim_h, ofm_dim_w, k_h*k_w*ifm_ch)."""

     def global_includes(self):
         self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"']

     def defines(self, var):
         numReps = 1
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ofm_dim = self.get_nodeattr("OFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
+        simd = self.get_nodeattr("SIMD")
+        ifm_precision = self.get_input_datatype().bitwidth()
         self.code_gen_dict["$DEFINES$"] = [
             """#define ConvKernelDim1 {}\n #define IFMChannels1 {}\n
             #define Input_precision1 {}\n #define IFMDim1 {}\n
             #define OFMDim1 {}\n #define SIMD1 {}\n
             #define Stride1 {}\n #define numReps {}""".format(
-                self.get_nodeattr("ConvKernelDim"),
-                self.get_nodeattr("IFMChannels"),
-                self.get_input_datatype().bitwidth(),
-                self.get_nodeattr("IFMDim"),
-                self.get_nodeattr("OFMDim"),
-                self.get_nodeattr("SIMD"),
-                self.get_nodeattr("Stride"),
+                k,
+                ifm_ch,
+                ifm_precision,
+                ifm_dim,
+                ofm_dim,
+                simd,
+                stride,
                 numReps,
             )
         ]
@@ -415,9 +433,11 @@ class ConvolutionInputGenerator(HLSCustomOp):
         }
         hls_ram_style = map_to_hls_ram_style[ram_style]
         hls_call = node.op_type
-        # check if non optimized ConvolutionInputGenerator is needed
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        # check which ConvolutionInputGenerator is needed
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         if k % stride != 0:
             hls_call += "_kernel_stride"
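The check above picks between the two finn-hlslib sliding-window implementations based on whether the stride divides the kernel size. A tiny restatement of the same rule (the helper name is made up for illustration, not part of the codebase):

# Plain ConvolutionInputGenerator when stride divides the kernel dimension,
# otherwise the "_kernel_stride" variant; mirrors the check above.
def select_hls_call(op_type, k, stride):
    return op_type + ("_kernel_stride" if k % stride != 0 else "")

print(select_hls_call("ConvolutionInputGenerator", 3, 2))  # ..._kernel_stride
print(select_hls_call("ConvolutionInputGenerator", 4, 2))  # plain variant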
@@ -47,7 +47,9 @@ from finn.custom_op.registry import getCustomOp
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer


-def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt):
+def make_single_im2col_modelwrapper(
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
+):
     odt = idt
     inp = helper.make_tensor_value_info(
         "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
@@ -61,12 +63,12 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i
         ["inp"],
         ["outp"],
         domain="finn.custom_op.general",
         backend="fpgadataflow",
-        stride=stride,
+        stride=[stride, stride],
         kernel_size=[k, k],
         input_shape=str((1, ifm_dim, ifm_dim, ifm_ch)),
         pad_amount=[0, 0, 0, 0],
         pad_value=0,
+        dilations=[dilation, dilation],
     )
     graph = helper.make_graph(
         nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp]
@@ -82,7 +84,7 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i
 def make_single_slidingwindow_modelwrapper(
-    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw=0
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw=0
 ):
     odt = idt
     inp = helper.make_tensor_value_info(
@@ -98,12 +100,13 @@ def make_single_slidingwindow_modelwrapper(
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        ConvKernelDim=k,
+        ConvKernelDim=[k, k],
         IFMChannels=ifm_ch,
-        IFMDim=ifm_dim,
-        OFMDim=ofm_dim,
+        IFMDim=[ifm_dim, ifm_dim],
+        OFMDim=[ofm_dim, ofm_dim],
         SIMD=simd,
-        Stride=stride,
+        Stride=[stride, stride],
+        Dilation=[dilation, dilation],
         inputDataType=idt.name,
         outputDataType=odt.name,
         depthwise=dw,
@@ -138,6 +141,9 @@ def prepare_inputs(input_tensor):
 @pytest.mark.parametrize("ifm_ch", [2, 4])
 # Stride
 @pytest.mark.parametrize("stride", [1, 2])
+# Dilation
+# Currently only dilation value of 1 is supported
+@pytest.mark.parametrize("dilation", [1])
 # execution mode
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 # input channel parallelism ("SIMD")
@@ -147,13 +153,13 @@ def prepare_inputs(input_tensor):
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_fpgadataflow_slidingwindow(
-    idt, k, ifm_dim, ifm_ch, stride, exec_mode, simd, dw
+    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
 ):
     ofm_dim = int(((ifm_dim - k) / stride) + 1)
     x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
     model = make_single_slidingwindow_modelwrapper(
-        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw
+        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
     )
     if exec_mode == "cppsim":
@@ -174,9 +180,10 @@ def test_fpgadataflow_slidingwindow(
     # execute model
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
     golden = make_single_im2col_modelwrapper(
-        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt
+        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
     )
     y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
     if dw == 0:
         assert (y_produced == y_expected).all()
     else:
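Putting the updated helpers together, a minimal usage sketch for a single configuration; the values mirror the pytest parametrization above and are illustrative, and it assumes the two helper functions from this test module are in scope.

# One configuration of the parametrized test, driven by hand.
from finn.core.datatype import DataType

idt = DataType.INT2
k, ifm_ch, ifm_dim, stride, dilation, simd, dw = 3, 2, 6, 1, 1, 2, 0
ofm_dim = int(((ifm_dim - k) / stride) + 1)  # 4; dilation is fixed to 1 here

dut_model = make_single_slidingwindow_modelwrapper(
    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
)
golden_model = make_single_im2col_modelwrapper(
    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
)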