From 48d574065c4dcd2ec635d8ea517d61ad4db9765c Mon Sep 17 00:00:00 2001
From: mmrahorovic <mmrahorovic@hotmail.com>
Date: Fri, 11 Mar 2022 12:13:24 +0000
Subject: [PATCH] [custom_op]: per-variant resource estimates for 1D SWU

---
 .../convolutioninputgenerator1d.py            | 167 ++++++++++--------
 1 file changed, 93 insertions(+), 74 deletions(-)
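
Reviewer note: the new bram_estimation reduces to a small cost model. Pick a
buffer depth per SWU variant, pick the widest BRAM aspect ratio whose depth
still covers it, then cascade primitives in width and depth. The standalone
sketch below mirrors that logic for illustration only: the helper name and
scalar arguments are hypothetical (the real method reads node attributes),
and the middle steps of the width ladder (1024 -> 18, 2048 -> 9, 4096 -> 4)
are assumed from unchanged context that falls outside the hunk.

    import math

    def estimate_brams(swu_variant, simd, ifm_ch, ifm_dim_w, k_w, input_bits):
        # Mirrors bram_estimation for ram_style in ("block", "auto").
        if swu_variant == "ConvolutionInputGenerator_1D_parallel":
            return 0  # parallel-output variant holds the window in registers
        # Buffer depth in SIMD-wide words, chosen per variant:
        if swu_variant == "ConvolutionInputGenerator_1D":
            ram_depth = (k_w - 1) * ifm_ch / simd
        elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive":
            ram_depth = ifm_dim_w * ifm_ch / simd  # buffers a full input row
        else:  # ..._dws and ..._dws_stride
            ram_depth = k_w * ifm_ch / simd
        # Widest BRAM aspect ratio that still covers ram_depth (assumed ladder).
        for depth_cap, width in [(512, 36), (1024, 18), (2048, 9),
                                 (4096, 4), (8192, 2)]:
            if ram_depth <= depth_cap:
                ram_width = width
                break
        else:
            ram_width = 1
        # Cascade: width_mul primitives side by side to cover one SIMD word,
        # depth_mul to cover depths beyond one primitive (18432 words assumed
        # to be the width-1 depth of an 18 Kb block).
        width_mul = math.ceil(simd * input_bits / ram_width)
        depth_mul = math.ceil(ram_depth / 18432)
        return width_mul * depth_mul

For example, the plain 1D variant with simd = 4, ifm_ch = 64, k_w = 4 and
8-bit inputs gives ram_depth = 48, so ram_width = 36 and the estimate is
ceil(32 / 36) * ceil(48 / 18432) = 1 BRAM.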

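The distributed-RAM (lut_estimation) and URAM (uram_estimation) paths follow
the same pattern: total buffered bits divided by the capacity of one
primitive, a 64-bit LUTRAM or a 72-bit x 4096-word URAM. A sketch under the
same hypothetical argument convention, with the fixed 300 LUT control-logic
overhead taken from the patch:

    import math

    def estimate_luts(swu_variant, simd, ifm_ch, ifm_dim_w, k_w, input_bits):
        # Mirrors lut_estimation; the parallel branch applies regardless of
        # ram_style, the other branches assume ram_style == "distributed".
        if swu_variant == "ConvolutionInputGenerator_1D_parallel":
            ram_luts = math.ceil(simd * input_bits * (k_w + 1) / 64)
        elif swu_variant == "ConvolutionInputGenerator_1D":
            ram_luts = math.ceil(input_bits * (k_w - 1) * ifm_ch / 64)
        elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive":
            ram_luts = math.ceil(input_bits * ifm_dim_w * ifm_ch / 64)
        else:  # ..._dws and ..._dws_stride
            ram_luts = math.ceil(input_bits * k_w * ifm_ch / 64)
        return 300 + ram_luts  # 300 LUTs of fixed control overhead

    def estimate_urams(swu_variant, simd, ifm_ch, ifm_dim_w, k_w, input_bits):
        # Mirrors uram_estimation for ram_style == "ultra".
        if swu_variant == "ConvolutionInputGenerator_1D_parallel":
            return 0
        if swu_variant == "ConvolutionInputGenerator_1D":
            depth = (k_w - 1) * ifm_ch / simd
        elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive":
            depth = ifm_dim_w * ifm_ch / simd
        else:  # ..._dws and ..._dws_stride
            depth = k_w * ifm_ch / simd
        width_mul = math.ceil(simd * input_bits / 72)  # URAM port width: 72 b
        depth_mul = math.ceil(depth / 4096)            # one URAM: 4096 words
        return width_mul * depth_mul

With the dws_naive variant the row buffer dominates: ifm_dim_w = 128,
ifm_ch = 64, simd = 8 and 8-bit inputs give depth = 1024 and
width_mul = ceil(64 / 72) = 1, so a single URAM suffices.
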
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
index 7e084fb9f..5cb9bce0c 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
@@ -269,17 +269,12 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         simd = self.get_nodeattr("SIMD")
         (
             ifm_ch,
-            ifm_dim,
-            ofm_dim,
-            k,
-            stride,
-            dilation,
+            [ifm_dim_h, ifm_dim_w],
+            [ofm_dim_h, ofm_dim_w],
+            [k_h, k_w],
+            [stride_h, stride_w],
+            [dilation_h, dilation_w],
         ) = self.get_1d_conv_attrs_normalized()
-        ifm_dim_h, ifm_dim_w = ifm_dim
-        ofm_dim_h, ofm_dim_w = ofm_dim
-        k_h, k_w = k
-        stride_h, stride_w = stride
-        dilation_h, dilation_w = dilation
 
         # since mmv != 1 is not supported yet, we set mmv for now to 1
         # mmv = 1
@@ -307,17 +302,29 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         return int(exp_cycles)
 
     def bram_estimation(self):
-        # NOTE: not tested for correctness
         simd = self.get_nodeattr("SIMD")
-        ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = np.prod(self.get_nodeattr("IFMDim"))
-        k = np.prod(self.get_nodeattr("ConvKernelDim"))
-        stride = np.prod(self.get_nodeattr("Stride"))
+        (
+            ifm_ch,
+            [ifm_dim_h, ifm_dim_w],
+            [ofm_dim_h, ofm_dim_w],
+            [k_h, k_w],
+            [stride_h, stride_w],
+            [dilation_h, dilation_w],
+        ) = self.get_1d_conv_attrs_normalized()
         ram_style = self.get_nodeattr("ram_style")
-        if self.use_parallel_window_output():
+        swu_variant = self.get_swu_variant()
+        if swu_variant == "ConvolutionInputGenerator_1D_parallel":
             return 0
         if ram_style == "block" or ram_style == "auto":
-            ram_depth = ifm_dim * ifm_ch / simd
+            if swu_variant == "ConvolutionInputGenerator_1D":
+                ram_depth = (k_w - 1) * ifm_ch / simd
+            elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive":
+                ram_depth = ifm_dim_w * ifm_ch / simd
+            elif swu_variant in [
+                "ConvolutionInputGenerator_1D_dws",
+                "ConvolutionInputGenerator_1D_dws_stride",
+            ]:
+                ram_depth = k_w * ifm_ch / simd
             if ram_depth <= 512:
                 ram_width = 36
             elif ram_depth <= 1024:
@@ -330,63 +337,80 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
                 ram_width = 2
             else:
                 ram_width = 1
-            return int(
-                (k + stride)
-                * (
-                    math.ceil(simd * self.get_input_datatype().bitwidth() / ram_width)
-                    * math.ceil(ifm_dim * ifm_ch / simd / ram_depth)
-                )
+            width_mul = math.ceil(
+                simd * self.get_input_datatype().bitwidth() / ram_width
             )
+            depth_mul = math.ceil(ram_depth / 18432)
+            return width_mul * depth_mul
         else:
             return 0
 
     def lut_estimation(self):
-        # NOTE: not tested for correctness
         simd = self.get_nodeattr("SIMD")
-        ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = np.prod(self.get_nodeattr("IFMDim"))
-        k = np.prod(self.get_nodeattr("ConvKernelDim"))
-        stride = np.prod(self.get_nodeattr("Stride"))
+        (
+            ifm_ch,
+            [ifm_dim_h, ifm_dim_w],
+            [ofm_dim_h, ofm_dim_w],
+            [k_h, k_w],
+            [stride_h, stride_w],
+            [dilation_h, dilation_w],
+        ) = self.get_1d_conv_attrs_normalized()
         ram_style = self.get_nodeattr("ram_style")
-        if self.use_parallel_window_output():
+        swu_variant = self.get_swu_variant()
+        if swu_variant == "ConvolutionInputGenerator_1D_parallel":
             ram_luts = math.ceil(
-                (simd * self.get_input_datatype().bitwidth() * (k + 1)) / 64
+                simd * self.get_input_datatype().bitwidth() * (k_w + 1) / 64
             )
         elif ram_style == "distributed":
-            ram_luts = int(
-                (k + stride)
-                * (
-                    simd
-                    * self.get_input_datatype().bitwidth()
-                    * math.ceil(ifm_dim * ifm_ch / simd / 64)
+            if swu_variant == "ConvolutionInputGenerator_1D":
+                ram_luts = math.ceil(
+                    self.get_input_datatype().bitwidth() * (k_w - 1) * ifm_ch / 64
+                )
+            elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive":
+                ram_luts = math.ceil(
+                    self.get_input_datatype().bitwidth() * ifm_dim_w * ifm_ch / 64
+                )
+            elif swu_variant in [
+                "ConvolutionInputGenerator_1D_dws",
+                "ConvolutionInputGenerator_1D_dws_stride",
+            ]:
+                ram_luts = math.ceil(
+                    self.get_input_datatype().bitwidth() * k_w * ifm_ch / 64
                 )
-            )
         else:
             ram_luts = 0
         return 300 + ram_luts
 
     def uram_estimation(self):
-        # NOTE: not tested for correctness
+        simd = self.get_nodeattr("SIMD")
         (
             ifm_ch,
-            ifm_dim,
-            ofm_dim,
-            k,
-            stride,
-            dilation,
+            [ifm_dim_h, ifm_dim_w],
+            [ofm_dim_h, ofm_dim_w],
+            [k_h, k_w],
+            [stride_h, stride_w],
+            [dilation_h, dilation_w],
         ) = self.get_1d_conv_attrs_normalized()
-        ifm_dim_y, ifm_dim_x = ifm_dim
-        k_y, k_x = k
-        stride_y, stride_x = stride
         ram_style = self.get_nodeattr("ram_style")
-        simd = self.get_nodeattr("SIMD")
-        if self.use_parallel_window_output():
+        swu_variant = self.get_swu_variant()
+        if swu_variant == "ConvolutionInputGenerator_1D_parallel":
             return 0
         elif ram_style == "ultra":
-            block_mul = 2
-            width_mul = math.ceil(simd * self.get_input_datatype().bitwidth() / 64)
-            depth_mul = math.ceil(stride_x * ifm_dim_x * (ifm_ch // simd) / 4096)
-            return block_mul * width_mul * depth_mul
+            if swu_variant == "ConvolutionInputGenerator_1D":
+                width_mul = math.ceil(simd * self.get_input_datatype().bitwidth() / 72)
+                depth_mul = math.ceil((k_w - 1) * ifm_ch / simd / 4096)
+                return width_mul * depth_mul
+            elif swu_variant == "ConvolutionInputGenerator_1D_dws_naive":
+                width_mul = math.ceil(simd * self.get_input_datatype().bitwidth() / 72)
+                depth_mul = math.ceil(ifm_dim_w * ifm_ch / simd / 4096)
+                return width_mul * depth_mul
+            elif swu_variant in [
+                "ConvolutionInputGenerator_1D_dws",
+                "ConvolutionInputGenerator_1D_dws_stride",
+            ]:
+                width_mul = math.ceil(simd * self.get_input_datatype().bitwidth() / 72)
+                depth_mul = math.ceil(k_w * ifm_ch / simd / 4096)
+                return width_mul * depth_mul
         else:
             return 0
 
@@ -484,19 +508,14 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         numReps = 1
         (
             ifm_ch,
-            ifm_dim,
-            ofm_dim,
-            k,
-            stride,
-            dilation,
+            [ifm_dim_h, ifm_dim_w],
+            [ofm_dim_h, ofm_dim_w],
+            [k_h, k_w],
+            [stride_h, stride_w],
+            [dilation_h, dilation_w],
         ) = self.get_1d_conv_attrs_normalized()
         simd = self.get_nodeattr("SIMD")
         ifm_precision = self.get_input_datatype().bitwidth()
-        ifm_dim_y, ifm_dim_x = ifm_dim
-        ofm_dim_y, ofm_dim_x = ofm_dim
-        k_y, k_x = k
-        dilation_y, dilation_x = dilation
-        stride_y, stride_x = stride
         swu_variant = self.get_swu_variant()
 
         if swu_variant in [
@@ -515,12 +534,12 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             #define SIMD1 {}\n
             #define numReps {}
             """.format(
-                    k_x,
+                    k_w,
                     ifm_ch,
                     ifm_precision,
-                    ifm_dim_x,
-                    ofm_dim_x,
-                    stride_x,
+                    ifm_dim_w,
+                    ofm_dim_w,
+                    stride_w,
                     simd,
                     numReps,
                 )
@@ -536,11 +555,11 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             #define SIMD1 {}\n
             #define numReps {}
             """.format(
-                    k_x,
+                    k_w,
                     ifm_ch,
                     ifm_precision,
-                    ifm_dim_x,
-                    ofm_dim_x,
+                    ifm_dim_w,
+                    ofm_dim_w,
                     simd,
                     numReps,
                 )
@@ -558,13 +577,13 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             #define SIMD1 {}\n
             #define numReps {}
             """.format(
-                    k_x,
+                    k_w,
                     ifm_ch,
                     ifm_precision,
-                    ifm_dim_x,
-                    ofm_dim_x,
-                    stride_x,
-                    dilation_x,
+                    ifm_dim_w,
+                    ofm_dim_w,
+                    stride_w,
+                    dilation_w,
                     simd,
                     numReps,
                 )
-- 
GitLab