From 065704ac53ab1887073670c33ff3fb4e4d57a124 Mon Sep 17 00:00:00 2001
From: auphelia <jakobapk@web.de>
Date: Thu, 9 Apr 2020 13:51:19 +0100
Subject: [PATCH] [HLSCustomOp] Add parameter to select whether stream width
 should be rounded up to an integer multiple of 8 (for AXI streams)
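
This lets callers request stream widths padded up to a whole number of
bytes when generating AXI stream interfaces, instead of repeating the
rounding logic at every call site. A minimal sketch of the intended
semantics, assuming roundup_to_integer_multiple(x, factor) from
finn.util.basic returns the smallest multiple of factor that is >= x:

    import math

    def roundup_to_integer_multiple(x, factor):
        # smallest multiple of factor that is greater than or equal to x
        return int(math.ceil(x / factor) * factor)

    # e.g. a layer with SIMD=3 and 3-bit inputs has a 9-bit input stream,
    # which AXI padding widens to the next byte boundary:
    # get_instream_width()                       ->  9
    # get_instream_width(axi_strm_padding=True)  -> 16

This also replaces the inline rounding previously done in
StreamingFCLayer_Batch, whose floor-based computation
(math.floor(w / 8) + 8) did not yield a multiple of 8 for all widths.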

---
 src/finn/custom_op/fpgadataflow/__init__.py   |  4 +-
 .../fpgadataflow/convolutioninputgenerator.py | 12 ++++--
 .../streamingdatawidthconverter_batch.py      | 17 ++++++---
 .../fpgadataflow/streamingfclayer_batch.py    | 38 +++++++++++--------
 .../fpgadataflow/streamingmaxpool_batch.py    | 12 ++++--
 .../custom_op/fpgadataflow/tlastmarker.py     |  9 ++++-
 6 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 4231be7c5..2274b699b 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -427,11 +427,11 @@ compilation transformations?
         """Returns folded output shape (according to neuron folding), if implemented."""
         raise Exception("get_folded_output_shape not implemented for this op")
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         """Returns input stream width, if implemented."""
         raise Exception("get_instream_width not implemented for this op")
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         """Returns output stream width, if implemented."""
         raise Exception("get_outstream_width not implemented for this op")
 
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 2ef5d350f..a695fe6df 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -39,6 +39,7 @@ from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.custom_op.im2col import compute_conv_output_dim
 from onnx import TensorProto, helper
+from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # ONNX i/o tensor shape assumptions for ConvolutionInputGenerator:
@@ -140,20 +141,23 @@ class ConvolutionInputGenerator(HLSCustomOp):
         """Returns FINN DataType of output."""
         return DataType[self.get_nodeattr("outputDataType")]
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         """Returns stream width, input and output stream width are equal for
         the sliding window function"""
         ibits = self.get_input_datatype().bitwidth()
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
         assert simd == ifm_ch, "SWG currently requires SIMD=IFM"
-        return simd * ibits
+        in_width = simd * ibits
+        if axi_strm_padding:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         """Returns stream width, input and output stream width are equal for
         the sliding window function, so the function to determine the input
         stream width can be reused."""
-        return self.get_instream_width()
+        return self.get_instream_width(axi_strm_padding)
 
     def get_number_output_values(self):
         folded_oshape = self.get_folded_output_shape()
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index 5e4c99aa4..1a9ee1118 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -36,6 +36,7 @@ except ModuleNotFoundError:
 from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.core.datatype import DataType
 from onnx import TensorProto, helper
+from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # does not do anything at the ONNX node-by-node level, and input-output
@@ -154,11 +155,17 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         folded_ishape = self.get_folded_input_shape()
         return np.prod(folded_ishape[:-1])
 
-    def get_instream_width(self):
-        return self.get_nodeattr("inWidth")
-
-    def get_outstream_width(self):
-        return self.get_nodeattr("outWidth")
+    def get_instream_width(self, axi_strm_padding=False):
+        in_width = self.get_nodeattr("inWidth")
+        if axi_strm_padding:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
+
+    def get_outstream_width(self, axi_strm_padding=False):
+        out_width = self.get_nodeattr("outWidth")
+        if axi_strm_padding:
+            out_width = roundup_to_integer_multiple(out_width, 8)
+        return out_width
 
     def make_shape_compatible_op(self, model):
         exp_ishape = self.get_normal_input_shape()
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 567a6cc98..eee197154 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -40,7 +40,10 @@ except ModuleNotFoundError:
 from onnx import TensorProto, helper
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow import HLSCustomOp
-from finn.util.basic import interleave_matrix_outer_dim_from_partitions
+from finn.util.basic import (
+    interleave_matrix_outer_dim_from_partitions,
+    roundup_to_integer_multiple,
+)
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
@@ -260,19 +263,28 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         """Returns FINN DataType of output."""
         return DataType[self.get_nodeattr("outputDataType")]
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         i_bits = self.get_input_datatype().bitwidth()
-        return i_bits * self.get_nodeattr("SIMD")
+        in_width = i_bits * self.get_nodeattr("SIMD")
+        if axi_strm_padding:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         o_bits = self.get_output_datatype().bitwidth()
-        return o_bits * self.get_nodeattr("PE")
+        out_width = o_bits * self.get_nodeattr("PE")
+        if axi_strm_padding:
+            out_width = roundup_to_integer_multiple(out_width, 8)
+        return out_width
 
-    def get_weightstream_width(self):
+    def get_weightstream_width(self, axi_strm_padding=False):
         pe = self.get_nodeattr("PE")
         simd = self.get_nodeattr("SIMD")
         wp = self.get_weight_datatype().bitwidth()
-        return pe * simd * wp
+        w_width = pe * simd * wp
+        if axi_strm_padding:
+            w_width = roundup_to_integer_multiple(w_width, 8)
+        return w_width
 
     def get_ap_int_max_w(self):
         temp_value = super().get_ap_int_max_w()
@@ -983,18 +995,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             self.code_gen_dict["$LAYER_NAME$"] = [
                 "{}_{}".format(self.onnx_node.name, self.onnx_node.name)
             ]
-            # make instream width a multiple of 8 for axi interface
-            in_width = self.get_instream_width()
-            if in_width % 8 != 0:
-                in_width = math.floor(in_width / 8) + 8
+            in_width = self.get_instream_width(axi_strm_padding=True)
             self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)]
             self.code_gen_dict["$OUT_RANGE$"] = [
-                "[{}:0]".format(self.get_outstream_width() - 1)
+                "[{}:0]".format(self.get_outstream_width(axi_strm_padding=True) - 1)
             ]
-            # make weight stream width a multiple of 8 for axi interface
-            weight_width = self.get_weightstream_width()
-            if weight_width % 8 != 0:
-                weight_width = math.floor(weight_width / 8) + 8
+            weight_width = self.get_weightstream_width(axi_strm_padding=True)
             self.code_gen_dict["$WEIGHT_RANGE$"] = ["[{}:0]".format(weight_width - 1)]
             self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)]
             mw = self.get_nodeattr("MW")
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index a7c2d5166..5e77a60de 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -37,6 +37,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.custom_op.im2col import compute_conv_output_dim
 from finn.core.datatype import DataType
 from onnx import TensorProto, helper
+from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
@@ -91,14 +92,17 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         folded_oshape = self.get_folded_output_shape()
         return np.prod(folded_oshape[:-1])
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         dt_bits = self.get_input_datatype().bitwidth()
         ifm_ch = self.get_nodeattr("NumChannels")
-        return int(dt_bits * ifm_ch)
+        in_width = int(dt_bits * ifm_ch)
+        if axi_strm_padding:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         """For streaming maxpool out stream with is the same as in stream width"""
-        return self.get_instream_width()
+        return self.get_instream_width(axi_strm_padding)
 
     def make_shape_compatible_op(self, model):
         exp_ishape = self.get_normal_input_shape()
diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py
index 4d4dee650..a04b2a886 100644
--- a/src/finn/custom_op/fpgadataflow/tlastmarker.py
+++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py
@@ -27,6 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from finn.custom_op.fpgadataflow import HLSCustomOp
+from finn.util.basic import roundup_to_integer_multiple
 
 
 class TLastMarker(HLSCustomOp):
@@ -133,12 +134,16 @@ class TLastMarker(HLSCustomOp):
     def get_folded_output_shape(self):
         return self.get_folded_input_shape()
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         stream_width = self.get_nodeattr("StreamWidth")
+        if axi_strm_padding:
+            stream_width = roundup_to_integer_multiple(stream_width, 8)
         return stream_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         stream_width = self.get_nodeattr("StreamWidth")
+        if axi_strm_padding:
+            stream_width = roundup_to_integer_multiple(stream_width, 8)
         return stream_width
 
     def strm_decl(self):
-- 
GitLab