[fpgadataflow/fmpadding_batch]: added support for non-square images

6b9a094d · Mirzam98 · e68d7a3a · 6b9a094d · 6b9a094d
Commit 6b9a094d authored 3 years ago by Mirzam98
--- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
@@ -17,9 +17,17 @@ class FMPadding_Batch(HLSCustomOp):
    def get_nodeattr_types(self):
        my_attrs = {
            # spatial size of input images
-            "ImgDim": ("i", True, 0),
+            "ImgDim": ("ints", True, []),  # [H, W] = [Y, X]
            # total padding (per dimension) to apply
-            "Padding": ("i", True, 2),
+            # NOTE: The padding scheme currently applied tries to pad the same
+            # number of zeros before and after the image in each dimension.
+            # For example, a padding of [1, x, 3, x] is treated the same
+            # as [2, x, 2, x].
+            "Padding": (
+                "ints",
+                True,
+                [1, 1, 1, 1],
+            ),  # [H_begin, W_begin, H_end, W_end] = [Y_begin, X_begin, Y_end, X_end]
            # number of channels in input image
            "NumChannels": ("i", True, 0),
            # SIMD Input parallelism
@@ -38,31 +46,33 @@ class FMPadding_Batch(HLSCustomOp):
    def get_padded_odim(self):
        "Return the padded spatial size of the output."
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
-        idim = self.get_nodeattr("ImgDim")
        pad = self.get_nodeattr("Padding")
-        return idim + pad
+        pad_h = pad[0] + pad[2]
+        pad_w = pad[1] + pad[3]
+        odim_h = idim_h + pad_h
+        odim_w = idim_w + pad_w
+        return [odim_h, odim_w]
    def get_exp_cycles(self):
-        odim = self.get_padded_odim()
+        odim_h, odim_w = self.get_padded_odim()
        channels = self.get_nodeattr("NumChannels")
        simd = self.get_nodeattr("SIMD")
        batch_size = self.get_nodeattr("numInputVectors")
-        exp_cycles = (channels / simd) * batch_size * odim * odim
+        exp_cycles = (channels / simd) * batch_size * odim_h * odim_w
        return int(exp_cycles)
    def get_normal_input_shape(self):
-        idim = self.get_nodeattr("ImgDim")
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
        num_ch = self.get_nodeattr("NumChannels")
+        ishape = (1, idim_h, idim_w, num_ch)
-        ishape = (1, idim, idim, num_ch)
        return ishape
    def get_normal_output_shape(self):
-        odim = self.get_padded_odim()
+        odim_h, odim_w = self.get_padded_odim()
        num_ch = self.get_nodeattr("NumChannels")
-        oshape = (1, odim, odim, num_ch)
+        oshape = (1, odim_h, odim_w, num_ch)
        return oshape
    def get_folded_input_shape(self):
@@ -148,20 +158,53 @@ class FMPadding_Batch(HLSCustomOp):
        self.code_gen_dict["$GLOBALS$"] = ['#include "streamtools.h"']
    def defines(self, var):
-        self.code_gen_dict["$DEFINES$"] = [
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
-            """#define ImgDim1 {}\n#define OutputDim1 {}\n
+        odim_h, odim_w = self.get_padded_odim()
-            #define Padding1 {}\n#define NumChannels1 {}\n
+        pad = self.get_nodeattr("Padding")
-            #define PaddingStyle1 {}\n#define numReps {}
+        pad_h = pad[0] + pad[2]
-            #define SIMD1 {}\n""".format(
+        pad_w = pad[1] + pad[3]
-                self.get_nodeattr("ImgDim"),
+        is_square = idim_h == idim_w
-                self.get_padded_odim(),
-                self.get_nodeattr("Padding"),
+        if is_square:
-                self.get_nodeattr("NumChannels"),
+            assert (
-                self.get_nodeattr("PaddingStyle"),
+                pad_h == pad_w
-                self.get_nodeattr("numInputVectors"),
+            ), "Only equal padding along the dimensions for square images is supported"
-                self.get_nodeattr("SIMD"),
+            self.code_gen_dict["$DEFINES$"] = [
-            )
+                """#define ImgDim1 {}\n#define OutputDim1 {}\n
-        ]
+                #define Padding1 {}\n#define NumChannels1 {}\n
+                #define SIMD1 {}\n#define PaddingStyle1 {}\n
+                #define numReps {}\n""".format(
+                    idim_h,
+                    odim_h,
+                    pad_h,
+                    self.get_nodeattr("NumChannels"),
+                    self.get_nodeattr("SIMD"),
+                    self.get_nodeattr("PaddingStyle"),
+                    self.get_nodeattr("numInputVectors"),
+                )
+            ]
+        else:
+            self.code_gen_dict["$DEFINES$"] = [
+                """
+                #define OutputDim1_x {}\n
+                #define OutputDim1_y {}\n
+                #define Padding1_x {}\n
+                #define Padding1_y {}\n
+                #define NumChannels1 {}\n
+                #define SIMD1 {}\n
+                #define PaddingStyle1 {}\n
+                #define numReps {}\n
+                """.format(
+                    odim_w,
+                    odim_h,
+                    pad_w,
+                    pad_h,
+                    self.get_nodeattr("NumChannels"),
+                    self.get_nodeattr("SIMD"),
+                    self.get_nodeattr("PaddingStyle"),
+                    self.get_nodeattr("numInputVectors"),
+                )
+            ]
    def read_npy_data(self):
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -193,12 +236,26 @@ class FMPadding_Batch(HLSCustomOp):
    def docompute(self):
        in_t = self.get_input_datatype().get_hls_datatype_str()
        node = self.onnx_node
-        self.code_gen_dict["$DOCOMPUTE$"] = [
-            """{}<ImgDim1, OutputDim1, Padding1, NumChannels1,SIMD1,
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
-            {}, PaddingStyle1> (in0, out, numReps);""".format(
+        is_square = idim_h == idim_w
-                node.op_type, in_t
-            )
+        if is_square:
-        ]
+            hls_call = node.op_type
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                """{}<ImgDim1, OutputDim1, Padding1, NumChannels1,SIMD1,
+                {}, PaddingStyle1> (in0, out, numReps);""".format(
+                    hls_call, in_t
+                )
+            ]
+        else:
+            hls_call = "FMPadding_nonsquare_Batch"
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                """{}<OutputDim1_x, OutputDim1_y, Padding1_x, Padding1_y, NumChannels1,
+                SIMD1, {}, PaddingStyle1> (in0, out, numReps);""".format(
+                    hls_call, in_t
+                )
+            ]
    def dataoutstrm(self):
        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -270,7 +327,7 @@ class FMPadding_Batch(HLSCustomOp):
        assert (
            inp.shape == exp_ishape
        ), """Input shape doesn't
-        match expected shape (1, ImgDim, ImgDim, NumChannels)."""
+        match expected shape (1, ImgDim_h, ImgDim_w, NumChannels)."""
        export_idt = self.get_input_datatype()
        reshaped_input = inp.reshape(folded_ishape)
@@ -316,4 +373,4 @@ class FMPadding_Batch(HLSCustomOp):
        assert (
            context[node.output[0]].shape == exp_oshape
        ), """Output shape doesn't match expected shape
-            (1, OutputDim, OutputDim, NumChannels)."""
+            (1, OutputDim_H, OutputDim_W, NumChannels)."""
--- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
@@ -54,15 +54,20 @@ target_clk_ns = 10
 def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_style):
+    pad_h = padding[0] + padding[2]
+    pad_w = padding[1] + padding[3]
+    idim_h, idim_w = idim
    assert pad_style == 2, "only pad_style == 2 supported in hlslib"
-    assert padding > 0, "Output dim should be greater than input dim"
+    assert pad_h > 0 or pad_w > 0, "Output dim should be greater than input dim"
-    odim = idim + padding
+    odim_h = idim_h + pad_h
+    odim_w = idim_w + pad_w
    inp = helper.make_tensor_value_info(
-        "inp", TensorProto.FLOAT, [1, idim, idim, num_ch]
+        "inp", TensorProto.FLOAT, [1, idim_h, idim_w, num_ch]
    )
    outp = helper.make_tensor_value_info(
-        "outp", TensorProto.FLOAT, [1, odim, odim, num_ch]
+        "outp", TensorProto.FLOAT, [1, odim_h, odim_w, num_ch]
    )
    FMPadding = helper.make_node(
@@ -94,9 +99,9 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty
 # input image dimension
-@pytest.mark.parametrize("idim", [8])
+@pytest.mark.parametrize("idim", [[8, 8], [10, 8]])
 # padding to add, as [H_begin, W_begin, H_end, W_end]
-@pytest.mark.parametrize("pad", [2, 3])
+@pytest.mark.parametrize("pad", [[1, 1, 1, 1], [1, 1, 2, 2], [1, 3, 2, 3]])
 # number of channels
 @pytest.mark.parametrize("num_ch", [2, 4])
 # Input parallelism
@@ -112,10 +117,22 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty
 def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
    if num_ch % simd != 0:
        pytest.skip(" num_ch % simd != 0, skipping")
+    idim_h, idim_w = idim
+    pad_h = pad[0] + pad[2]
+    pad_w = pad[1] + pad[3]
+    if idim_h == idim_w and pad_h != pad_w:
+        pytest.skip(
+            """Only equal padding along the dimensions for square images
+            is supported, skipping"""
+        )
    # generate input data
-    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
+    x = gen_finn_dt_tensor(idt, [1, idim_h, idim_w, num_ch])
    input_dict = {"inp": x}
-    odim = idim + pad
+    odim_h = idim_h + pad_h
+    odim_w = idim_w + pad_w
    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style)
    model = model.transform(InferShapes())
@@ -129,24 +146,26 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
        model = model.transform(HLSSynthIP())
        model = model.transform(PrepareRTLSim())
    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
-    expected_oshape = (1, odim, odim, num_ch)
+    expected_oshape = (1, odim_h, odim_w, num_ch)
    assert y_produced.shape == expected_oshape
    # calculate reference
    # calculate correct pad according to parameters
    if pad_style == 2:
-        if pad % 2 == 0:
+        if pad_h % 2 == 0:
-            pad_up = pad // 2
+            pad_up = pad_h // 2
-            pad_left = pad // 2
+        else:
+            pad_up = pad_h // 2 + 1
+        if pad_w % 2 == 0:
+            pad_left = pad_w // 2
        else:
-            pad_up = pad // 2 + 1
+            pad_left = pad_w // 2 + 1
-            pad_left = pad // 2 + 1
    else:
-        pad_up = pad // 2
+        pad_up = pad_h // 2
-        pad_left = pad // 2
+        pad_left = pad_w // 2
-    pad_down = pad - pad_up
+    pad_down = pad_h - pad_up
-    pad_right = pad - pad_left
+    pad_right = pad_w - pad_left
    y_expected = np.pad(
        x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant"