From d7a5881b62a14fd565509c7dd83155d3611d2f3b Mon Sep 17 00:00:00 2001
From: mmrahorovic <mmrahorovic@hotmail.com>
Date: Thu, 16 Dec 2021 17:52:03 +0000
Subject: [PATCH] [custom_op]: support for 1D SWU and other small modifications

---
 .../convolutioninputgenerator1d.py            | 117 +++++++++---------
 .../fpgadataflow/convert_to_hls_layers.py     |   7 +-
 .../test_fpgadataflow_convinputgenerator1d.py |  31 ++++-
 3 files changed, 90 insertions(+), 65 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
index 03630916a..8d401c40d 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
@@ -29,6 +29,7 @@
 import math
 import numpy as np
 import os
+import warnings
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -85,6 +86,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             "distributed",
             {"auto", "block", "distributed", "ultra"},
         ),
+        "parallel_window": ("i", False, 0, {0, 1}),
     }
     my_attrs.update(super().get_nodeattr_types())
     return my_attrs
@@ -219,13 +221,31 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         dilation_h, dilation_w = dilation
         ram_style = self.get_nodeattr("ram_style")
 
-        if self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels"):
-            if self.get_nodeattr("depthwise") == 0:
-                if stride_h == 1 and stride_w == 1:
-                    if dilation_h == 1 and dilation_w == 1:
-                        return ram_style in ["auto", "distributed"]
-
-        return False
+        fully_unfolded = self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels")
+        non_dws = self.get_nodeattr("depthwise") == 0
+        no_stride = stride_h == 1 and stride_w == 1
+        no_dilation = dilation_h == 1 and dilation_w == 1
+        supported_ram_style = ram_style in ["auto", "distributed"]
+        if self.get_nodeattr("parallel_window") == 1:
+            if (
+                fully_unfolded
+                and non_dws
+                and no_stride
+                and no_dilation
+                and supported_ram_style
+            ):
+                return True
+            else:
+                warnings.warn(
+                    "{}: Parallel window output variant is not supported for"
+                    " this node, please inspect requirements in"
+                    " use_parallel_window_output method of the custom_op".format(
+                        self.onnx_node.name
+                    )
+                )
+                return False
+        else:
+            return False
 
     def get_exp_cycles(self):
         simd = self.get_nodeattr("SIMD")
@@ -244,17 +264,18 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         dilation_h, dilation_w = dilation
 
-        # since mmv != 1 is not supported yet, we set mmv for now to 1
-        mmv = 1
+        # mmv != 1 is not supported yet; mmv is effectively 1 and therefore
+        # drops out of the cycle formulas below
         # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
         if self.use_parallel_window_output():
             exp_cycles = k_w + ofm_dim_w
+        elif dilation_h > 1 or dilation_w > 1:
+            cycles_read_block = ifm_dim_w * ifm_ch / simd
+            cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd
+            exp_cycles = cycles_read_block + cycles_write_block
         else:
-            cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
-            cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
-            max_cycles = max(cycles_write_block, cycles_read_block)
-            exp_cycles = (
-                ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
-            )
+            cycles_read_block = ifm_ch / simd * (k_w - 1) - (k_w - 1)
+            cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd
+            exp_cycles = cycles_read_block + cycles_write_block
 
         return int(exp_cycles)
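Note on the revised get_exp_cycles(): the parallel-window branch finishes in roughly k_w + ofm_dim_w cycles, while the dilated and default branches are modeled as one read pass plus one write pass over the feature map. The dilated branch can be sanity-checked by hand with a standalone Python sketch; all parameter values below are made up for illustration and are not taken from this patch:

    # Hypothetical 1D dilated depthwise configuration
    k_w = 4                      # kernel width
    ifm_ch, simd = 16, 4         # channels, SIMD lanes
    ifm_dim_w = 32               # input feature map width
    dilation_w = 2               # dilation (stride assumed 1, no padding)
    ofm_dim_w = ifm_dim_w - dilation_w * (k_w - 1)             # 26

    # Mirrors the dilation > 1 branch above:
    cycles_read_block = ifm_dim_w * ifm_ch / simd              # 32 * 16 / 4 = 128
    cycles_write_block = ofm_dim_w * k_w * ifm_ch / simd       # 26 * 4 * 16 / 4 = 416
    exp_cycles = int(cycles_read_block + cycles_write_block)   # 544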
@@ -451,7 +472,6 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         # For a 1d convolution with stride=[S,1] or [1,S], the finn-hlslib function
         # of ConvInpGen must be created with [stride_y, stride_x] = [S, S].
         # TODO: changes in finn-hlslib (slidingwindow.h)
-        stride_y = np.prod(stride)
         stride_x = np.prod(stride)
 
         if dilation_x > 1:
@@ -461,33 +481,23 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             self.code_gen_dict["$DEFINES$"] = [
                 """
                 #define ConvKernelDim1_x {}\n
-                #define ConvKernelDim1_y {}\n
                 #define IFMChannels1 {}\n
                 #define Input_precision1 {}\n
                 #define IFMDim1_x {}\n
-                #define IFMDim1_y {}\n
                 #define OFMDim1_x {}\n
-                #define OFMDim1_y {}\n
                 #define SIMD1 {}\n
                 #define Stride1_x {}\n
-                #define Stride1_y {}\n
                 #define Dilation1_x {}\n
-                #define Dilation1_y {}\n
                 #define numReps {}
                 """.format(
                     k_x,
-                    k_y,
                     ifm_ch,
                     ifm_precision,
                     ifm_dim_x,
-                    ifm_dim_y,
                     ofm_dim_x,
-                    ofm_dim_y,
                     simd,
                     stride_x,
-                    stride_y,
                     dilation_x,
-                    dilation_y,
                     numReps,
                 )
             ]
@@ -496,29 +506,21 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             self.code_gen_dict["$DEFINES$"] = [
                 """
                 #define ConvKernelDim1_x {}\n
-                #define ConvKernelDim1_y {}\n
                 #define IFMChannels1 {}\n
                 #define Input_precision1 {}\n
                 #define IFMDim1_x {}\n
-                #define IFMDim1_y {}\n
                 #define OFMDim1_x {}\n
-                #define OFMDim1_y {}\n
                 #define SIMD1 {}\n
                 #define Stride1_x {}\n
-                #define Stride1_y {}\n
                 #define numReps {}
                 """.format(
                     k_x,
-                    k_y,
                     ifm_ch,
                     ifm_precision,
                     ifm_dim_x,
-                    ifm_dim_y,
                     ofm_dim_x,
-                    ofm_dim_y,
                     simd,
                     stride_x,
-                    stride_y,
                     numReps,
                 )
             ]
@@ -571,34 +573,33 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
                 )
             ]
         else:
-            hls_call = "ConvolutionInputGenerator_NonSquare"
             dilation_h, dilation_w = self.get_nodeattr("Dilation")
-            if dilation_h > 1 or dilation_w > 1:
-                hls_call += "_Dilated"
-                if self.get_nodeattr("depthwise") == 1:
-                    hls_call += "_dws"
-                self.code_gen_dict["$DOCOMPUTE$"] = [
-                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
-                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
-                    SIMD1, Stride1_x, Stride1_y, Dilation1_x, Dilation1_y>
-                    (in0, out, numReps, {});""".format(
-                        hls_call, hls_ram_style
-                    )
-                ]
-            elif self.get_nodeattr("depthwise") == 1:
-                hls_call += "_dws"
-                self.code_gen_dict["$DOCOMPUTE$"] = [
-                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
-                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
-                    SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format(
-                        hls_call, hls_ram_style
-                    )
-                ]
+            if self.get_nodeattr("depthwise") == 1:
+                if dilation_h > 1 or dilation_w > 1:
+                    hls_call = "ConvolutionInputGenerator_1D_dilated_dws"
+                    self.code_gen_dict["$DOCOMPUTE$"] = [
+                        """{}<ConvKernelDim1_x, IFMChannels1,
+                        Input_precision1, IFMDim1_x, OFMDim1_x,
+                        SIMD1, Stride1_x, Dilation1_x>
+                        (in0, out, numReps, {});""".format(
+                            hls_call, hls_ram_style
+                        )
+                    ]
+                else:
+                    hls_call = "ConvolutionInputGenerator_1D_dws_lowbuffer"
+                    self.code_gen_dict["$DOCOMPUTE$"] = [
+                        """{}<ConvKernelDim1_x, IFMChannels1,
+                        Input_precision1, IFMDim1_x, OFMDim1_x,
+                        SIMD1> (in0, out, numReps, {});""".format(
+                            hls_call, hls_ram_style
+                        )
+                    ]
             else:
+                hls_call = "ConvolutionInputGenerator_1D_lowbuffer"
                 self.code_gen_dict["$DOCOMPUTE$"] = [
-                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
-                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
-                    SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format(
+                    """{}<ConvKernelDim1_x, IFMChannels1,
+                    Input_precision1, IFMDim1_x, OFMDim1_x,
+                    SIMD1> (in0, out, numReps, {});""".format(
                         hls_call, hls_ram_style
                    )
                 ]
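The $DOCOMPUTE$ dispatch for the non-parallel 1D case now reduces to three finn-hlslib templates. The selection logic can be summarized by the sketch below, which mirrors the branches above; it is a reading aid, not code from the patch:

    def select_1d_swu_impl(depthwise, dilation_w):
        # Mirrors the non-parallel $DOCOMPUTE$ branches above.
        if depthwise == 1:
            if dilation_w > 1:
                return "ConvolutionInputGenerator_1D_dilated_dws"
            return "ConvolutionInputGenerator_1D_dws_lowbuffer"
        return "ConvolutionInputGenerator_1D_lowbuffer"

Note that the non-depthwise dilated combination never reaches this dispatch; InferConvInpGen (next file) rejects it with an assertion.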
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index b2f50b1a2..f7cb0a16b 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -197,7 +197,7 @@ class InferConvInpGen(Transformation):
                         depthwise=depthwise,
                         name="ConvolutionInputGenerator_" + n.name,
                     )
-                else:  # non-square images and/or kernels
+                else:  # 1D images and/or kernels
                     assert is_1d_convolution, (
                         "%s: ConvolutionInputGenerator1D works only for 1D convs"
                         % n.name
@@ -208,6 +208,11 @@ class InferConvInpGen(Transformation):
                         with dilation value greater than 1"""
                         % n.name
                     )
+                    assert depthwise == 1, (
+                        """%s: Dilation value > 1 is only supported for
+                        1D depthwise separable convolutions"""
+                        % n.name
+                    )
                     ConvInpGen_node = helper.make_node(
                         "ConvolutionInputGenerator1D",
                         [ConvInpGen_input],
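Taken together with the existing stride assertion visible in the context above, the 1D configurations accepted by InferConvInpGen can be summarized by a predicate like the following. This is a sketch inferred from the assert messages, reusing the transformation's variable names:

    def dilation_config_ok(depthwise, stride, dilation):
        # dilation > 1 requires stride 1 and a depthwise convolution;
        # all other 1D configurations pass through unchanged.
        stride_h, stride_w = stride
        dilation_h, dilation_w = dilation
        if dilation_h > 1 or dilation_w > 1:
            return stride_h == 1 and stride_w == 1 and depthwise == 1
        return True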
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
index 8440ac1fe..f2be24185 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
@@ -90,7 +90,7 @@ def make_single_im2col_modelwrapper(
 
 
 def make_single_slidingwindow_modelwrapper(
-    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw=0
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, parallel_window, dw=0
 ):
     k_h, k_w = k
     ifm_dim_h, ifm_dim_w = ifm_dim
@@ -122,6 +122,7 @@ def make_single_slidingwindow_modelwrapper(
         inputDataType=idt.name,
         outputDataType=odt.name,
         depthwise=dw,
+        parallel_window=parallel_window,
     )
     graph = helper.make_graph(
         nodes=[SlidingWindow_node],
@@ -153,10 +154,10 @@ def prepare_inputs(input_tensor):
 # input channels
 @pytest.mark.parametrize("ifm_ch", [1, 4])
 # Stride
-@pytest.mark.parametrize("stride", [[1, 1], [2, 1]])
+# @pytest.mark.parametrize("stride", [[1, 1], [2, 1]])
+@pytest.mark.parametrize("stride", [[1, 1]])
 # Dilation
-# @pytest.mark.parametrize("dilation", [[1, 1], [2, 1]])
-@pytest.mark.parametrize("dilation", [[1, 1]])
+@pytest.mark.parametrize("dilation", [[1, 1], [2, 1]])
 # execution mode
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 # input channel parallelism ("SIMD")
@@ -165,10 +166,22 @@ def prepare_inputs(input_tensor):
 # depthwise
 @pytest.mark.parametrize("dw", [0, 1])
 # Flip dimensions
 @pytest.mark.parametrize("flip", [False, True])
+# Use parallel window output variant
+@pytest.mark.parametrize("parallel_window", [False, True])
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_fpgadataflow_slidingwindow_1d(
-    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw, flip
+    idt,
+    k,
+    ifm_dim,
+    ifm_ch,
+    stride,
+    dilation,
+    exec_mode,
+    simd,
+    dw,
+    flip,
+    parallel_window,
 ):
     if flip:
         k = k[::-1]
@@ -186,6 +199,11 @@ def test_fpgadataflow_slidingwindow_1d(
             """Dilation value greater than 1 and stride greater than 1
             currently not supported for 1D convolutions"""
         )
+    if (dilation_h > 1 or dilation_w > 1) and dw == 0:
+        pytest.skip(
+            """Dilation value greater than 1 currently not supported
+            for non-dws 1D convolutions"""
+        )
     if simd > ifm_ch:
         pytest.skip("SIMD cannot be larger than number of input channels")
 
@@ -203,6 +221,7 @@ def test_fpgadataflow_slidingwindow_1d(
         stride=stride,
         dilation=dilation,
         idt=idt,
+        parallel_window=parallel_window,
         dw=dw,
     )
 
@@ -213,7 +232,7 @@ def test_fpgadataflow_slidingwindow_1d(
     elif exec_mode == "rtlsim":
         model = model.transform(SetExecMode("rtlsim"))
         model = model.transform(GiveUniqueNodeNames())
-        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+        model = model.transform(PrepareIP("xcu250-figd2104-2L-e", 5))
         model = model.transform(HLSSynthIP())
         model = model.transform(PrepareRTLSim())
     else:
--
GitLab
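For reference, the updated test helper can also be driven directly to build a single-node SWU model with the new attribute enabled. A usage sketch follows; the shapes and datatype are illustrative only, and the DataType indexing syntax may differ slightly between FINN versions:

    from finn.core.datatype import DataType

    # Single-node SWU model with the parallel window variant enabled
    model = make_single_slidingwindow_modelwrapper(
        k=[1, 4],
        ifm_ch=4,
        ifm_dim=[1, 32],
        ofm_dim=[1, 29],        # (32 - 4) / 1 + 1 for stride/dilation 1
        simd=4,                 # SIMD == IFMChannels, required for parallel_window
        stride=[1, 1],
        dilation=[1, 1],
        idt=DataType["INT8"],
        parallel_window=1,
        dw=0,
    )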