diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py index 8b4d3e425b449079615b769564b82a7571c152db..ef0f10e33ebc3f5c3ca28bb7819e1c346c5ef283 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py @@ -563,6 +563,15 @@ class ConvolutionInputGenerator1D(HLSCustomOp): "ultra": "ap_resource_uram()", } hls_ram_style = map_to_hls_ram_style[ram_style] + ( + ifm_ch, + ifm_dim, + ofm_dim, + k, + stride, + dilation, + ) = self.get_1d_conv_attrs_normalized() + stride_x = np.prod(stride) # check which ConvolutionInputGenerator is needed if self.use_parallel_window_output(): @@ -588,14 +597,28 @@ class ConvolutionInputGenerator1D(HLSCustomOp): ) ] else: - hls_call = "ConvolutionInputGenerator_1D_dws_lowbuffer" - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, IFMChannels1, - Input_precision1, IFMDim1_x, OFMDim1_x, - SIMD1> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style - ) - ] + if stride_x > 1: + # temporarily use old ConvolutionInputGenerator_NonSquare_dws + # for depthwise with stride > 1 + # note that both x and y stride are set to same (hlslib bug) + hls_call = "ConvolutionInputGenerator_NonSquare_dws" + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}<ConvKernelDim1_x, 1, IFMChannels1, + Input_precision1, IFMDim1_x, 1, OFMDim1_x, 1, + SIMD1, Stride1_x, Stride1_x + > (in0, out, numReps, {});""".format( + hls_call, hls_ram_style + ) + ] + else: + hls_call = "ConvolutionInputGenerator_1D_dws_lowbuffer" + self.code_gen_dict["$DOCOMPUTE$"] = [ + """{}<ConvKernelDim1_x, IFMChannels1, + Input_precision1, IFMDim1_x, OFMDim1_x, + SIMD1> (in0, out, numReps, {});""".format( + hls_call, hls_ram_style + ) + ] else: hls_call = "ConvolutionInputGenerator_1D_lowbuffer" self.code_gen_dict["$DOCOMPUTE$"] = [