diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 03119b3fbea12cf9065e561089ca5875a8f622b0..4dfbef739e5acfb110f155320ccca4816906fc24 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -106,7 +106,6 @@ class ConvolutionInputGenerator(HLSCustomOp):
         pad = 0
         ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
-        assert k % stride == 0, "stride must divide kernel size k"
         wf = int((k * k * ifm_ch) // simd)
         folded_oshape = (1, ofm_dim, ofm_dim, wf, simd)
         return folded_oshape
@@ -313,10 +312,18 @@ class ConvolutionInputGenerator(HLSCustomOp):
             "ultra": "ap_resource_uram()",
         }
         hls_ram_style = map_to_hls_ram_style[ram_style]
+
+        hls_call = node.op_type
+        # check if non optimized ConvolutionInputGenerator is needed
+        k = self.get_nodeattr("ConvKernelDim")
+        stride = self.get_nodeattr("Stride")
+        if k % stride != 0:
+            hls_call += "_kernel_stride"
+
         self.code_gen_dict["$DOCOMPUTE$"] = [
             """{}<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1, OFMDim1,
             SIMD1, Stride1> (in0, out, numReps, {});""".format(
-                node.op_type, hls_ram_style
+                hls_call, hls_ram_style
            )
        ]

diff --git a/src/finn/custom_op/im2col.py b/src/finn/custom_op/im2col.py
index 16446c15d46ee7996162f864708f7fde6cfedaf3..1ac2dad677f76b8f2aca1a04d96f4ae379940e9a 100644
--- a/src/finn/custom_op/im2col.py
+++ b/src/finn/custom_op/im2col.py
@@ -21,8 +21,6 @@ def get_im2col_indices_nchw(
     """Returns im2col indices."""
     # First figure out what the size of the output should be
     N, C, H, W = x_shape
-    assert (H + 2 * padding - field_height) % stride_y == 0
-    assert (W + 2 * padding - field_width) % stride_x == 0
     out_height = compute_conv_output_dim(H, field_height, stride_y, padding)
     out_width = compute_conv_output_dim(W, field_width, stride_x, padding)
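
For context (not part of the patch): below is a minimal standalone sketch of the two behaviours the change relies on. It assumes compute_conv_output_dim uses the usual floor-division output formula, which is why the divisibility asserts can be dropped safely, and select_hls_call is a hypothetical helper that mirrors the dispatch now added to $DOCOMPUTE$.

```python
# Standalone sketch (not FINN code); names other than compute_conv_output_dim
# are hypothetical and only illustrate the logic in the diff above.

def compute_conv_output_dim(ifm_dim, k, stride, pad=0):
    """Output spatial size of a convolution, assuming floor semantics.
    Well defined even when stride does not divide (ifm_dim + 2*pad - k)."""
    return (ifm_dim + 2 * pad - k) // stride + 1

def select_hls_call(op_type, k, stride):
    """Mirror of the $DOCOMPUTE$ dispatch: fall back to the *_kernel_stride
    HLS variant when the kernel size is not a multiple of the stride."""
    return op_type if k % stride == 0 else op_type + "_kernel_stride"

if __name__ == "__main__":
    # Example: 7x7 input, 3x3 kernel, stride 2 -> k % stride != 0
    print(compute_conv_output_dim(7, 3, 2))                    # 3
    print(select_hls_call("ConvolutionInputGenerator", 3, 2))  # ..._kernel_stride
```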