diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 4231be7c523a5a510de89fb1202dc7bbcf30d39f..2274b699bd3b37f6a55b2b6ee3ccb562eaeeff8b 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -427,11 +427,11 @@ compilation transformations?
         """Returns folded output shape (according to neuron folding), if implemented."""
         raise Exception("get_folded_output_shape not implemented for this op")
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         """Returns input stream width, if implemented."""
         raise Exception("get_instream_width not implemented for this op")
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         """Returns output stream width, if implemented."""
         raise Exception("get_outstream_width not implemented for this op")
 
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 2ef5d350fb972e448b9a3745eb8c98197ab87d94..a695fe6df209bb3810664c2ce7af5410e03a077c 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -39,6 +39,7 @@ from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.custom_op.im2col import compute_conv_output_dim
 from onnx import TensorProto, helper
+from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # ONNX i/o tensor shape assumptions for ConvolutionInputGenerator:
@@ -140,20 +141,23 @@ class ConvolutionInputGenerator(HLSCustomOp):
         """Returns FINN DataType of output."""
         return DataType[self.get_nodeattr("outputDataType")]
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         """Returns stream width, input and output stream width are equal for
         the sliding window function"""
         ibits = self.get_input_datatype().bitwidth()
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
         assert simd == ifm_ch, "SWG currently requires SIMD=IFM"
-        return simd * ibits
+        in_width = simd * ibits
+        if axi_strm_padding is True:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         """Returns stream width, input and output stream width are equal for
         the sliding window function, so the function to determine the input
         stream width can be reused."""
-        return self.get_instream_width()
+        return self.get_instream_width(axi_strm_padding)
 
     def get_number_output_values(self):
         folded_oshape = self.get_folded_output_shape()
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index 5e4c99aa41216b05f66da8341870269c620c6c40..1a9ee1118596a95b624258d3ee8fe4c37a71edde 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -36,6 +36,7 @@ except ModuleNotFoundError:
 from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.core.datatype import DataType
 from onnx import TensorProto, helper
+from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # does not do anything at the ONNX node-by-node level, and input-output
@@ -154,11 +155,17 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         folded_ishape = self.get_folded_input_shape()
         return np.prod(folded_ishape[:-1])
 
-    def get_instream_width(self):
-        return self.get_nodeattr("inWidth")
-
-    def get_outstream_width(self):
-        return self.get_nodeattr("outWidth")
+    def get_instream_width(self, axi_strm_padding=False):
+        in_width = self.get_nodeattr("inWidth")
+        if axi_strm_padding is True:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
+
+    def get_outstream_width(self, axi_strm_padding=False):
+        out_width = self.get_nodeattr("outWidth")
+        if axi_strm_padding is True:
+            out_width = roundup_to_integer_multiple(out_width, 8)
+        return out_width
 
     def make_shape_compatible_op(self, model):
         exp_ishape = self.get_normal_input_shape()
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 567a6cc984293c1db79657ce6ac8d186aa2fa1f3..eee1971547428bf56291030814c69415bd31c074 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -40,7 +40,10 @@ except ModuleNotFoundError:
 from onnx import TensorProto, helper
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow import HLSCustomOp
-from finn.util.basic import interleave_matrix_outer_dim_from_partitions
+from finn.util.basic import (
+    interleave_matrix_outer_dim_from_partitions,
+    roundup_to_integer_multiple,
+)
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
@@ -260,19 +263,28 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         """Returns FINN DataType of output."""
         return DataType[self.get_nodeattr("outputDataType")]
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         i_bits = self.get_input_datatype().bitwidth()
-        return i_bits * self.get_nodeattr("SIMD")
+        in_width = i_bits * self.get_nodeattr("SIMD")
+        if axi_strm_padding is True:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         o_bits = self.get_output_datatype().bitwidth()
-        return o_bits * self.get_nodeattr("PE")
+        out_width = o_bits * self.get_nodeattr("PE")
+        if axi_strm_padding is True:
+            out_width = roundup_to_integer_multiple(out_width, 8)
+        return out_width
 
-    def get_weightstream_width(self):
+    def get_weightstream_width(self, axi_strm_padding=False):
         pe = self.get_nodeattr("PE")
         simd = self.get_nodeattr("SIMD")
         wp = self.get_weight_datatype().bitwidth()
-        return pe * simd * wp
+        w_width = pe * simd * wp
+        if axi_strm_padding is True:
+            w_width = roundup_to_integer_multiple(w_width, 8)
+        return w_width
 
     def get_ap_int_max_w(self):
         temp_value = super().get_ap_int_max_w()
@@ -983,18 +995,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         self.code_gen_dict["$LAYER_NAME$"] = [
             "{}_{}".format(self.onnx_node.name, self.onnx_node.name)
         ]
-        # make instream width a multiple of 8 for axi interface
-        in_width = self.get_instream_width()
-        if in_width % 8 != 0:
-            in_width = math.floor(in_width / 8) + 8
+        in_width = self.get_instream_width(axi_strm_padding=True)
         self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)]
         self.code_gen_dict["$OUT_RANGE$"] = [
-            "[{}:0]".format(self.get_outstream_width() - 1)
+            "[{}:0]".format(self.get_outstream_width(axi_strm_padding=True) - 1)
         ]
-        # make weight stream width a multiple of 8 for axi interface
-        weight_width = self.get_weightstream_width()
-        if weight_width % 8 != 0:
-            weight_width = math.floor(weight_width / 8) + 8
+        weight_width = self.get_weightstream_width(axi_strm_padding=True)
         self.code_gen_dict["$WEIGHT_RANGE$"] = ["[{}:0]".format(weight_width - 1)]
         self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)]
         mw = self.get_nodeattr("MW")
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index a7c2d5166b6af41327abcfeaa5cb5ae25fd23856..5e77a60de07e0b6de5c001f6e889476f496db50f 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -37,6 +37,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.custom_op.im2col import compute_conv_output_dim
 from finn.core.datatype import DataType
 from onnx import TensorProto, helper
+from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
@@ -91,14 +92,17 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         folded_oshape = self.get_folded_output_shape()
         return np.prod(folded_oshape[:-1])
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         dt_bits = self.get_input_datatype().bitwidth()
         ifm_ch = self.get_nodeattr("NumChannels")
-        return int(dt_bits * ifm_ch)
+        in_width = int(dt_bits * ifm_ch)
+        if axi_strm_padding is True:
+            in_width = roundup_to_integer_multiple(in_width, 8)
+        return in_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         """For streaming maxpool out stream with is the same as in stream width"""
-        return self.get_instream_width()
+        return self.get_instream_width(axi_strm_padding)
 
     def make_shape_compatible_op(self, model):
         exp_ishape = self.get_normal_input_shape()
diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py
index 4d4dee6506f04909c53cd05e4898a7ad77e4a83a..a04b2a886984f3f98bd765ce617be6ca7c0170a8 100644
--- a/src/finn/custom_op/fpgadataflow/tlastmarker.py
+++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py
@@ -27,6 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from finn.custom_op.fpgadataflow import HLSCustomOp
+from finn.util.basic import roundup_to_integer_multiple
 
 
 class TLastMarker(HLSCustomOp):
@@ -133,12 +134,16 @@ class TLastMarker(HLSCustomOp):
     def get_folded_output_shape(self):
         return self.get_folded_input_shape()
 
-    def get_instream_width(self):
+    def get_instream_width(self, axi_strm_padding=False):
         stream_width = self.get_nodeattr("StreamWidth")
+        if axi_strm_padding is True:
+            stream_width = roundup_to_integer_multiple(stream_width, 8)
         return stream_width
 
-    def get_outstream_width(self):
+    def get_outstream_width(self, axi_strm_padding=False):
         stream_width = self.get_nodeattr("StreamWidth")
+        if axi_strm_padding is True:
+            stream_width = roundup_to_integer_multiple(stream_width, 8)
         return stream_width
 
     def strm_decl(self):
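
Note on the padding helper: every hunk above follows one pattern, compute the raw stream width and, when axi_strm_padding is requested, round it up to the next multiple of 8 bits for the AXI stream interface. Below is a minimal sketch of that rounding behaviour; the real helper lives in finn.util.basic, and this standalone re-implementation is only illustrative.

    import math

    def roundup_to_integer_multiple(x, factor):
        # smallest multiple of `factor` that is greater than or equal to x
        return int(math.ceil(x / factor) * factor)

    # a 9-bit stream pads to 16 bits, a 24-bit stream is already byte-aligned
    assert roundup_to_integer_multiple(9, 8) == 16
    assert roundup_to_integer_multiple(24, 8) == 24

Centralizing this also replaces the inline expression removed from streamingfclayer_batch.py: math.floor(in_width / 8) + 8 does not yield a multiple of 8 in general (for in_width = 20 it gives 10), whereas rounding up to the next multiple of 8 does.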