diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py index 71e80d7da18c82d8f69a6c37829c4c6fc58b398d..78fc2ccfc92f9b7ca3ae6beafe7d24bdbfada2bc 100644 --- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py +++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py @@ -74,5 +74,4 @@ def hls_synth_res_estimation(model): for this node. Please run "CodeGen_ipgen" transformation and "HLSSynth_IPGen" first to generate the report files""" ) - return res_dict diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 38c2ed6638bef5cd709a7da83218145de91dce0a..9a6f66087fafff3745e239da4cb9f05c4ec73451 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -31,7 +31,7 @@ import numpy as np import os import subprocess from finn.custom_op import CustomOp -from finn.util.basic import CppBuilder, make_build_dir +from finn.util.basic import CppBuilder, make_build_dir, roundup_to_integer_multiple from finn.util.fpgadataflow import ( IPGenBuilder, pyverilate_get_liveness_threshold_cycles, @@ -493,15 +493,28 @@ compilation transformations? """Returns folded output shape (according to neuron folding), if implemented.""" raise Exception("get_folded_output_shape not implemented for this op") - def get_instream_width(self, axi_strm_padding=False): + def get_instream_width(self): """Returns input stream width, if implemented.""" raise Exception("get_instream_width not implemented for this op") - def get_outstream_width(self, axi_strm_padding=False): + def get_outstream_width(self): """Returns output stream width, if implemented.""" raise Exception("get_outstream_width not implemented for this op") + def get_instream_width_padded(self): + """Returns input stream width padded to a multiple of 8. 
This is required + by the AXI Stream spec.""" + in_width = self.get_instream_width() + return roundup_to_integer_multiple(in_width, 8) + + def get_outstream_width_padded(self): + """Returns output stream width padded to a multiple of 8. This is required + by the AXI Stream spec.""" + out_width = self.get_outstream_width() + return roundup_to_integer_multiple(out_width, 8) + def get_ap_int_max_w(self): + "Return the maximum width of any ap_int used in this module." instream = self.get_instream_width() outstream = self.get_outstream_width() return max([instream, outstream]) diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 0c8fe863fef9f2be97ab3d29e00d72b877d8108a..66daa9f7b408a1d17ee3cca6aea5ab4a843f7e4f 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -34,7 +34,6 @@ from finn.core.datatype import DataType from finn.custom_op.fpgadataflow import HLSCustomOp from finn.custom_op.im2col import compute_conv_output_dim from onnx import TensorProto, helper -from finn.util.basic import roundup_to_integer_multiple from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy # ONNX i/o tensor shape assumptions for ConvolutionInputGenerator: @@ -142,7 +141,7 @@ class ConvolutionInputGenerator(HLSCustomOp): """Returns FINN DataType of output.""" return DataType[self.get_nodeattr("outputDataType")] - def get_instream_width(self, axi_strm_padding=False): + def get_instream_width(self): """Returns stream width, input and output stream width are equal for the sliding window function""" ibits = self.get_input_datatype().bitwidth() @@ -150,15 +149,13 @@ class ConvolutionInputGenerator(HLSCustomOp): ifm_ch = self.get_nodeattr("IFMChannels") assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" in_width = simd * ibits - if axi_strm_padding is True: - in_width = 
roundup_to_integer_multiple(in_width, 8) return in_width - def get_outstream_width(self, axi_strm_padding=False): + def get_outstream_width(self): """Returns stream width, input and output stream width are equal for the sliding window function, so the function to determine the input stream width can be reused.""" - return self.get_instream_width(axi_strm_padding) + return self.get_instream_width() def get_number_output_values(self): folded_oshape = self.get_folded_output_shape() diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py index ce4f883fa029225a5748c08463858e3bf1bfd35c..f30871909b1c70f3b5df148f1b6eae22fdbadc25 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -32,7 +32,6 @@ import numpy as np from finn.custom_op.fpgadataflow import HLSCustomOp from finn.core.datatype import DataType from onnx import TensorProto, helper -from finn.util.basic import roundup_to_integer_multiple from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy # does not do anything at the ONNX node-by-node level, and input-output @@ -151,16 +150,12 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): folded_ishape = self.get_folded_input_shape() return np.prod(folded_ishape[:-1]) - def get_instream_width(self, axi_strm_padding=False): + def get_instream_width(self): in_width = self.get_nodeattr("inWidth") - if axi_strm_padding is True: - in_width = roundup_to_integer_multiple(in_width, 8) return in_width - def get_outstream_width(self, axi_strm_padding=False): + def get_outstream_width(self): out_width = self.get_nodeattr("outWidth") - if axi_strm_padding is True: - out_width = roundup_to_integer_multiple(out_width, 8) return out_width def make_shape_compatible_op(self, model): diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py 
b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index f04ee7ca7830760f4ed2804b8b71f8fe5d29325f..46920711e13057178be9fca5fe3a18ce3e14feda 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -279,29 +279,27 @@ class StreamingFCLayer_Batch(HLSCustomOp): """Returns FINN DataType of output.""" return DataType[self.get_nodeattr("outputDataType")] - def get_instream_width(self, axi_strm_padding=False): + def get_instream_width(self): i_bits = self.get_input_datatype().bitwidth() in_width = i_bits * self.get_nodeattr("SIMD") - if axi_strm_padding is True: - in_width = roundup_to_integer_multiple(in_width, 8) return in_width - def get_outstream_width(self, axi_strm_padding=False): + def get_outstream_width(self): o_bits = self.get_output_datatype().bitwidth() out_width = o_bits * self.get_nodeattr("PE") - if axi_strm_padding is True: - out_width = roundup_to_integer_multiple(out_width, 8) return out_width - def get_weightstream_width(self, axi_strm_padding=False): + def get_weightstream_width(self): pe = self.get_nodeattr("PE") simd = self.get_nodeattr("SIMD") wp = self.get_weight_datatype().bitwidth() w_width = pe * simd * wp - if axi_strm_padding is True: - w_width = roundup_to_integer_multiple(w_width, 8) return w_width + def get_weightstream_width_padded(self): + weight_width = self.get_weightstream_width() + return roundup_to_integer_multiple(weight_width, 8) + def get_ap_int_max_w(self): temp_value = super().get_ap_int_max_w() weightstream = self.get_weightstream_width() @@ -982,13 +980,13 @@ class StreamingFCLayer_Batch(HLSCustomOp): "{}_{}".format(self.onnx_node.name, self.onnx_node.name) ] # make instream width a multiple of 8 for AXI stream interface - in_width = roundup_to_integer_multiple(self.get_instream_width(), 8) + in_width = self.get_instream_width_padded() self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$OUT_RANGE$"] = [ 
- "[{}:0]".format(self.get_outstream_width(axi_strm_padding=True) - 1) + "[{}:0]".format(self.get_outstream_width_padded() - 1) ] # make weight stream width a multiple of 8 for AXI stream interface - weight_width = roundup_to_integer_multiple(self.get_weightstream_width(), 8) + weight_width = self.get_weightstream_width_padded() self.code_gen_dict["$WEIGHT_RANGE$"] = ["[{}:0]".format(weight_width - 1)] self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)] self.code_gen_dict["$WSTREAM_DEPTH$"] = [str(self.calc_wmem())] diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 6e004c47b1e13d95efa356b6b8984688f54027cc..eb96c6c04eb0b7b83c3f925e10f86b17ec399e42 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -33,7 +33,6 @@ import subprocess from finn.custom_op.fpgadataflow import HLSCustomOp from finn.core.datatype import DataType from onnx import TensorProto, helper -from finn.util.basic import roundup_to_integer_multiple from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy from . import templates @@ -110,7 +109,7 @@ class StreamingFIFO(HLSCustomOp): "{}_{}".format(self.onnx_node.name, self.onnx_node.name) ] # make instream width a multiple of 8 for axi interface - in_width = self.get_instream_width(axi_strm_padding=True) + in_width = self.get_instream_width_padded() self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$OUT_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$WIDTH$"] = [str(in_width)] @@ -164,6 +163,8 @@ class StreamingFIFO(HLSCustomOp): def get_normal_input_shape(self): depth = self.get_nodeattr("depth") + # depth has to be between 2 and 256 with the current + # StreamingFIFO implementation assert ( depth >= 2 ), """Depth is too low. 
Please set node attribute "depth" to a value @@ -172,10 +173,22 @@ class StreamingFIFO(HLSCustomOp): depth <= 256 ), """Depth is too high. Please set node attribute "depth" to a value between 2 and 256""" + # derive normal shape from folded shape + # StreamingFIFOs are inserted in between fpgadataflow nodes + # the folded shape could be for example (1, nf, pe) + # with nf (neuron folding): mh // pe + # the normal input shape is in this case (1, mh) + # so to achieve this the two inner dimensions are multiplied + # and together with all previous dimensions + # this gives the normal input shape + folded_shape = self.get_nodeattr("folded_shape") + # extract inner dimension inner_dim = folded_shape[-1] + # multiply with the next inner dimension folding_factor = folded_shape[-2] * inner_dim normal_ishape = [] + # create the normal_ishape for i in range(len(folded_shape) - 2): normal_ishape.append(folded_shape[i]) normal_ishape.append(folding_factor) @@ -191,20 +204,16 @@ class StreamingFIFO(HLSCustomOp): def get_folded_output_shape(self): return self.get_nodeattr("folded_shape") - def get_instream_width(self, axi_strm_padding=False): + def get_instream_width(self): dtype = DataType[self.get_nodeattr("dataType")] folded_shape = self.get_nodeattr("folded_shape") in_width = folded_shape[-1] * dtype.bitwidth() - if axi_strm_padding is True: - in_width = roundup_to_integer_multiple(in_width, 8) return in_width - def get_outstream_width(self, axi_strm_padding=False): + def get_outstream_width(self): dtype = DataType[self.get_nodeattr("dataType")] folded_shape = self.get_nodeattr("folded_shape") in_width = folded_shape[-1] * dtype.bitwidth() - if axi_strm_padding is True: - in_width = roundup_to_integer_multiple(in_width, 8) return in_width def execute_node(self, context, graph): diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index 
ef1a5ee1bdc0bbe5c773aa375bf4402a8cb16ddb..83bc19030ebba66907e08c5b1e52d7c0ff9207a6 100644 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -33,7 +33,6 @@ from finn.custom_op.fpgadataflow import HLSCustomOp from finn.custom_op.im2col import compute_conv_output_dim from finn.core.datatype import DataType from onnx import TensorProto, helper -from finn.util.basic import roundup_to_integer_multiple from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -88,17 +87,15 @@ class StreamingMaxPool_Batch(HLSCustomOp): folded_oshape = self.get_folded_output_shape() return np.prod(folded_oshape[:-1]) - def get_instream_width(self, axi_strm_padding=False): + def get_instream_width(self): dt_bits = self.get_input_datatype().bitwidth() ifm_ch = self.get_nodeattr("NumChannels") in_width = int(dt_bits * ifm_ch) - if axi_strm_padding is True: - in_width = roundup_to_integer_multiple(in_width, 8) return in_width - def get_outstream_width(self, axi_strm_padding=False): + def get_outstream_width(self): """For streaming maxpool out stream with is the same as in stream width""" - return self.get_instream_width(axi_strm_padding) + return self.get_instream_width() def make_shape_compatible_op(self, model): exp_ishape = self.get_normal_input_shape() diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index e5a5fed6c9d5d31fbf0082707879480e0c0a2dc7..25ea05e3607a52731ae1b64de421837bf137ee2b 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -27,7 +27,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from finn.custom_op.fpgadataflow import HLSCustomOp -from finn.util.basic import roundup_to_integer_multiple class TLastMarker(HLSCustomOp): @@ -148,16 +147,12 @@ class TLastMarker(HLSCustomOp): def get_folded_output_shape(self): return self.get_folded_input_shape() - def get_instream_width(self, axi_strm_padding=False): + def get_instream_width(self): stream_width = self.get_nodeattr("StreamWidth") - if axi_strm_padding is True: - stream_width = roundup_to_integer_multiple(stream_width, 8) return stream_width - def get_outstream_width(self, axi_strm_padding=False): + def get_outstream_width(self): stream_width = self.get_nodeattr("StreamWidth") - if axi_strm_padding is True: - stream_width = roundup_to_integer_multiple(stream_width, 8) return stream_width def strm_decl(self): diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index b80f2fbc1d9893d304f28c3494c44a69a1db052e..f66d0dc087ecbdd112422484ee1e01cb5ceef1c0 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -27,14 +27,20 @@ def _suitable_node(node): class InsertFIFO(Transformation): - """Ensure that the graph is terminated with a TLastMarker node, inserting - one if necessary.""" + """Inserting FIFOs in the beginning and end of the graph as well as + between fpgadataflow nodes. + + Takes the setting for the depth from the surrounding nodes by extracting + node attribute 'outFIFODepth' of the previous and node attribute 'inFIFODepth' + of the subsequent node. max() of these two values sets the FIFO depth. 
+ The other node attributes necessary to create a FIFO node are taken from the + node the FIFO node is inserted after: 'folded_shape' and 'dtype'""" def __init__(self): super().__init__() def apply(self, model): - # default depth for FIFOs graph = model.graph node_ind = -1 graph_modified = False @@ -50,10 +56,19 @@ class InsertFIFO(Transformation): fld_shape = n0.get_folded_output_shape() dtype = n0.get_output_datatype() + # check if folded_shape of output of first node and + # input of the second node is equal + n1 = getCustomOp(consumer) + assert ( + fld_shape == n1.get_folded_input_shape() + ), """The + folded output shape of the first node is not the same as the + folded input shape of the second node. A streaming fifo can't + be implemented in between these nodes.""" + # check if outFIFOdepth attribute of first node # and inFIFOdepth attribute of consumer node is equal n0_depth = n0.get_nodeattr("outFIFODepth") - n1 = getCustomOp(consumer) n1_depth = n1.get_nodeattr("inFIFODepth") if n0_depth == n1_depth: fifo_depth = n0_depth @@ -69,6 +84,7 @@ class InsertFIFO(Transformation): n0.get_normal_output_shape(), ) graph.value_info.append(fifo_output_tensor) + model.set_tensor_datatype(fifo_output_tensor.name, dtype) fifo_node = oh.make_node( "StreamingFIFO", @@ -104,6 +120,7 @@ class InsertFIFO(Transformation): n0.get_normal_input_shape(), ) graph.value_info.append(fifo_output_tensor) + model.set_tensor_datatype(fifo_output_tensor.name, dtype) fifo_node = oh.make_node( "StreamingFIFO", @@ -142,6 +159,7 @@ class InsertFIFO(Transformation): n0.get_normal_output_shape(), ) graph.value_info.append(fifo_input_tensor) + model.set_tensor_datatype(fifo_input_tensor.name, dtype) fifo_node = oh.make_node( "StreamingFIFO", diff --git a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py index 26fd247aaf517158078a8cdbb577a55f1fdae6fa..6d72d9983d7a99d495b4e03e5ff0b5b633ee16ae 100644 --- 
a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py +++ b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py @@ -219,7 +219,7 @@ def test_end2end_tfc_w1a1_verify_dataflow_part(): ret_rtlsim_whole = execute_onnx(model, inp_dict, True) res_rtlsim_whole = ret_rtlsim_whole[out_name] assert np.isclose(res_npysim, res_rtlsim_nodebynode).all() - assert np.isclose(res_rtlsim_nodebynode, res_rtlsim_whole).all() + assert np.isclose(res_npysim, res_rtlsim_whole).all() def test_end2end_tfc_w1a1_verify_all():