diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index a3aa9d570d0efcbe82090d19a151d4f5b12078b6..a80d2bbefac96e8ec2a48e04179d3d285e78cef7 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -78,24 +78,33 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
 
     def check_divisible_iowidths(self):
         impl_style = self.get_nodeattr("impl_style")
-        if impl_style == "hls":
-            # when using impl_style = hls must have the following
-            # if inWidth > outWidth: inWidth % outWidth = 0
-            # if inWidth < outWidth: outWidth % inWidth = 0
-            iwidth = self.get_nodeattr("inWidth")
-            owidth = self.get_nodeattr("outWidth")
-            if iwidth > owidth:
-                assert (
-                    iwidth % owidth == 0
-                ), """DWC InWidth is bigger than OutWidth and is not divisible by it.
-                Please adjust PE and SIMD values so that InWidth % OutWidth = 0
-                or alternatively use impl_style = vivado"""
-            else:
-                assert (
-                    owidth % iwidth == 0
-                ), """DWC OutWidth is bigger than InWidth and is not divisible by it.
-                Please adjust PE and SIMD values so that OutWidth % InWidth = 0
-                or alternatively use impl_style = vivado"""
+        iwidth = self.get_nodeattr("inWidth")
+        owidth = self.get_nodeattr("outWidth")
+        if impl_style == "vivado":
+            # the AXIS IP we use in vivado mode only supports
+            # stream widths that are divisible by 8
+            iwidth_d8 = iwidth % 8 == 0
+            owidth_d8 = owidth % 8 == 0
+            assert (
+                iwidth_d8 and owidth_d8
+            ), """DWC impl_style=vivado requires
+            stream widths that are divisible by 8: (%d, %d)""" % (
+                iwidth,
+                owidth,
+            )
+
+    def get_iowidth_lcm(self):
+        iwidth = self.get_nodeattr("inWidth")
+        owidth = self.get_nodeattr("outWidth")
+        return int(np.lcm(iwidth, owidth))
+
+    def needs_lcm(self):
+        iwidth = self.get_nodeattr("inWidth")
+        owidth = self.get_nodeattr("outWidth")
+        maxwidth = max(iwidth, owidth)
+        minwidth = min(iwidth, owidth)
+        impl_style = self.get_nodeattr("impl_style")
+        return (impl_style == "hls") and (maxwidth % minwidth != 0)
 
     def get_folded_input_shape(self, ind=0):
         self.check_divisible_iowidths()
@@ -202,6 +211,16 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
             "#define NumInWords %d " % numInWords,
             "#define numReps %d" % numReps,
         ]
+        if self.needs_lcm():
+            lcmWidth = self.get_iowidth_lcm()
+            assert (
+                numInWords % (lcmWidth / inWidth) == 0
+            ), "Error in DWC LCM calculation"
+            numLCMToOut = numInWords // (lcmWidth / inWidth)
+            self.code_gen_dict["$DEFINES$"].append("#define LCMWidth %d" % lcmWidth)
+            self.code_gen_dict["$DEFINES$"].append(
+                "#define NumLCMToOut %d" % (numLCMToOut)
+            )
 
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -226,6 +245,12 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         self.code_gen_dict["$STREAMDECLARATIONS$"].append(
             'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
         )
+        if self.needs_lcm():
+            self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+                'hls::stream<ap_uint<{}>> intermediate ("intermediate");'.format(
+                    self.get_iowidth_lcm()
+                )
+            )
         self.code_gen_dict["$STREAMDECLARATIONS$"].append(
             'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
         )
@@ -233,9 +258,19 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
     def docompute(self):
         # TODO continue with fxns below, they are copy-pasted
        op = "StreamingDataWidthConverter_Batch"
-        self.code_gen_dict["$DOCOMPUTE$"] = [
-            "%s<InWidth, OutWidth, NumInWords>(in0, out, numReps);" % (op)
-        ]
+        if self.needs_lcm():
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                'hls::stream<ap_uint<{}>> intermediate ("intermediate");'.format(
+                    self.get_iowidth_lcm()
+                ),
+                "%s<InWidth, LCMWidth, NumInWords>(in0, intermediate, numReps);" % (op),
+                "%s<LCMWidth, OutWidth, NumLCMToOut>(intermediate, out, numReps);"
+                % (op),
+            ]
+        else:
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                "%s<InWidth, OutWidth, NumInWords>(in0, out, numReps);" % (op)
+            ]
 
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -287,6 +322,10 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         self.code_gen_dict["$PRAGMAS$"].append(
             "#pragma HLS INTERFACE ap_ctrl_none port=return"
         )
+        if self.needs_lcm():
+            self.code_gen_dict["$PRAGMAS$"].append(
+                "#pragma HLS DATAFLOW disable_start_propagation"
+            )
 
     def execute_node(self, context, graph):
         mode = self.get_nodeattr("exec_mode")
@@ -466,3 +505,28 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
             cset_luts += outw
 
         return int(cnt_luts + cset_luts)
+
+    def prepare_rtlsim(self):
+        assert self.get_nodeattr("impl_style") != "vivado", (
+            "StreamingDataWidthConverter impl_style "
+            "cannot be vivado for rtlsim. Only impl_style=hls supported."
+        )
+        super().prepare_rtlsim()
+
+    def code_generation_ipgen(self, model, fpgapart, clk):
+        # no codegen required for impl_style=vivado since
+        # that uses premade, configurable AXIS IP
+        if self.get_nodeattr("impl_style") == "hls":
+            super().code_generation_ipgen(model, fpgapart, clk)
+
+    def ipgen_singlenode_code(self):
+        # no IP generation required for impl_style=vivado since
+        # that uses premade, configurable AXIS IP
+        if self.get_nodeattr("impl_style") == "hls":
+            super().ipgen_singlenode_code()
+        else:
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+            # set ipgen_path and ip_path so that HLSSynthIP
+            # and CreateStitchedIP transformations do not complain
+            self.set_nodeattr("ipgen_path", code_gen_dir)
+            self.set_nodeattr("ip_path", code_gen_dir)
diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py
index efc179923545eb06e4d173c683b0941887f8bb79..632d1f813b4d2509407930bc9294f7531d4c90af 100644
--- a/src/finn/transformation/fpgadataflow/insert_dwc.py
+++ b/src/finn/transformation/fpgadataflow/insert_dwc.py
@@ -83,10 +83,13 @@ class InsertDWC(Transformation):
                             dwc_out_width = n1.get_instream_width()
                             larger_width = max(dwc_in_width, dwc_out_width)
                             smaller_width = min(dwc_in_width, dwc_out_width)
-                            if larger_width % smaller_width == 0:
-                                impl_style = "hls"
-                            else:
+                            both_8bit_aligned = (larger_width % 8 == 0) and (
+                                smaller_width % 8 == 0
+                            )
+                            if both_8bit_aligned:
                                 impl_style = "vivado"
+                            else:
+                                impl_style = "hls"
 
                             # determine shape for dwc
                             dwc_shape = n0.get_normal_output_shape()
diff --git a/src/finn/util/create.py b/src/finn/util/create.py
index 642cabcf6dd320f226539e03fd6800156c9fe852..ed3e1a843eca47d2e20e9ca1c9df0d2d6f5a8a13 100644
--- a/src/finn/util/create.py
+++ b/src/finn/util/create.py
@@ -30,7 +30,11 @@ import numpy as np
 from onnx import TensorProto, helper
 from qonnx.core.datatype import DataType
 from qonnx.core.modelwrapper import ModelWrapper
-from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor, qonnx_make_model
+from qonnx.util.basic import (
+    calculate_signed_dot_prod_range,
+    gen_finn_dt_tensor,
+    qonnx_make_model,
+)
 
 
 def hls_random_mlp_maker(layer_spec):
diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py
index caa22e077f58035ac2acf0a3455ae08a1163cabc..8ab22bcfdcb0312bd49677f0e00d8e97cdcad3c1 100644
--- a/tests/fpgadataflow/test_depthwise_convolution.py
+++ b/tests/fpgadataflow/test_depthwise_convolution.py
@@ -37,7 +37,11 @@ from qonnx.custom_op.general.im2col import compute_conv_output_dim
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.general import GiveUniqueNodeNames
 from qonnx.transformation.infer_shapes import InferShapes
-from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor, qonnx_make_model
+from qonnx.util.basic import (
+    calculate_signed_dot_prod_range,
+    gen_finn_dt_tensor,
+    qonnx_make_model,
+)
 
 import finn.core.onnx_exec as oxe
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py
index 104bfa011fae03aeab0880709763d1b098bbbaa5..2bde148a1499e4c7065ab1e151e3c4198e1e96da 100644
--- a/tests/fpgadataflow/test_fpgadataflow_dwc.py
+++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py
@@ -35,16 +35,16 @@ from qonnx.transformation.general import GiveUniqueNodeNames
 from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
 
 import finn.core.onnx_exec as oxe
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 
 
-def make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype):
+def make_single_dwc_modelwrapper(shape, inWidth, outWidth, finn_dtype, impl_style):
 
-    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, Shape)
-    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, Shape)
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, shape)
 
     DWC_node = helper.make_node(
         "StreamingDataWidthConverter_Batch",
@@ -52,10 +52,11 @@ def make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype):
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        shape=Shape,
-        inWidth=INWidth,
-        outWidth=OUTWidth,
+        shape=shape,
+        inWidth=inWidth,
+        outWidth=outWidth,
         dataType=str(finn_dtype.name),
+        impl_style=impl_style,
     )
 
     graph = helper.make_graph(
@@ -75,34 +76,42 @@ def prepare_inputs(input_tensor, dt):
     return {"inp": input_tensor}
 
 
-# shape
-@pytest.mark.parametrize("Shape", [[1, 4], [1, 2, 8]])
-# inWidth
-@pytest.mark.parametrize("INWidth", [2, 4])
-# outWidth
-@pytest.mark.parametrize("OUTWidth", [2, 4])
-# finn_dtype
-@pytest.mark.parametrize("finn_dtype", [DataType["BIPOLAR"], DataType["INT2"]])
+@pytest.mark.parametrize(
+    "config",
+    [
+        ([1, 24], 6, 4, DataType["INT2"], "hls"),
+        ([1, 24], 4, 6, DataType["INT2"], "hls"),
+        ([1, 4], 2, 4, DataType["BIPOLAR"], "hls"),
+        ([1, 2, 8], 2, 4, DataType["BIPOLAR"], "hls"),
+        ([1, 4], 4, 2, DataType["INT2"], "hls"),
+        ([1, 2, 8], 4, 4, DataType["INT2"], "hls"),
+        ([1, 2, 8], 8, 16, DataType["INT2"], "vivado"),
+    ],
+)
 @pytest.mark.fpgadataflow
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
-
+def test_fpgadataflow_dwc_rtlsim(config):
+    shape, inWidth, outWidth, finn_dtype, impl_style = config
+    test_fpga_part = "xc7z020clg400-1"
+    target_clk_ns = 10.0
     # generate input data
-    x = gen_finn_dt_tensor(finn_dtype, Shape)
+    x = gen_finn_dt_tensor(finn_dtype, shape)
     input_dict = prepare_inputs(x, finn_dtype)
 
-    model = make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype)
-
-    model = model.transform(SetExecMode("rtlsim"))
+    model = make_single_dwc_modelwrapper(
+        shape, inWidth, outWidth, finn_dtype, impl_style
+    )
+    model = model.transform(InsertFIFO(create_shallow_fifos=True))
     model = model.transform(GiveUniqueNodeNames())
-    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+    model = model.transform(PrepareIP(test_fpga_part, 5))
     model = model.transform(HLSSynthIP())
-    model = model.transform(PrepareRTLSim())
-
+    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+    model.set_metadata_prop("exec_mode", "rtlsim")
     y = oxe.execute_onnx(model, input_dict)["outp"]
 
     assert (
         y == x
     ).all(), """The output values are not the same as the input values anymore."""
-    assert y.shape == tuple(Shape), """The output shape is incorrect."""
+    assert y.shape == tuple(shape), """The output shape is incorrect."""
diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py
index f3efd6a686b630cc4031b99efd199490e481aeab..b80ef76a19e487a93b23ae7db17350e85fb66822 100644
--- a/tests/fpgadataflow/test_fpgadataflow_mvau.py
+++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py
@@ -36,7 +36,11 @@ from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.general.multithreshold import multithreshold
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.general import GiveUniqueNodeNames
-from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor, qonnx_make_model
+from qonnx.util.basic import (
+    calculate_signed_dot_prod_range,
+    gen_finn_dt_tensor,
+    qonnx_make_model,
+)
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
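
Note (editorial, not part of the patch): the core of this change is the LCM fallback. When impl_style="hls" and neither stream width divides the other, defines() and docompute() above now chain two HLS converters through an intermediate stream of lcm(inWidth, outWidth) bits. The following is a minimal Python sketch of that two-stage scheme; dwc_via_lcm is an invented name, and LSB-first packing is an assumption made here purely for illustration.

# Sketch of the LCM-based two-stage width conversion enabled by needs_lcm().
# Not part of the patch: dwc_via_lcm is a made-up helper; LSB-first packing
# is assumed for illustration.
import numpy as np


def dwc_via_lcm(words, in_width, out_width):
    # stage 1: pack lcm_width // in_width input words into one wide word
    lcm_width = int(np.lcm(in_width, out_width))
    ratio_in = lcm_width // in_width
    assert len(words) % ratio_in == 0, "Error in DWC LCM calculation"
    intermediate = []
    for i in range(0, len(words), ratio_in):
        packed = 0
        for j, w in enumerate(words[i : i + ratio_in]):
            packed |= w << (j * in_width)  # LSB-first packing (assumed)
        intermediate.append(packed)
    # stage 2: split each wide word into lcm_width // out_width output words
    ratio_out = lcm_width // out_width
    mask = (1 << out_width) - 1
    return [
        (packed >> (j * out_width)) & mask
        for packed in intermediate
        for j in range(ratio_out)
    ]


# 6-bit -> 4-bit, as in the ([1, 24], 6, 4, ...) test case: lcm(6, 4) = 12,
# so 2 input words pack into one 12-bit word, which re-splits into 3 outputs
print(dwc_via_lcm([0b101010, 0b110011], 6, 4))  # -> [10, 14, 12]

For that ([1, 24], 6, 4, DataType["INT2"], "hls") test case, 24 INT2 elements form 48 stream bits, so numInWords = 8 at inWidth = 6; every 2 input words pack into one 12-bit word and each re-splits into 3 output words, which is exactly the numInWords % (lcmWidth / inWidth) == 0 condition asserted in defines().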