diff --git a/finn-rtllib/memstream/hdl/Q_srl.v b/finn-rtllib/memstream/hdl/Q_srl.v index 80b015f6d4eb69df36831b25262cda3539ac8ae9..6c619c51ceb4a99a077fc61c52ce81763cfd27f5 100644 --- a/finn-rtllib/memstream/hdl/Q_srl.v +++ b/finn-rtllib/memstream/hdl/Q_srl.v @@ -193,7 +193,7 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count); if (shift_en_) begin // synthesis loop_limit 256 for (a_=depth-2; a_>0; a_=a_-1) begin - srl[a_] <= srl[a_-1]; + srl[a_] = srl[a_-1]; end srl[0] <= i_d; end diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py index c7db5b1d9d22ea89740f4c82633c96746a6fa5ee..958890f9e6a84d796ecb4a817dbf740c117ede0b 100644 --- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py +++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py @@ -25,7 +25,7 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +import warnings import os import xml.etree.ElementTree as ET @@ -50,9 +50,16 @@ def hls_synth_res_estimation(model): inst = registry.custom_op[op_type](node) code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen") if code_gen_dir == "": - raise Exception( - """Please run "CodeGen_ipgen" transformation and - "HLSSynth_IPGen" first to generate the report files""" + res_dict[node.name] = dict() + res_dict[node.name]["BRAM_18K"] = 0 + res_dict[node.name]["FF"] = 0 + res_dict[node.name]["LUT"] = 0 + res_dict[node.name]["DSP48E"] = 0 + res_dict[node.name]["URAM"] = 0 + warnings.warn( + """Could not find report files, values will be set to zero + for this node. 
Please run "CodeGen_ipgen" transformation and + "HLSSynth_IPGen" first to generate the report files""" ) else: xmlfile = "{}/project_{}/sol1/syn/report/{}_csynth.xml".format( @@ -67,9 +74,16 @@ def hls_synth_res_estimation(model): for child in item: res_dict[node.name][child.tag] = child.text else: - raise Exception( - """Please run "HLSSynth_IPGen" first - to generate the report files""" + res_dict[node.name] = dict() + res_dict[node.name]["BRAM_18K"] = 0 + res_dict[node.name]["FF"] = 0 + res_dict[node.name]["LUT"] = 0 + res_dict[node.name]["DSP48E"] = 0 + res_dict[node.name]["URAM"] = 0 + warnings.warn( + """Could not find report files, values will be set to zero + for this node. Please run "HLSSynth_IPGen" first + to generate the report files""" ) return res_dict diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 4fc69985f7cdf09298f79055e159f63b2eabaf97..8d8d64b708117d48dda7a6bff8b35f4208e00dd1 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -428,11 +428,11 @@ compilation transformations? 
"""Returns folded output shape (according to neuron folding), if implemented.""" raise Exception("get_folded_output_shape not implemented for this op") - def get_instream_width(self): + def get_instream_width(self, axi_strm_padding=False): """Returns input stream width, if implemented.""" raise Exception("get_instream_width not implemented for this op") - def get_outstream_width(self): + def get_outstream_width(self, axi_strm_padding=False): """Returns output stream width, if implemented.""" raise Exception("get_outstream_width not implemented for this op") diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 2ef5d350fb972e448b9a3745eb8c98197ab87d94..a695fe6df209bb3810664c2ce7af5410e03a077c 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -39,6 +39,7 @@ from finn.core.datatype import DataType from finn.custom_op.fpgadataflow import HLSCustomOp from finn.custom_op.im2col import compute_conv_output_dim from onnx import TensorProto, helper +from finn.util.basic import roundup_to_integer_multiple from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy # ONNX i/o tensor shape assumptions for ConvolutionInputGenerator: @@ -140,20 +141,23 @@ class ConvolutionInputGenerator(HLSCustomOp): """Returns FINN DataType of output.""" return DataType[self.get_nodeattr("outputDataType")] - def get_instream_width(self): + def get_instream_width(self, axi_strm_padding=False): """Returns stream width, input and output stream width are equal for the sliding window function""" ibits = self.get_input_datatype().bitwidth() simd = self.get_nodeattr("SIMD") ifm_ch = self.get_nodeattr("IFMChannels") assert simd == ifm_ch, "SWG currently requires SIMD=IFM" - return simd * ibits + in_width = simd * ibits + if axi_strm_padding is True: + in_width = roundup_to_integer_multiple(in_width, 8) + 
return in_width - def get_outstream_width(self): + def get_outstream_width(self, axi_strm_padding=False): """Returns stream width, input and output stream width are equal for the sliding window function, so the function to determine the input stream width can be reused.""" - return self.get_instream_width() + return self.get_instream_width(axi_strm_padding) def get_number_output_values(self): folded_oshape = self.get_folded_output_shape() diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py index 5e4c99aa41216b05f66da8341870269c620c6c40..1a9ee1118596a95b624258d3ee8fe4c37a71edde 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -36,6 +36,7 @@ except ModuleNotFoundError: from finn.custom_op.fpgadataflow import HLSCustomOp from finn.core.datatype import DataType from onnx import TensorProto, helper +from finn.util.basic import roundup_to_integer_multiple from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy # does not do anything at the ONNX node-by-node level, and input-output @@ -154,11 +155,17 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): folded_ishape = self.get_folded_input_shape() return np.prod(folded_ishape[:-1]) - def get_instream_width(self): - return self.get_nodeattr("inWidth") - - def get_outstream_width(self): - return self.get_nodeattr("outWidth") + def get_instream_width(self, axi_strm_padding=False): + in_width = self.get_nodeattr("inWidth") + if axi_strm_padding is True: + in_width = roundup_to_integer_multiple(in_width, 8) + return in_width + + def get_outstream_width(self, axi_strm_padding=False): + out_width = self.get_nodeattr("outWidth") + if axi_strm_padding is True: + out_width = roundup_to_integer_multiple(out_width, 8) + return out_width def make_shape_compatible_op(self, model): exp_ishape = 
self.get_normal_input_shape() diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index 7784024aae102989338df9b040fcfc1f9dc36983..f00c19ff1a7d2758af0e3320677b32b87279082a 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -40,7 +40,10 @@ except ModuleNotFoundError: from onnx import TensorProto, helper from finn.core.datatype import DataType from finn.custom_op.fpgadataflow import HLSCustomOp -from finn.util.basic import interleave_matrix_outer_dim_from_partitions +from finn.util.basic import ( + interleave_matrix_outer_dim_from_partitions, + roundup_to_integer_multiple, +) from finn.util.data_packing import ( npy_to_rtlsim_input, numpy_to_hls_code, @@ -260,19 +263,28 @@ class StreamingFCLayer_Batch(HLSCustomOp): """Returns FINN DataType of output.""" return DataType[self.get_nodeattr("outputDataType")] - def get_instream_width(self): + def get_instream_width(self, axi_strm_padding=False): i_bits = self.get_input_datatype().bitwidth() - return i_bits * self.get_nodeattr("SIMD") + in_width = i_bits * self.get_nodeattr("SIMD") + if axi_strm_padding is True: + in_width = roundup_to_integer_multiple(in_width, 8) + return in_width - def get_outstream_width(self): + def get_outstream_width(self, axi_strm_padding=False): o_bits = self.get_output_datatype().bitwidth() - return o_bits * self.get_nodeattr("PE") + out_width = o_bits * self.get_nodeattr("PE") + if axi_strm_padding is True: + out_width = roundup_to_integer_multiple(out_width, 8) + return out_width - def get_weightstream_width(self): + def get_weightstream_width(self, axi_strm_padding=False): pe = self.get_nodeattr("PE") simd = self.get_nodeattr("SIMD") wp = self.get_weight_datatype().bitwidth() - return pe * simd * wp + w_width = pe * simd * wp + if axi_strm_padding is True: + w_width = roundup_to_integer_multiple(w_width, 8) + return w_width def 
get_ap_int_max_w(self): temp_value = super().get_ap_int_max_w() @@ -981,18 +993,12 @@ class StreamingFCLayer_Batch(HLSCustomOp): self.code_gen_dict["$LAYER_NAME$"] = [ "{}_{}".format(self.onnx_node.name, self.onnx_node.name) ] - # make instream width a multiple of 8 for axi interface - in_width = self.get_instream_width() - if in_width % 8 != 0: - in_width = math.floor(in_width / 8) + 8 + in_width = self.get_instream_width(axi_strm_padding=True) self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$OUT_RANGE$"] = [ - "[{}:0]".format(self.get_outstream_width() - 1) + "[{}:0]".format(self.get_outstream_width(axi_strm_padding=True) - 1) ] - # make weight stream width a multiple of 8 for axi interface - weight_width = self.get_weightstream_width() - if weight_width % 8 != 0: - weight_width = math.floor(weight_width / 8) + 8 + weight_width = self.get_weightstream_width(axi_strm_padding=True) self.code_gen_dict["$WEIGHT_RANGE$"] = ["[{}:0]".format(weight_width - 1)] self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)] mw = self.get_nodeattr("MW") diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py new file mode 100644 index 0000000000000000000000000000000000000000..8fcb1fe43a3927a7d49b6e041727a54cc384942f --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -0,0 +1,293 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os +import numpy as np +from shutil import copy +import subprocess + +from pyverilator import PyVerilator +from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.core.datatype import DataType +from onnx import TensorProto, helper +from finn.util.basic import roundup_to_integer_multiple +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + +from . 
import templates + + +class StreamingFIFO(HLSCustomOp): + def __init__(self, onnx_node): + super().__init__(onnx_node) + self.strm_fifo_wrapper = templates.strm_fifo_wrapper + + def get_nodeattr_types(self): + my_attrs = { + # FIFO depth + "depth": ("i", True, 0), + # folded shape of input/output + "folded_shape": ("ints", True, []), + # FINN DataTypes for inputs/outputs + "dataType": ("s", True, ""), + } + my_attrs.update(super().get_nodeattr_types()) + + return my_attrs + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == tuple(exp_ishape), "Unexpect input shape for StreamingFIFO." + # implement tensor with correct shape + values = np.random.randn(*oshape).astype(np.float32) + return helper.make_node( + "Constant", + inputs=[], + outputs=[self.onnx_node.output[0]], + value=helper.make_tensor( + name="const_tensor", + data_type=TensorProto.FLOAT, + dims=values.shape, + vals=values.flatten().astype(float), + ), + ) + + def infer_node_datatype(self, model): + node = self.onnx_node + # data type stays the same + dtype = model.get_tensor_datatype(node.input[0]) + model.set_tensor_datatype(node.output[0], dtype) + + def verify_node(self): + pass + + def code_generation_ipgen(self, model, fpgapart, clk): + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + # copy Q_srl.v from finn-rtllib to code gen directory + memstream_dir = "/workspace/finn/finn-rtllib/memstream/hdl/" + Q_file = os.path.join(memstream_dir, "Q_srl.v") + copy(Q_file, code_gen_dir) + + # empty code gen dictionary for new entries + self.code_gen_dict.clear() + self.code_gen_dict["$TOPNAME$"] = ["{}".format(self.onnx_node.name)] + self.code_gen_dict["$LAYER_NAME$"] = [ + "{}_{}".format(self.onnx_node.name, self.onnx_node.name) + ] + # make instream width a multiple of 8 for axi interface + in_width = 
self.get_instream_width(axi_strm_padding=True) + self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)] + self.code_gen_dict["$OUT_RANGE$"] = ["[{}:0]".format(in_width - 1)] + self.code_gen_dict["$WIDTH$"] = [str(in_width)] + self.code_gen_dict["$DEPTH$"] = [str(self.get_nodeattr("depth"))] + + template = self.strm_fifo_wrapper + + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + f = open(os.path.join(code_gen_dir, "{}.v".format(self.onnx_node.name)), "w",) + f.write(template) + f.close() + self.code_gen_dict.clear() + + def ipgen_singlenode_code(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + # prepare the IP packaging tcl template + template = templates.ip_package_tcl + self.code_gen_dict.clear() + self.code_gen_dict["$TOPNAME$"] = ["{}".format(self.onnx_node.name)] + self.code_gen_dict["$VERILOG_DIR$"] = [code_gen_dir] + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + f = open(os.path.join(code_gen_dir, "package_ip.tcl"), "w") + f.write(template) + f.close() + # create a shell script and call Vivado to invoke the IP pkg script + make_project_sh = code_gen_dir + "/make_ip.sh" + working_dir = os.environ["PWD"] + with open(make_project_sh, "w") as f: + f.write("#!/bin/bash \n") + f.write("cd {}\n".format(code_gen_dir)) + f.write("vivado -mode batch -source package_ip.tcl\n") + f.write("cd {}\n".format(working_dir)) + bash_command = ["bash", make_project_sh] + process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_compile.communicate() + # set ipgen_path and ip_path to point to the new packaged IP + self.set_nodeattr("ipgen_path", code_gen_dir) + self.set_nodeattr("ip_path", code_gen_dir) + vlnv = "xilinx.com:hls:%s:1.0" % 
(self.onnx_node.name) + self.set_nodeattr("ip_vlnv", vlnv) + self.code_gen_dict.clear() + + def get_normal_input_shape(self): + depth = self.get_nodeattr("depth") + assert ( + depth >= 2 + ), """Depth is too low. Please set node attribute "depth" to a value + between 2 and 256""" + assert ( + depth <= 256 + ), """Depth is too high. Please set node attribute "depth" to a value + between 2 and 256""" + folded_shape = self.get_nodeattr("folded_shape") + inner_dim = folded_shape[-1] + folding_factor = folded_shape[-2] * inner_dim + normal_ishape = [] + for i in range(len(folded_shape) - 2): + normal_ishape.append(folded_shape[i]) + normal_ishape.append(folding_factor) + + return normal_ishape + + def get_normal_output_shape(self): + return self.get_normal_input_shape() + + def get_folded_input_shape(self): + return self.get_nodeattr("folded_shape") + + def get_folded_output_shape(self): + return self.get_nodeattr("folded_shape") + + def get_instream_width(self, axi_strm_padding=False): + dtype = DataType[self.get_nodeattr("dataType")] + folded_shape = self.get_nodeattr("folded_shape") + in_width = folded_shape[-1] * dtype.bitwidth() + if axi_strm_padding is True: + in_width = roundup_to_integer_multiple(in_width, 8) + return in_width + + def get_outstream_width(self, axi_strm_padding=False): + dtype = DataType[self.get_nodeattr("dataType")] + folded_shape = self.get_nodeattr("folded_shape") + in_width = folded_shape[-1] * dtype.bitwidth() + if axi_strm_padding is True: + in_width = roundup_to_integer_multiple(in_width, 8) + return in_width + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + inp = context[node.input[0]] + exp_shape = self.get_normal_input_shape() + + if mode == "npysim": + output = inp + output = np.asarray([output], dtype=np.float32).reshape(*exp_shape) + context[node.output[0]] = output + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + # create a npy file for 
the input of the node + assert ( + str(inp.dtype) == "float32" + ), """Input datatype is + not float32 as expected.""" + expected_inp_shape = self.get_folded_input_shape() + reshaped_input = inp.reshape(expected_inp_shape) + if DataType[self.get_nodeattr("dataType")] == DataType.BIPOLAR: + # store bipolar activations as binary + reshaped_input = (reshaped_input + 1) / 2 + export_idt = DataType.BINARY + else: + export_idt = DataType[self.get_nodeattr("dataType")] + # make copy before saving the array + reshaped_input = reshaped_input.copy() + np.save( + os.path.join(code_gen_dir, "input_0.npy"), reshaped_input, + ) + verilog_file = os.path.join( + code_gen_dir, "{}.v".format(self.onnx_node.name) + ) + if os.path.isfile(verilog_file): + nbits = self.get_instream_width(axi_strm_padding=True) + inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + sim = PyVerilator.build(verilog_file, verilog_path=[code_gen_dir],) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + output = self.rtlsim(sim, inp) + odt = DataType[self.get_nodeattr("dataType")] + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width(axi_strm_padding=True) + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + + # load and reshape output + output = np.load(out_npy_path) + oshape = self.get_normal_output_shape() + output = np.asarray([output], dtype=np.float32).reshape(*oshape) + context[node.output[0]] = output + + else: + raise Exception( + """Found no verilog files for this node, + did you run the codegen_ipgen transformation?""" + ) + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + return np.prod(folded_oshape[:-1]) + + def get_number_input_values(self): + folded_ishape = self.get_folded_input_shape() + return np.prod(folded_ishape[:-1]) + + def global_includes(self): 
+ pass + + def defines(self, var): + pass + + def read_npy_data(self): + pass + + def strm_decl(self): + pass + + def docompute(self): + pass + + def dataoutstrm(self): + pass + + def save_as_npy(self): + pass + + def blackboxfunction(self): + pass + + def pragmas(self): + pass diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index a7c2d5166b6af41327abcfeaa5cb5ae25fd23856..5e77a60de07e0b6de5c001f6e889476f496db50f 100644 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -37,6 +37,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp from finn.custom_op.im2col import compute_conv_output_dim from finn.core.datatype import DataType from onnx import TensorProto, helper +from finn.util.basic import roundup_to_integer_multiple from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -91,14 +92,17 @@ class StreamingMaxPool_Batch(HLSCustomOp): folded_oshape = self.get_folded_output_shape() return np.prod(folded_oshape[:-1]) - def get_instream_width(self): + def get_instream_width(self, axi_strm_padding=False): dt_bits = self.get_input_datatype().bitwidth() ifm_ch = self.get_nodeattr("NumChannels") - return int(dt_bits * ifm_ch) + in_width = int(dt_bits * ifm_ch) + if axi_strm_padding is True: + in_width = roundup_to_integer_multiple(in_width, 8) + return in_width - def get_outstream_width(self): + def get_outstream_width(self, axi_strm_padding=False): """For streaming maxpool out stream with is the same as in stream width""" - return self.get_instream_width() + return self.get_instream_width(axi_strm_padding) def make_shape_compatible_op(self, model): exp_ishape = self.get_normal_input_shape() diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 5323aac2e344fb8b3c1166e695753e68a435b08f..c53a17aafc496a2ffb6dd8009f8bbf7358b90737 
100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -415,3 +415,43 @@ ipx::create_xgui_files [ipx::current_core] ipx::update_checksums [ipx::current_core] ipx::save_core [ipx::current_core] """ + +strm_fifo_wrapper = """ +module $TOPNAME$( +ap_clk, +ap_rst_n, +in0_V_V_TDATA, +in0_V_V_TVALID, +in0_V_V_TREADY, +out_V_V_TDATA, +out_V_V_TVALID, +out_V_V_TREADY +); + +input ap_clk; +input ap_rst_n; +input $IN_RANGE$ in0_V_V_TDATA; +input in0_V_V_TVALID; +output in0_V_V_TREADY; +output $OUT_RANGE$ out_V_V_TDATA; +output out_V_V_TVALID; +input out_V_V_TREADY; + +Q_srl #( +.depth($DEPTH$), +.width($WIDTH$) +) +$LAYER_NAME$ +( + .clock(ap_clk), + .reset(!ap_rst_n), + .i_d(in0_V_V_TDATA), + .i_v(in0_V_V_TVALID), + .i_r(in0_V_V_TREADY), + .o_d(out_V_V_TDATA), + .o_v(out_V_V_TVALID), + .o_r(out_V_V_TREADY) +); + +endmodule +""" diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index 4d4dee6506f04909c53cd05e4898a7ad77e4a83a..a04b2a886984f3f98bd765ce617be6ca7c0170a8 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.util.basic import roundup_to_integer_multiple class TLastMarker(HLSCustomOp): @@ -133,12 +134,16 @@ class TLastMarker(HLSCustomOp): def get_folded_output_shape(self): return self.get_folded_input_shape() - def get_instream_width(self): + def get_instream_width(self, axi_strm_padding=False): stream_width = self.get_nodeattr("StreamWidth") + if axi_strm_padding is True: + stream_width = roundup_to_integer_multiple(stream_width, 8) return stream_width - def get_outstream_width(self): + def get_outstream_width(self, axi_strm_padding=False): stream_width = self.get_nodeattr("StreamWidth") + if axi_strm_padding is True: + stream_width = roundup_to_integer_multiple(stream_width, 8) return stream_width def strm_decl(self): diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py index c797affff9dbf1310c413db0847e0e2dae222a97..411311c2b9def953ee5ac6d03adfafb81704c177 100644 --- a/src/finn/custom_op/registry.py +++ b/src/finn/custom_op/registry.py @@ -33,6 +33,7 @@ from finn.custom_op.fpgadataflow.convolutioninputgenerator import ( ) from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch +from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO from finn.custom_op.im2col import Im2Col from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker from finn.custom_op.multithreshold import MultiThreshold @@ -56,6 +57,7 @@ custom_op["TLastMarker"] = TLastMarker custom_op["StreamingDataflowPartition"] = StreamingDataflowPartition custom_op["MaxPoolNHWC"] = MaxPoolNHWC custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch +custom_op["StreamingFIFO"] = StreamingFIFO def getCustomOp(node): diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py new file mode 100644 index 
0000000000000000000000000000000000000000..c7efb95c8df4fbe83c210f7a3f0832f3e2a3d18d --- /dev/null +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -0,0 +1,149 @@ +from onnx import TensorProto +from onnx import helper as oh + +from finn.custom_op.registry import getCustomOp +from finn.transformation import Transformation +from finn.util.fpgadataflow import is_fpgadataflow_node + + +def _is_fifo_node(node): + if node.op_type == "StreamingFIFO": + return True + else: + return False + + +def _suitable_node(node): + if node is not None: + if is_fpgadataflow_node(node) is True: + if _is_fifo_node(node) is False: + return True + else: + return False + else: + return False + else: + return False + + +class InsertFIFO(Transformation): + """Insert StreamingFIFO nodes between connected fpgadataflow nodes and, + where missing, at the first and last node of the graph.""" + + def __init__(self): + super().__init__() + + def apply(self, model): + # default depth for FIFOs + default_depth = 2 + graph = model.graph + node_ind = -1 + graph_modified = False + for n in graph.node: + node_ind += 1 + if _suitable_node(n): + n_output = n.output[0] + consumer = model.find_consumer(n_output) + if _suitable_node(consumer) is True: + graph_modified = True + n0 = getCustomOp(n) + # determine fifo node attributes + fld_shape = n0.get_folded_output_shape() + dtype = n0.get_output_datatype() + + # create fifo node + fifo_output_tensor = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + n0.get_normal_output_shape(), + ) + graph.value_info.append(fifo_output_tensor) + + fifo_node = oh.make_node( + "StreamingFIFO", + [n_output], + [fifo_output_tensor.name], + domain="finn", + backend="fpgadataflow", + depth=default_depth, + folded_shape=fld_shape, + dataType=str(dtype.name), + ) + # insert fifo + graph.node.insert(node_ind + 1, fifo_node) + + # set fifo output tensor as new input tensor of second node + consumer.input[0] = fifo_output_tensor.name + + if graph_modified is False: + # insert 
FIFO as first node + if graph.node[0].op_type != "StreamingFIFO": + n = graph.node[0] + n_input = n.input[0] + n0 = getCustomOp(n) + # determine fifo node attributes + fld_shape = n0.get_folded_input_shape() + dtype = n0.get_input_datatype() + + # create fifo node + fifo_output_tensor = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + n0.get_normal_input_shape(), + ) + graph.value_info.append(fifo_output_tensor) + + fifo_node = oh.make_node( + "StreamingFIFO", + [n_input], + [fifo_output_tensor.name], + domain="finn", + backend="fpgadataflow", + depth=default_depth, + folded_shape=fld_shape, + dataType=str(dtype.name), + ) + # insert fifo + graph.node.insert(0, fifo_node) + + # set fifo output tensor as new input tensor of second node + n.input[0] = fifo_output_tensor.name + + # insert FIFO as last node + if graph.node[-1].op_type != "StreamingFIFO": + n = graph.node[-1] + assert ( + n.op_type != "TLastMarker" + ), """Insert tlast marker should be done + after inserting the FIFOs""" + graph_out_name = graph.output[0].name + n0 = getCustomOp(n) + # determine fifo node attributes + fld_shape = n0.get_folded_output_shape() + dtype = n0.get_output_datatype() + + # create fifo node + fifo_input_tensor = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + n0.get_normal_output_shape(), + ) + graph.value_info.append(fifo_input_tensor) + + fifo_node = oh.make_node( + "StreamingFIFO", + [fifo_input_tensor.name], + [graph_out_name], + domain="finn", + backend="fpgadataflow", + depth=default_depth, + folded_shape=fld_shape, + dataType=str(dtype.name), + ) + # insert fifo + graph.node.append(fifo_node) + + # set fifo input tensor as new output tensor of the former last node + n.output[0] = fifo_input_tensor.name + + return (model, graph_modified) diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index 
81cb954bb4503c8daf18bad5881661018e9d17b7..4a7845ee4f6f43edb067351352925d6c8bcb4fce 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -85,6 +85,7 @@ pynq_driver_template = """ from pynq import Overlay import numpy as np from pynq import allocate +import time from finn.util.data_packing import ( finnpy_to_packed_bytearray, packed_bytearray_to_finnpy @@ -129,12 +130,21 @@ np.copyto(ibuf_packed_device, ibuf_packed) # allocate a PYNQ buffer for the returned packed output buffer obuf_packed = allocate(shape=oshape_packed, dtype=np.uint8) +# measure runtime of network +start = time.time() + # set up the DMA and wait until all transfers complete dma.sendchannel.transfer(ibuf_packed_device) dma.recvchannel.transfer(obuf_packed) dma.sendchannel.wait() dma.recvchannel.wait() +end = time.time() +runtime = end - start +file = open("nw_runtime.txt", "w") +file.write(str(runtime)) +file.close() + # unpack the packed output buffer from accelerator obuf_folded = packed_bytearray_to_finnpy( obuf_packed, odt, oshape_folded, reverse_endian=True, reverse_inner=True diff --git a/tests/end2end/test_end2end_tfc_w1a1.py b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py similarity index 93% rename from tests/end2end/test_end2end_tfc_w1a1.py rename to tests/end2end/test_end2end_tfc_w1a1_throughput_test.py index 03d6f92f1c148ce444f08fd65a867ad9390a18fd..946e84f2ab386f2046cc4756d37a2438ed05238b 100644 --- a/tests/end2end/test_end2end_tfc_w1a1.py +++ b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py @@ -55,6 +55,7 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import ( from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen from finn.transformation.fpgadataflow.insert_dwc import InsertDWC from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO from 
finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject @@ -132,22 +133,37 @@ def test_end2end_tfc_w1a1_fold_and_tlastmarker(): fc1w = getCustomOp(fc_layers[1]) fc2w = getCustomOp(fc_layers[2]) fc3w = getCustomOp(fc_layers[3]) - fc0w.set_nodeattr("inFIFODepth", 50) - fc0w.set_nodeattr("SIMD", 16) + fc0w.set_nodeattr("inFIFODepth", 256) + fc0w.set_nodeattr("SIMD", 196) fc0w.set_nodeattr("PE", 16) - fc0w.set_nodeattr("outFIFODepth", 4) - fc1w.set_nodeattr("SIMD", 8) - fc1w.set_nodeattr("PE", 8) - fc1w.set_nodeattr("outFIFODepth", 4) + fc0w.set_nodeattr("outFIFODepth", 64) + fc1w.set_nodeattr("SIMD", 16) + fc1w.set_nodeattr("PE", 16) + fc1w.set_nodeattr("outFIFODepth", 64) fc2w.set_nodeattr("SIMD", 16) fc2w.set_nodeattr("PE", 16) - fc2w.set_nodeattr("outFIFODepth", 4) + fc2w.set_nodeattr("outFIFODepth", 64) fc3w.set_nodeattr("SIMD", 16) fc3w.set_nodeattr("PE", 10) - fc3w.set_nodeattr("outFIFODepth", 50) + fc3w.set_nodeattr("outFIFODepth", 10) model = model.transform(InsertDWC()) + model = model.transform(InsertFIFO()) model = model.transform(InsertTLastMarker()) model = model.transform(GiveUniqueNodeNames()) + fifos = [] + for n in model.graph.node: + if n.op_type == "StreamingFIFO": + fifos.append(n) + fifo0 = getCustomOp(fifos[0]) + fifo1 = getCustomOp(fifos[1]) + fifo2 = getCustomOp(fifos[2]) + fifo3 = getCustomOp(fifos[3]) + fifo4 = getCustomOp(fifos[4]) + fifo0.set_nodeattr("depth", 256) + fifo1.set_nodeattr("depth", 64) + fifo2.set_nodeattr("depth", 64) + fifo3.set_nodeattr("depth", 64) + fifo4.set_nodeattr("depth", 10) model = model.transform(AnnotateResources("estimate")) model.save(build_dir + "/end2end_tfc_w1a1_folded.onnx") @@ -195,7 +211,7 @@ def test_end2end_tfc_w1a1_verify_dataflow_part(): ret_rtlsim_whole = execute_onnx(model, inp_dict, True) res_rtlsim_whole = ret_rtlsim_whole[out_name] 
assert np.isclose(res_npysim, res_rtlsim_nodebynode).all() - assert np.isclose(res_npysim, res_rtlsim_whole).all() + assert np.isclose(res_rtlsim_nodebynode, res_rtlsim_whole).all() def test_end2end_tfc_w1a1_verify_all(): diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py new file mode 100644 index 0000000000000000000000000000000000000000..b561e8cc2f851b7f7a2a61b245d05bb98afc3f2e --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py @@ -0,0 +1,107 @@ +import pytest +import os + +from onnx import TensorProto, helper + +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen +from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch + +from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen + +# from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.general import GiveUniqueNodeNames + +# from finn.util.basic import gen_finn_dt_tensor + +# import finn.core.onnx_exec as oxe +from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker +from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ +from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver +from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) +from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject +from finn.util.basic import pynq_part_map + + +build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] +test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") +test_fpga_part = pynq_part_map[test_pynq_board] +target_clk_ns = 5 + + +def make_single_fifo_modelwrapper(Shape, Depth, fld_shape, finn_dtype): + + inp = helper.make_tensor_value_info("inp", 
TensorProto.FLOAT, Shape) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, Shape) + + FIFO_node = helper.make_node( + "StreamingFIFO", + ["inp"], + ["outp"], + domain="finn", + backend="fpgadataflow", + depth=Depth, + folded_shape=fld_shape, + dataType=str(finn_dtype.name), + ) + + graph = helper.make_graph( + nodes=[FIFO_node], name="fifo_graph", inputs=[inp], outputs=[outp] + ) + + model = helper.make_model(graph, producer_name="fifo-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", finn_dtype) + model.set_tensor_datatype("outp", finn_dtype) + + return model + + +def prepare_inputs(input_tensor, dt): + return {"inp": input_tensor} + + +# shape +@pytest.mark.parametrize("Shape", [[1, 128]]) +# inWidth +@pytest.mark.parametrize("folded_shape", [[1, 1, 128]]) +# outWidth +@pytest.mark.parametrize("depth", [256]) +# finn_dtype +@pytest.mark.parametrize("finn_dtype", [DataType.BIPOLAR]) # , DataType.INT2]) +def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype): + + # generate input data + # x = gen_finn_dt_tensor(finn_dtype, Shape) + # input_dict = prepare_inputs(x, finn_dtype) + + model = make_single_fifo_modelwrapper(Shape, depth, folded_shape, finn_dtype) + + # model = model.transform(SetExecMode("rtlsim")) + model = model.transform(InsertTLastMarker()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynth_IPGen()) + model = model.transform(ReplaceVerilogRelPaths()) + model = model.transform(CodeGen_ipstitch(test_fpga_part)) + model = model.transform(MakePYNQProject(test_pynq_board)) + model = model.transform(SynthPYNQProject()) + model = model.transform(MakePYNQDriver()) + ip = os.environ["PYNQ_IP"] + username = os.getenv("PYNQ_USERNAME", "xilinx") + password = os.getenv("PYNQ_PASSWORD", "xilinx") + target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn") + model = model.transform(DeployToPYNQ(ip, 
username, password, target_dir)) + + # y = oxe.execute_onnx(model, input_dict)["outp"] + + # assert ( + # y == x + # ).all(), """The output values are not the same as the + # input values anymore.""" + # assert y.shape == tuple(Shape), """The output shape is incorrect."""