diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 0e8988d5392810b08ed647fc0466699425430e12..0aea65fdd7999b56989239685f6606a8e1b2e618 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -12,8 +12,8 @@ gecho () { # checkout the correct dependency repo commits # the repos themselves are cloned in the Dockerfile -FINN_BASE_COMMIT=951d5e9dd25b7f38731fa539959667a86e7091b2 -BREVITAS_COMMIT=6ffefa8dbf37fdb0f44c994f34604c29fadb16b0 +FINN_BASE_COMMIT=f2e5f0582ef2b7cbc134168993816c337ca8d3a6 +BREVITAS_COMMIT=b75e0408d9759ed519296e3af29b9c16fb94b0b8 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 HLSLIB_COMMIT=cfafe11a93b79ab1af7529d68f08886913a6466e PYVERILATOR_COMMIT=06c29ecf3ba0361e3d0a75c98f6918ba67bf0e27 diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst index dee62f09a9253380e05300dac8fa34915c20dab5..4cbf671235cbe61b7afcba9979c1259ecddf35a0 100644 --- a/docs/finn/internals.rst +++ b/docs/finn/internals.rst @@ -23,7 +23,9 @@ This behavior can be disabled (not recommended!) by setting the environment vari Custom Operations/Nodes ======================= -FINN uses many custom operations (op_type in ONNX NodeProto) that are not defined in the ONNX operator schema. These custom nodes are marked with domain="finn" in the protobuf to identify them as such. These nodes can represent specific operations that we need for low-bit networks, or operations that are specific to a particular hardware backend. To get more familiar with custom operations and how they are created, please take a look in the Jupyter notebook about CustomOps (see chapter :ref:`tutorials` for details) or directly in the module :py:mod:`finn.custom_op`. +FINN uses many custom operations (op_type in ONNX NodeProto) that are not defined in the ONNX operator schema. These custom nodes are marked with domain="finn.*" in the protobuf to identify them as such. 
These nodes can represent specific operations that we need for low-bit networks, or operations that are specific to a particular hardware backend. To get more familiar with custom operations and how they are created, please take a look in the Jupyter notebook about CustomOps (see chapter :ref:`tutorials` for details) or directly in the module :py:mod:`finn.custom_op`. + +.. note:: See the description of `this PR <https://github.com/Xilinx/finn-base/pull/6>`_ for more on how the operator wrapper library is organized. Custom ONNX Execution Flow ========================== diff --git a/docs/finn/source_code/finn.custom_op.rst b/docs/finn/source_code/finn.custom_op.rst index 72dd4beb90e87d527543ab11ac1ce1d6ac0604b3..8c43ddb424b5f690a0c266c4f31ab95dfa77e480 100644 --- a/docs/finn/source_code/finn.custom_op.rst +++ b/docs/finn/source_code/finn.custom_op.rst @@ -24,7 +24,7 @@ Base Class finn.custom\_op.im2col ----------------------------- -.. automodule:: finn.custom_op.im2col +.. automodule:: finn.custom_op.general.im2col :members: :undoc-members: :show-inheritance: @@ -32,7 +32,7 @@ finn.custom\_op.im2col finn.custom\_op.maxpoolnhwc ---------------------------------- -.. automodule:: finn.custom_op.maxpoolnhwc +.. automodule:: finn.custom_op.general.maxpoolnhwc :members: :undoc-members: :show-inheritance: @@ -40,7 +40,7 @@ finn.custom\_op.maxpoolnhwc finn.custom\_op.multithreshold ------------------------------------- -.. automodule:: finn.custom_op.multithreshold +.. automodule:: finn.custom_op.general.multithreshold :members: :undoc-members: :show-inheritance: @@ -56,7 +56,7 @@ finn.custom\_op.registry finn.custom\_op.streamingdataflowpartition ------------------------------------------------- -.. automodule:: finn.custom_op.streamingdataflowpartition +.. 
automodule:: finn.custom_op.general.streamingdataflowpartition :members: :undoc-members: :show-inheritance: @@ -64,7 +64,7 @@ finn.custom\_op.streamingdataflowpartition finn.custom\_op.xnorpopcount ----------------------------------- -.. automodule:: finn.custom_op.xnorpopcount +.. automodule:: finn.custom_op.general.xnorpopcount :members: :undoc-members: :show-inheritance: diff --git a/notebooks/advanced/1_custom_transformation_pass.ipynb b/notebooks/advanced/1_custom_transformation_pass.ipynb index 9c54d6f26913e558867b2f800b424f4157f47491..9d9bc74633975076b9464dcc38da920204f05c06 100644 --- a/notebooks/advanced/1_custom_transformation_pass.ipynb +++ b/notebooks/advanced/1_custom_transformation_pass.ipynb @@ -398,7 +398,7 @@ " if is_fpgadataflow_node(node) is True:\n", " try:\n", " # lookup op_type in registry of CustomOps\n", - " inst = registry.custom_op[op_type](node)\n", + " inst = registry.getCustomOp(node)\n", " # ensure that code is generated\n", " assert (\n", " inst.get_nodeattr(\"code_gen_dir_cppsim\") != \"\"\n", diff --git a/notebooks/end2end_example/tfc_end2end_verification.ipynb b/notebooks/end2end_example/tfc_end2end_verification.ipynb index 92de7fb7e42b5d0013af31cc0fd88e34d354def8..54738c3725c0141fddc3497dee024ca90db3f3ce 100644 --- a/notebooks/end2end_example/tfc_end2end_verification.ipynb +++ b/notebooks/end2end_example/tfc_end2end_verification.ipynb @@ -128,7 +128,7 @@ } ], "source": [ - "from finn.custom_op.xnorpopcount import xnorpopcountmatmul\n", + "from finn.custom_op.general.xnorpopcount import xnorpopcountmatmul\n", "showSrc(xnorpopcountmatmul)" ] }, diff --git a/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py b/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py index 201333aebdb3fc1d15464389e37326dcaf6848e0..0fcf2e382561852eb1c0b02e1d417db05057655c 100644 --- a/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py +++ b/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py @@ -41,8 +41,7 @@ def exp_cycles_per_layer(model): 
cycle_dict = {} for node in model.graph.node: if is_fpgadataflow_node(node) is True: - op_type = node.op_type - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) cycle_dict[node.name] = inst.get_exp_cycles() return cycle_dict diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py index 03b31b9c1ec51b45e17152d35d5824b6137ab4a2..39d6332aa42594528fbd5a04dd5efad2c3237e77 100644 --- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py +++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py @@ -51,8 +51,7 @@ def hls_synth_res_estimation(model): res_dict[node.name]["LUT"] = 0 res_dict[node.name]["DSP48E"] = 0 res_dict[node.name]["URAM"] = 0 - op_type = node.op_type - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen") if code_gen_dir == "": warnings.warn( diff --git a/src/finn/analysis/fpgadataflow/res_estimation.py b/src/finn/analysis/fpgadataflow/res_estimation.py index e52557573dab072709da4452f4e2d477e99b98c9..2c714b1f12b75e9789f1865d6737422f4d9d9a97 100644 --- a/src/finn/analysis/fpgadataflow/res_estimation.py +++ b/src/finn/analysis/fpgadataflow/res_estimation.py @@ -41,8 +41,45 @@ def res_estimation(model): res_dict = {} for node in model.graph.node: if is_fpgadataflow_node(node) is True: - op_type = node.op_type - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) res_dict[node.name] = inst.node_res_estimation() return res_dict + + +def res_estimation_complete(model): + """Estimates the resources needed for the given model and all values for + resource-related switches. + Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames + transformation) prior to calling this analysis pass to ensure all nodes are + visible in the results. 
+ + Returns {node name : [resource estimation(s)]}.""" + + res_dict = {} + for node in model.graph.node: + if is_fpgadataflow_node(node) is True: + op_type = node.op_type + inst = registry.getCustomOp(node) + if op_type == "StreamingFCLayer_Batch" or op_type == "Vector_Vector_Activate_Batch": + orig_restype = inst.get_nodeattr("resType") + res_dict[node.name] = [] + inst.set_nodeattr("resType", "dsp") + res_dict[node.name].append(inst.node_res_estimation()) + inst.set_nodeattr("resType", "lut") + res_dict[node.name].append(inst.node_res_estimation()) + inst.set_nodeattr("resType", orig_restype) + elif op_type == "ConvolutionInputGenerator": + orig_ramstyle = inst.get_nodeattr("ram_style") + res_dict[node.name] = [] + inst.set_nodeattr("ram_style", "block") + res_dict[node.name].append(inst.node_res_estimation()) + inst.set_nodeattr("ram_style", "distributed") + res_dict[node.name].append(inst.node_res_estimation()) + inst.set_nodeattr("ram_style", "ultra") + res_dict[node.name].append(inst.node_res_estimation()) + inst.set_nodeattr("ram_style", orig_ramstyle) + else: + res_dict[node.name] = [inst.node_res_estimation()] + + return res_dict diff --git a/src/finn/analysis/verify_custom_nodes.py b/src/finn/analysis/verify_custom_nodes.py index 0e05022dd0cb72291128259b983513322524b9da..9af1e9a4fe83de24f64a7e9df535bcf78f5fc234 100644 --- a/src/finn/analysis/verify_custom_nodes.py +++ b/src/finn/analysis/verify_custom_nodes.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import finn.custom_op.registry as registry +from finn.util.basic import is_finn_op def verify_nodes(model): @@ -39,9 +40,9 @@ def verify_nodes(model): verification_dict = {} for node in model.graph.node: - if node.domain == "finn": + if is_finn_op(node.domain): op_type = node.op_type - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) verification_dict[op_type] = inst.verify_node() return verification_dict diff --git a/src/finn/custom_op/__init__.py b/src/finn/custom_op/__init__.py deleted file mode 100644 index 06fc7e5659d8f55f63fe40380abac70dc74c0a4d..0000000000000000000000000000000000000000 --- a/src/finn/custom_op/__init__.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2020, Xilinx -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of FINN nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from pkgutil import extend_path - -__path__ = extend_path(__path__, __name__) - -from finn.custom_op.registry import custom_op - -# make sure new CustomOp subclasses are imported here so that they get -# registered and plug in correctly into the infrastructure -from finn.custom_op.fpgadataflow.convolutioninputgenerator import ( - ConvolutionInputGenerator, -) -from finn.custom_op.fpgadataflow.downsampler import DownSampler -from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch -from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch -from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO -from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker -from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import ( - StreamingDataWidthConverter_Batch, -) -from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch -from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch -from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch -from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch -from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch -from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch -from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch -from finn.custom_op.fpgadataflow.vector_vector_activate_batch import ( - 
Vector_Vector_Activate_Batch, -) -from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch -from finn.custom_op.fpgadataflow.iodma import IODMA - - -custom_op["DownSampler"] = DownSampler -custom_op["StreamingMaxPool_Batch"] = StreamingMaxPool_Batch -custom_op["StreamingFCLayer_Batch"] = StreamingFCLayer_Batch -custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator -custom_op["TLastMarker"] = TLastMarker -custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch -custom_op["StreamingFIFO"] = StreamingFIFO -custom_op["GlobalAccPool_Batch"] = GlobalAccPool_Batch -custom_op["Pool_Batch"] = Pool_Batch -custom_op["FMPadding_Batch"] = FMPadding_Batch -custom_op["Thresholding_Batch"] = Thresholding_Batch -custom_op["AddStreams_Batch"] = AddStreams_Batch -custom_op["LabelSelect_Batch"] = LabelSelect_Batch -custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch -custom_op["Vector_Vector_Activate_Batch"] = Vector_Vector_Activate_Batch -custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch -custom_op["IODMA"] = IODMA diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index a0c10f08c017db78c8aff284a7e07fa1c26d466e..068950b89ae543f5a37c28d83d87ecfa605eaab4 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -25,601 +25,49 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# namespace package, extend path -from pkgutil import extend_path -__path__ = extend_path(__path__, __name__) - -from abc import abstractmethod -import numpy as np -import os -import subprocess -from finn.custom_op.base import CustomOp -from finn.util.basic import ( - CppBuilder, - make_build_dir, - roundup_to_integer_multiple, - get_rtlsim_trace_depth, +from finn.custom_op.fpgadataflow.convolutioninputgenerator import ( + ConvolutionInputGenerator, ) -from finn.util.fpgadataflow import ( - IPGenBuilder, - pyverilate_get_liveness_threshold_cycles, - rtlsim_multi_io, +from finn.custom_op.fpgadataflow.downsampler import DownSampler +from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch +from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch +from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO +from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker +from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import ( + StreamingDataWidthConverter_Batch, ) -from . import templates - -try: - from pyverilator import PyVerilator -except ModuleNotFoundError: - PyVerilator = None - - -class HLSCustomOp(CustomOp): - """HLSCustomOp class all custom ops that correspond to a finn-hlslib - function are based on. Contains different functions every fpgadataflow - custom node should have. 
Some as abstract methods, these have to be filled - when writing a new fpgadataflow custom op node.""" - - def __init__(self, onnx_node): - super().__init__(onnx_node) - - self.code_gen_dict = {} - - # getting templates from templates.py - - # template for single node execution - self.docompute_template = templates.docompute_template - - # templates for single node ip generation - # cpp file - self.ipgen_template = templates.ipgen_template - # tcl script - self.ipgentcl_template = templates.ipgentcl_template - - def get_nodeattr_types(self): - return { - "backend": ("s", True, "fpgadataflow"), - "code_gen_dir_cppsim": ("s", False, ""), - "code_gen_dir_ipgen": ("s", False, ""), - "executable_path": ("s", False, ""), - "ipgen_path": ("s", False, ""), - "ip_path": ("s", False, ""), - "ip_vlnv": ("s", False, ""), - "exec_mode": ("s", False, ""), - "cycles_rtlsim": ("i", False, 0), - "cycles_estimate": ("i", False, 0), - "rtlsim_trace": ("s", False, ""), - "res_estimate": ("s", False, ""), - "res_hls": ("s", False, ""), - "res_synth": ("s", False, ""), - "rtlsim_so": ("s", False, ""), - # partitioning info - "partition_id": ("i", False, 0), - # input and output FIFO depths - "inFIFODepth": ("i", False, 2), - "outFIFODepth": ("i", False, 2), - } - - def get_verilog_top_module_name(self): - "Return the Verilog top module name for this node." - - node = self.onnx_node - prefixed_top_name = "%s_%s" % (node.name, node.name) - return prefixed_top_name - - def get_verilog_top_module_intf_names(self): - """Return a dict of names of input and output interfaces. - The keys reflect the protocols each interface implements: - 'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'. 
- Values are lists of names: - 's_axis' names correspond to the list of node inputs in order, - 'm_axis' names correspond to the list of node outputs in order' - Each block must have at most one aximm and one axilite.""" - intf_names = {} - intf_names["clk"] = ["ap_clk"] - intf_names["rst"] = ["ap_rst_n"] - intf_names["s_axis"] = [("in0_V_V", self.get_instream_width_padded())] - intf_names["m_axis"] = [("out_V_V", self.get_outstream_width_padded())] - intf_names["aximm"] = [] - intf_names["axilite"] = [] - return intf_names - - def get_verilog_top_filename(self): - "Return the Verilog top module filename for this node." - - verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format( - self.get_nodeattr("code_gen_dir_ipgen"), - self.onnx_node.name, - self.get_verilog_top_module_name(), - ) - return verilog_file - - def get_all_verilog_paths(self): - "Return list of all folders containing Verilog code for this node." - - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - assert ( - code_gen_dir != "" - ), """Node attribute "code_gen_dir_ipgen" is - not set. Please run HLSSynthIP first.""" - verilog_path = "{}/project_{}/sol1/impl/verilog/".format( - code_gen_dir, self.onnx_node.name - ) - # default impl only returns the HLS verilog codegen dir - return [verilog_path] - - def get_all_verilog_filenames(self): - "Return list of all Verilog files used for this node." 
- - verilog_files = [] - verilog_paths = self.get_all_verilog_paths() - for verilog_path in verilog_paths: - for f in os.listdir(verilog_path): - if f.endswith(".v"): - verilog_files += [f] - return verilog_files - - def prepare_rtlsim(self): - """Creates a Verilator emulation library for the RTL code generated - for this node, sets the rtlsim_so attribute to its path and returns - a PyVerilator wrapper around it.""" - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - verilog_paths = self.get_all_verilog_paths() - verilog_files = self.get_all_verilog_filenames() - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - return sim - - def get_rtlsim(self): - """Return a PyVerilator wrapper for the Verilator emulation library - for this node.""" - - rtlsim_so = self.get_nodeattr("rtlsim_so") - assert os.path.isfile(rtlsim_so), "Cannot find rtlsim library." 
- # create PyVerilator wrapper - sim = PyVerilator(rtlsim_so) - return sim - - def node_res_estimation(self): - """Returns summarized resource estimation of BRAMs and LUTs - of the node as a dictionary.""" - ret = dict() - ret["BRAM_18K"] = self.bram_estimation() - ret["BRAM_efficiency"] = self.bram_efficiency_estimation() - ret["LUT"] = self.lut_estimation() - return ret - - def bram_efficiency_estimation(self): - """Function for BRAM efficiency estimation: actual parameter storage - needed divided by the allocated BRAM storage (from estimation)""" - return 1 - - def bram_estimation(self): - """Function for BRAM resource estimation, is member function of - HLSCustomOp class but has to be filled by every node""" - return 0 - - def lut_estimation(self): - """Function for LUT resource estimation, is member function of - HLSCustomOp class but has to be filled by every node""" - return 0 - - def get_exp_cycles(self): - """Function for estimation of expected cycles for set folding, - is member function of HLSCustomOp class but has to be filled - by every node""" - return 0 - - def code_generation_ipgen(self, model, fpgapart, clk): - """Generates c++ code and tcl script for ip generation.""" - node = self.onnx_node - - # generate top cpp file for ip generation - path = self.get_nodeattr("code_gen_dir_ipgen") - self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] - self.generate_params(model, path) - self.global_includes() - self.defines("ipgen") - self.blackboxfunction() - self.pragmas() - self.docompute() - - template = self.ipgen_template - - for key in self.code_gen_dict: - # transform list into long string separated by '\n' - code_gen_line = "\n".join(self.code_gen_dict[key]) - template = template.replace(key, code_gen_line) - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - f = open(os.path.join(code_gen_dir, "top_{}.cpp".format(node.name)), "w") - f.write(template) - f.close() - self.code_gen_dict.clear() - - # generate tcl script for ip 
generation - self.code_gen_dict["$PROJECTNAME$"] = ["project_{}".format(node.name)] - self.code_gen_dict["$HWSRCDIR$"] = [code_gen_dir] - self.code_gen_dict["$FPGAPART$"] = [fpgapart] - self.code_gen_dict["$FINNHLSLIBDIR$"] = ["/workspace/finn-hlslib"] - self.code_gen_dict["$TOPFXN$"] = [node.name] - self.code_gen_dict["$CLKPERIOD$"] = [str(clk)] - self.code_gen_dict["$EXTRA_DIRECTIVES$"] = self.ipgen_extra_directives() - - template = self.ipgentcl_template - - for key in self.code_gen_dict: - # transform list into long string separated by '\n' - code_gen_line = "\n".join(self.code_gen_dict[key]) - template = template.replace(key, code_gen_line) - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - f = open(os.path.join(code_gen_dir, "hls_syn_{}.tcl".format(node.name)), "w") - f.write(template) - f.close() - self.code_gen_dict.clear() - - def ipgen_extra_directives(self): - "Return a list of extra tcl directives for HLS synthesis." - return [] - - def ipgen_singlenode_code(self): - """Builds the bash script for ip generation using the IPGenBuilder from - finn.util.fpgadataflow.""" - node = self.onnx_node - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - builder = IPGenBuilder() - builder.append_tcl(code_gen_dir + "/hls_syn_{}.tcl".format(node.name)) - builder.set_ipgen_path(code_gen_dir + "/project_{}".format(node.name)) - builder.build(code_gen_dir) - self.set_nodeattr("ipgen_path", builder.ipgen_path) - self.set_nodeattr("ip_path", builder.ipgen_path + "/sol1/impl/ip") - vlnv = "xilinx.com:hls:%s:1.0" % node.name - self.set_nodeattr("ip_vlnv", vlnv) - - def code_generation_cppsim(self, model): - """Generates c++ code for simulation (cppsim).""" - node = self.onnx_node - path = self.get_nodeattr("code_gen_dir_cppsim") - self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] - self.generate_params(model, path) - self.global_includes() - self.defines("cppsim") - self.read_npy_data() - self.strm_decl() - self.pragmas() - self.docompute() 
- self.dataoutstrm() - self.save_as_npy() - - template = self.docompute_template - - for key in self.code_gen_dict: - # transform list into long string separated by '\n' - code_gen_line = "\n".join(self.code_gen_dict[key]) - template = template.replace(key, code_gen_line) - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w") - f.write(template) - f.close() - self.code_gen_dict.clear() - - def code_generation_ipi(self): - """Constructs and returns the TCL for node instantiation in Vivado IPI.""" - vlnv = self.get_nodeattr("ip_vlnv") - cmd = ["create_bd_cell -type ip -vlnv %s %s" % (vlnv, self.onnx_node.name)] - return cmd - - def compile_singlenode_code(self): - """Builds the bash script for compilation using the CppBuilder from - finn.util.basic and executes the script to produce the executable.""" - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - builder = CppBuilder() - # to enable additional debug features please uncommand the next line - # builder.append_includes("-DDEBUG") - builder.append_includes("-I/workspace/finn/src/finn/qnn-data/cpp") - builder.append_includes("-I/workspace/cnpy/") - builder.append_includes("-I/workspace/finn-hlslib") - builder.append_includes("-I{}/include".format(os.environ["VIVADO_PATH"])) - builder.append_includes("--std=c++11") - builder.append_includes("-O3") - builder.append_sources(code_gen_dir + "/*.cpp") - builder.append_sources("/workspace/cnpy/cnpy.cpp") - builder.append_includes("-lz") - builder.set_executable_path(code_gen_dir + "/node_model") - builder.build(code_gen_dir) - self.set_nodeattr("executable_path", builder.executable_path) - - def dynamic_input_to_npy(self, context, count): - """Saves input (given context) into .npy files. 
- - Count indicates the number of inputs that have to be saved.""" - node = self.onnx_node - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - if code_gen_dir == "": - raise Exception( - """ -Found no codegen dir for this node, did you run the prepare_cppsim transformation? - """ - ) - # create a npy file for each input of the node (in_ind is input index) - # assuming dynamic inputs start from 0 - for in_ind in range(count): - current_input_name = node.input[in_ind] - # make copy before saving array - input_array = context[current_input_name].copy() - np.save( - os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), input_array - ) - - def npy_to_dynamic_output(self, context): - """Reads the output from an output.npy file generated from cppsim and - places its content into the context dictionary.""" - node = self.onnx_node - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - output = np.load("{}/output.npy".format(code_gen_dir)) - context[node.output[0]] = output - - def npy_to_dynamic_outputs(self, context, npy_list): - """Reads the output from .npy files generated from cppsim and places - their content into the context dictionary. - npy_list is a list specifying which files to read, and its order must - match the order of node outputs.""" - node = self.onnx_node - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - for i in range(len(npy_list)): - output = np.load("{}/{}".format(code_gen_dir, npy_list[i])) - context[node.output[i]] = output - - def exec_precompiled_singlenode_model(self): - """Executes precompiled executable.""" - executable_path = self.get_nodeattr("executable_path") - if executable_path == "": - raise Exception( - """ -Found no executable for this node, did you run the codegen and -compilation transformations? 
- """ - ) - process_execute = subprocess.Popen(executable_path, stdout=subprocess.PIPE) - process_execute.communicate() - - def reset_rtlsim(self, sim): - """Sets reset input in pyverilator to zero, toggles the clock and set it - back to one""" - sim.io.ap_rst_n = 0 - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 - sim.io.ap_rst_n = 1 - - def toggle_clk(self, sim): - """Toggles the clock input in pyverilator once.""" - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 - - def rtlsim(self, sim, inp, inp2=None): - """Runs the pyverilator simulation by passing the input values to the simulation, - toggle the clock and observing the execution time. Function contains also an - observation loop that can abort the simulation if no output value is produced - after 100 cycles.""" - - trace_file = self.get_nodeattr("rtlsim_trace") - if trace_file != "": - if trace_file == "default": - trace_file = self.onnx_node.name + ".vcd" - sim.start_vcd_trace(trace_file) - inputs = inp - outputs = [] - sim.io.out_V_V_TREADY = 1 - - # observe if output is completely calculated - # observation_count will contain the number of cycles the calculation ran - num_out_values = self.get_number_output_values() - output_observed = False - observation_count = 0 - - # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles - no_change_count = 0 - old_outputs = outputs - liveness_threshold = pyverilate_get_liveness_threshold_cycles() - - while not (output_observed): - sim.io.in0_V_V_TVALID = 1 if len(inputs) > 0 else 0 - sim.io.in0_V_V_TDATA = inputs[0] if len(inputs) > 0 else 0 - if sim.io.in0_V_V_TREADY == 1 and sim.io.in0_V_V_TVALID == 1: - inputs = inputs[1:] - - if inp2 is not None: - sim.io.in1_V_V_TVALID = 1 if len(inp2) > 0 else 0 - sim.io.in1_V_V_TDATA = inp2[0] if len(inp2) > 0 else 0 - if sim.io.in1_V_V_TREADY == 1 and sim.io.in1_V_V_TVALID == 1: - inp2 = inp2[1:] - - if sim.io.out_V_V_TVALID == 1 and sim.io.out_V_V_TREADY == 1: - outputs = outputs + 
[sim.io.out_V_V_TDATA] - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 - - observation_count = observation_count + 1 - no_change_count = no_change_count + 1 - - if len(outputs) == num_out_values: - self.set_nodeattr("cycles_rtlsim", observation_count) - output_observed = True - - if no_change_count == liveness_threshold: - if old_outputs == outputs: - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() - raise Exception( - "Error in simulation! Takes too long to produce output. " - "Consider setting the LIVENESS_THRESHOLD env.var. to a " - "larger value." - ) - else: - no_change_count = 0 - old_outputs = outputs - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() - return outputs - - def rtlsim_multi_io(self, sim, io_dict): - "Run rtlsim for this node, supports multiple i/o streams." - - trace_file = self.get_nodeattr("rtlsim_trace") - if trace_file == "default": - trace_file = self.onnx_node.name + ".vcd" - num_out_values = self.get_number_output_values() - total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file) - self.set_nodeattr("cycles_rtlsim", total_cycle_count) - - def execute_node(self, context, graph): - """Executes single node using cppsim or rtlsim.""" - mode = self.get_nodeattr("exec_mode") - if mode == "cppsim": - # save input(s) - self.dynamic_input_to_npy(context, 1) - # execute the precompiled model - self.exec_precompiled_singlenode_model() - # load output npy file - self.npy_to_dynamic_output(context) - elif mode == "rtlsim": - pass - - else: - raise Exception( - """Invalid value for attribute exec_mode! Is currently set to: {} - has to be set to one of the following value ("cppsim", "rtlsim")""".format( - mode - ) - ) - - def generate_params(self, model, path): - """Function to generate parameters (i.e. 
weights and thresholds), - is member function of HLSCustomOp class but has to be filled - by every node.""" - pass - - @abstractmethod - def get_number_output_values(self): - """Function to get the number of expected output values, - is member function of HLSCustomOp class but has to be filled - by every node.""" - pass - - @abstractmethod - def global_includes(self): - """Function to set the global includes for c++ code that has to be generated - for cppsim or rtlsim, is member function of HLSCustomOp class but has to - be filled by every node.""" - pass - - @abstractmethod - def defines(self, var): - """Function to set the define commands for c++ code that has to be generated - for cppsim or rtlsim, is member function of HLSCustomOp class but has to - be filled by every node. - - var: makes it possible to reuse the function for different c++ code generation. - I.e. if set to "ipgen" in StreamingFCLayer_Batch additional PRAGMA defines are - added.""" - pass - - @abstractmethod - def read_npy_data(self): - """Function to generate the commands for reading data from .npy file in c++, - is member function of HLSCustomOp class but has to be filled by every node.""" - pass - - @abstractmethod - def strm_decl(self): - """Function to generate the commands for the stream declaration in c++, - is member function of HLSCustomOp class but has to be filled - by every node.""" - pass - - @abstractmethod - def docompute(self): - """Function to generate the commands for the computational part of the - c++ code, is member function of HLSCustomOp class but has to be filled - by every node.""" - pass - - @abstractmethod - def dataoutstrm(self): - """Function to generate the commands for reading out data from c++ and convert - into npy format, is member function of HLSCustomOp class but has to be filled - by every node.""" - pass - - @abstractmethod - def save_as_npy(self): - """Function to generate the commands for saving data in .npy file in c++, - is member function of HLSCustomOp 
class but has to be filled by every node.""" - pass - - @abstractmethod - def blackboxfunction(self): - """Function to generate a blackbock function in c++ from which an IP block - will be generated, is member function of HLSCustomOp class but has to be filled - by every node.""" - pass - - @abstractmethod - def pragmas(self): - """Function to generate the pragma commands in c++, is member function of - HLSCustomOp class but has to be filled by every node.""" - pass - - def get_normal_input_shape(self): - """Returns normal input shape if implemented.""" - raise Exception("get_normal_input_shape not implemented for this op") - - def get_normal_output_shape(self): - """Returns folded output shape if implemented.""" - raise Exception("get_normal_output_shape not implemented for this op") - - def get_folded_input_shape(self): - """Returns folded input shape (according to synapse folding), if implemented.""" - raise Exception("get_folded_input_shape not implemented for this op") - - def get_folded_output_shape(self): - """Returns folded output shape (according to neuron folding), if implemented.""" - raise Exception("get_folded_output_shape not implemented for this op") - - def get_instream_width(self): - """Returns input stream width, if implemented.""" - raise Exception("get_instream_width not implemented for this op") - - def get_outstream_width(self): - """Returns output stream width, if implemented.""" - raise Exception("get_outstream_width not implemented for this op") - - def get_instream_width_padded(self): - """Returns input stream width padded to a multiple of 8. This is required - by the AXI Stream spec.""" - in_width = self.get_instream_width() - return roundup_to_integer_multiple(in_width, 8) - - def get_outstream_width_padded(self): - """Returns output stream width padded to a multiple of 8. 
This is required - by the AXI Stream spec.""" - out_width = self.get_outstream_width() - return roundup_to_integer_multiple(out_width, 8) - - def get_ap_int_max_w(self): - "Return the maximum width of any ap_int used in this module." - instream = self.get_instream_width() - outstream = self.get_outstream_width() - return max([instream, outstream]) +from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch +from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch +from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch +from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch +from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch +from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch +from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch +from finn.custom_op.fpgadataflow.vector_vector_activate_batch import ( + Vector_Vector_Activate_Batch, +) +from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch +from finn.custom_op.fpgadataflow.iodma import IODMA + +custom_op = dict() + +# make sure new HLSCustomOp subclasses are imported here so that they get +# registered and plug in correctly into the infrastructure +custom_op["DownSampler"] = DownSampler +custom_op["StreamingMaxPool_Batch"] = StreamingMaxPool_Batch +custom_op["StreamingFCLayer_Batch"] = StreamingFCLayer_Batch +custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator +custom_op["TLastMarker"] = TLastMarker +custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch +custom_op["StreamingFIFO"] = StreamingFIFO +custom_op["GlobalAccPool_Batch"] = GlobalAccPool_Batch +custom_op["Pool_Batch"] = Pool_Batch +custom_op["FMPadding_Batch"] = FMPadding_Batch +custom_op["Thresholding_Batch"] = Thresholding_Batch +custom_op["AddStreams_Batch"] = AddStreams_Batch +custom_op["LabelSelect_Batch"] = LabelSelect_Batch 
+custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch +custom_op["Vector_Vector_Activate_Batch"] = Vector_Vector_Activate_Batch +custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch +custom_op["IODMA"] = IODMA diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py index 593f9f4fdf574aa2a2b4e70de5fe6ece2ce2085d..9222720543bb463f62be76e980c222194d237f44 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py @@ -31,7 +31,7 @@ import os import numpy as np from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from onnx import TensorProto, helper from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -109,13 +109,6 @@ class AddStreams_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py index 88b55aaec8fa834abe274b703a404b4419571401..635f37d5695a56d7c22f2287030ccb7331ab347b 100644 --- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py +++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py @@ -33,7 +33,7 @@ import numpy as np from onnx import TensorProto, helper from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.data_packing import ( npy_to_rtlsim_input, 
numpy_to_hls_code, @@ -95,11 +95,11 @@ class ChannelwiseOp_Batch(HLSCustomOp): my_attrs = { # channelwise "map" function to apply: # one of cmp_le, cmp_ge, add, mul - "Func": ("s", False, "cmp_le"), + "Func": ("s", False, "cmp_le", {"cmp_le", "cmp_ge", "add", "mul"}), "PE": ("i", True, 0), "NumChannels": ("i", True, 0), # string defining memory resource type for parameters - "ram_style": ("s", False, "distributed"), + "ram_style": ("s", False, "distributed", {"distributed", "block"}), # FINN DataTypes for inputs, weights, outputs "inputDataType": ("s", True, ""), "paramDataType": ("s", True, ""), @@ -178,13 +178,6 @@ class ChannelwiseOp_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index d33d6c963c0c55309f7f258c9ec1d7723e112282..3f400053df8de6ec1e53e39fb5a3edee15f3ab30 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -28,11 +28,12 @@ import os +import math import numpy as np from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp -from finn.custom_op.im2col import compute_conv_output_dim +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.custom_op.general.im2col import compute_conv_output_dim from onnx import TensorProto, helper from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -69,13 +70,18 @@ class ConvolutionInputGenerator(HLSCustomOp): # FINN DataTypes for 
inputs, weights, outputs "inputDataType": ("s", True, ""), "outputDataType": ("s", True, ""), - "depthwise": ("i", False, 0), + "depthwise": ("i", False, 0, {0, 1}), # FPGA resource type for ConvolutionInputGenerator input buffer # auto -- let Vivado HLS decide # block -- use BRAM # distributed -- use LUTRAM # ultra -- use URAM - "ram_style": ("s", False, "distributed"), + "ram_style": ( + "s", + False, + "distributed", + {"auto", "block", "distributed", "ultra"}, + ), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -194,6 +200,75 @@ class ConvolutionInputGenerator(HLSCustomOp): return int(exp_cycles) + def bram_estimation(self): + simd = self.get_nodeattr("SIMD") + ifm_ch = self.get_nodeattr("IFMChannels") + ifm_dim = self.get_nodeattr("IFMDim") + k = self.get_nodeattr("ConvKernelDim") + stride = self.get_nodeattr("Stride") + ram_style = self.get_nodeattr("ram_style") + if ram_style == "block" or ram_style == "auto": + ram_depth = ifm_dim * ifm_ch / simd + if ram_depth <= 512: + ram_width = 36 + elif ram_depth <= 1024: + ram_width = 18 + elif ram_depth <= 2048: + ram_width = 9 + elif ram_depth <= 4096: + ram_width = 4 + elif ram_depth <= 8192: + ram_width = 2 + else: + ram_width = 1 + return int( + (k + stride) + * ( + math.ceil(simd * self.get_input_datatype().bitwidth() / ram_width) + * math.ceil(ifm_dim * ifm_ch / simd / ram_depth) + ) + ) + else: + return 0 + + def lut_estimation(self): + simd = self.get_nodeattr("SIMD") + ifm_ch = self.get_nodeattr("IFMChannels") + ifm_dim = self.get_nodeattr("IFMDim") + k = self.get_nodeattr("ConvKernelDim") + stride = self.get_nodeattr("Stride") + ram_style = self.get_nodeattr("ram_style") + if ram_style == "distributed": + ram_luts = int( + (k + stride) + * ( + simd + * self.get_input_datatype().bitwidth() + * math.ceil(ifm_dim * ifm_ch / simd / 64) + ) + ) + else: + ram_luts = 0 + return 300 + ram_luts + + def uram_estimation(self): + simd = self.get_nodeattr("SIMD") + ifm_ch = 
self.get_nodeattr("IFMChannels") + ifm_dim = self.get_nodeattr("IFMDim") + k = self.get_nodeattr("ConvKernelDim") + stride = self.get_nodeattr("Stride") + ram_style = self.get_nodeattr("ram_style") + if ram_style == "ultra": + return int( + (k + stride) + * ( + math.ceil(simd * self.get_input_datatype().bitwidth() / 64) + * math.ceil(ifm_dim * ifm_ch / simd / 4096) + ) + ) + else: + return 0 + def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") node = self.onnx_node diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py index 15d55653b4e431dead885d75650b1500150d8775..e7e0c00ccd0b82643dbff15a0426fdc3831bd685 100644 --- a/src/finn/custom_op/fpgadataflow/downsampler.py +++ b/src/finn/custom_op/fpgadataflow/downsampler.py @@ -2,7 +2,7 @@ import os import numpy as np from onnx import TensorProto, helper from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py index 603fef78df561b301ffd20725febdc35daa78f6f..370c87c8618da2bb2eac5ee4c20ad86d64b03703 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py @@ -31,7 +31,7 @@ import os import numpy as np from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from onnx import helper, TensorProto from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -110,13 +110,6 @@ class DuplicateStreams_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if 
domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py index 95ecc5f10525456e7f5a6d838e0850adaee5415f..e8efa3abb4e75830bf31cd88c8cb21f517e0a9f7 100644 --- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py +++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py @@ -2,7 +2,7 @@ import os import numpy as np from onnx import TensorProto, helper from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -28,7 +28,7 @@ class FMPadding_Batch(HLSCustomOp): # controls distribution of padded pixels # in case of uneven padding -- see FMPadding fxn # in hlslib - "PaddingStyle": ("i", False, 2), + "PaddingStyle": ("i", False, 2, {2, 1}), # shape describing input vecs per execution "numInputVectors": ("i", False, 1), } diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py index 56f1a9d56d9da7057e3cbe61f3d92877e58087d6..6035ad75d8037b6f93eb38700930c535a5409298 100644 --- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py @@ -31,7 +31,7 @@ import os import numpy as np from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from onnx import TensorProto, helper from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -115,13 +115,6 @@ class GlobalAccPool_Batch(HLSCustomOp): def verify_node(self): 
info_messages = [] - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py new file mode 100644 index 0000000000000000000000000000000000000000..3431061e772e7eda310733f1a0d31f4b2db154ac --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py @@ -0,0 +1,634 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# namespace package, extend path + +from abc import abstractmethod +import numpy as np +import os +import subprocess +from finn.custom_op.base import CustomOp +from finn.util.basic import ( + CppBuilder, + make_build_dir, + roundup_to_integer_multiple, + get_rtlsim_trace_depth, +) +from finn.util.fpgadataflow import ( + IPGenBuilder, + pyverilate_get_liveness_threshold_cycles, + rtlsim_multi_io, +) +from . import templates + +try: + from pyverilator import PyVerilator +except ModuleNotFoundError: + PyVerilator = None + + +class HLSCustomOp(CustomOp): + """HLSCustomOp class all custom ops that correspond to a finn-hlslib + function are based on. Contains different functions every fpgadataflow + custom node should have. 
Some as abstract methods, these have to be filled + when writing a new fpgadataflow custom op node.""" + + def __init__(self, onnx_node): + super().__init__(onnx_node) + + self.code_gen_dict = {} + + # getting templates from templates.py + + # template for single node execution + self.docompute_template = templates.docompute_template + + # templates for single node ip generation + # cpp file + self.ipgen_template = templates.ipgen_template + # tcl script + self.ipgentcl_template = templates.ipgentcl_template + + def get_nodeattr_types(self): + return { + "backend": ("s", True, "fpgadataflow"), + "code_gen_dir_cppsim": ("s", False, ""), + "code_gen_dir_ipgen": ("s", False, ""), + "executable_path": ("s", False, ""), + "ipgen_path": ("s", False, ""), + "ip_path": ("s", False, ""), + "ip_vlnv": ("s", False, ""), + "exec_mode": ("s", False, "", {"", "rtlsim", "cppsim"}), + "cycles_rtlsim": ("i", False, 0), + "cycles_estimate": ("i", False, 0), + "rtlsim_trace": ("s", False, ""), + "res_estimate": ("s", False, ""), + "res_hls": ("s", False, ""), + "res_synth": ("s", False, ""), + "rtlsim_so": ("s", False, ""), + # partitioning info + "partition_id": ("i", False, 0), + # input and output FIFO depths + "inFIFODepth": ("i", False, 2), + "outFIFODepth": ("i", False, 2), + } + + def get_verilog_top_module_name(self): + "Return the Verilog top module name for this node." + + node = self.onnx_node + prefixed_top_name = "%s_%s" % (node.name, node.name) + return prefixed_top_name + + def get_verilog_top_module_intf_names(self): + """Return a dict of names of input and output interfaces. + The keys reflect the protocols each interface implements: + 'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'. 
+ Values are lists of names: + 's_axis' names correspond to the list of node inputs in order, + 'm_axis' names correspond to the list of node outputs in order. + Each block must have at most one aximm and one axilite.""" + intf_names = {} + intf_names["clk"] = ["ap_clk"] + intf_names["rst"] = ["ap_rst_n"] + intf_names["s_axis"] = [("in0_V_V", self.get_instream_width_padded())] + intf_names["m_axis"] = [("out_V_V", self.get_outstream_width_padded())] + intf_names["aximm"] = [] + intf_names["axilite"] = [] + return intf_names + + def get_verilog_top_filename(self): + "Return the Verilog top module filename for this node." + + verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format( + self.get_nodeattr("code_gen_dir_ipgen"), + self.onnx_node.name, + self.get_verilog_top_module_name(), + ) + return verilog_file + + def get_all_verilog_paths(self): + "Return list of all folders containing Verilog code for this node." + + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + assert ( + code_gen_dir != "" + ), """Node attribute "code_gen_dir_ipgen" is + not set. Please run HLSSynthIP first.""" + verilog_path = "{}/project_{}/sol1/impl/verilog/".format( + code_gen_dir, self.onnx_node.name + ) + # default impl only returns the HLS verilog codegen dir + return [verilog_path] + + def get_all_verilog_filenames(self): + "Return list of all Verilog files used for this node." 
+ + verilog_files = [] + verilog_paths = self.get_all_verilog_paths() + for verilog_path in verilog_paths: + for f in os.listdir(verilog_path): + if f.endswith(".v"): + verilog_files += [f] + return verilog_files + + def prepare_rtlsim(self): + """Creates a Verilator emulation library for the RTL code generated + for this node, sets the rtlsim_so attribute to its path and returns + a PyVerilator wrapper around it.""" + + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + verilog_paths = self.get_all_verilog_paths() + verilog_files = self.get_all_verilog_filenames() + # build the Verilator emu library + sim = PyVerilator.build( + verilog_files, + build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), + verilog_path=verilog_paths, + trace_depth=get_rtlsim_trace_depth(), + top_module_name=self.get_verilog_top_module_name(), + ) + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", sim.lib._name) + return sim + + def get_rtlsim(self): + """Return a PyVerilator wrapper for the Verilator emulation library + for this node.""" + + rtlsim_so = self.get_nodeattr("rtlsim_so") + assert os.path.isfile(rtlsim_so), "Cannot find rtlsim library." 
+ # create PyVerilator wrapper + sim = PyVerilator(rtlsim_so) + return sim + + def node_res_estimation(self): + """Returns summarized resource estimation of BRAMs and LUTs + of the node as a dictionary.""" + ret = dict() + ret["BRAM_18K"] = self.bram_estimation() + ret["BRAM_efficiency"] = self.bram_efficiency_estimation() + ret["LUT"] = self.lut_estimation() + ret["URAM"] = self.uram_estimation() + ret["DSP"] = self.dsp_estimation() + return ret + + def bram_efficiency_estimation(self): + """Function for BRAM efficiency estimation: actual parameter storage + needed divided by the allocated BRAM storage (from estimation)""" + return 1 + + def bram_estimation(self): + """Function for BRAM resource estimation, is member function of + HLSCustomOp class but has to be filled by every node""" + return 0 + + def uram_estimation(self): + """Function for UltraRAM resource estimation, is member function of + HLSCustomOp class but has to be filled by every node""" + return 0 + + def lut_estimation(self): + """Function for LUT resource estimation, is member function of + HLSCustomOp class but has to be filled by every node""" + return 0 + + def dsp_estimation(self): + """Function for DSP resource estimation, is member function of + HLSCustomOp class but has to be filled by every node""" + return 0 + + def get_exp_cycles(self): + """Function for estimation of expected cycles for set folding, + is member function of HLSCustomOp class but has to be filled + by every node""" + return 0 + + def code_generation_ipgen(self, model, fpgapart, clk): + """Generates c++ code and tcl script for ip generation.""" + node = self.onnx_node + + # generate top cpp file for ip generation + path = self.get_nodeattr("code_gen_dir_ipgen") + self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] + self.generate_params(model, path) + self.global_includes() + self.defines("ipgen") + self.blackboxfunction() + self.pragmas() + self.docompute() + + template = self.ipgen_template + + for key 
in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + f = open(os.path.join(code_gen_dir, "top_{}.cpp".format(node.name)), "w") + f.write(template) + f.close() + self.code_gen_dict.clear() + + # generate tcl script for ip generation + self.code_gen_dict["$PROJECTNAME$"] = ["project_{}".format(node.name)] + self.code_gen_dict["$HWSRCDIR$"] = [code_gen_dir] + self.code_gen_dict["$FPGAPART$"] = [fpgapart] + self.code_gen_dict["$FINNHLSLIBDIR$"] = ["/workspace/finn-hlslib"] + self.code_gen_dict["$TOPFXN$"] = [node.name] + self.code_gen_dict["$CLKPERIOD$"] = [str(clk)] + self.code_gen_dict["$EXTRA_DIRECTIVES$"] = self.ipgen_extra_directives() + + template = self.ipgentcl_template + + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + f = open(os.path.join(code_gen_dir, "hls_syn_{}.tcl".format(node.name)), "w") + f.write(template) + f.close() + self.code_gen_dict.clear() + + def ipgen_extra_directives(self): + "Return a list of extra tcl directives for HLS synthesis." 
+ return [] + + def ipgen_singlenode_code(self): + """Builds the bash script for ip generation using the IPGenBuilder from + finn.util.fpgadataflow.""" + node = self.onnx_node + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + builder = IPGenBuilder() + builder.append_tcl(code_gen_dir + "/hls_syn_{}.tcl".format(node.name)) + builder.set_ipgen_path(code_gen_dir + "/project_{}".format(node.name)) + builder.build(code_gen_dir) + self.set_nodeattr("ipgen_path", builder.ipgen_path) + self.set_nodeattr("ip_path", builder.ipgen_path + "/sol1/impl/ip") + vlnv = "xilinx.com:hls:%s:1.0" % node.name + self.set_nodeattr("ip_vlnv", vlnv) + + def code_generation_cppsim(self, model): + """Generates c++ code for simulation (cppsim).""" + node = self.onnx_node + path = self.get_nodeattr("code_gen_dir_cppsim") + self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] + self.generate_params(model, path) + self.global_includes() + self.defines("cppsim") + self.read_npy_data() + self.strm_decl() + self.pragmas() + self.docompute() + self.dataoutstrm() + self.save_as_npy() + + template = self.docompute_template + + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w") + f.write(template) + f.close() + self.code_gen_dict.clear() + + def code_generation_ipi(self): + """Constructs and returns the TCL for node instantiation in Vivado IPI.""" + vlnv = self.get_nodeattr("ip_vlnv") + cmd = ["create_bd_cell -type ip -vlnv %s %s" % (vlnv, self.onnx_node.name)] + return cmd + + def compile_singlenode_code(self): + """Builds the bash script for compilation using the CppBuilder from + finn.util.basic and executes the script to produce the executable.""" + code_gen_dir = 
self.get_nodeattr("code_gen_dir_cppsim") + builder = CppBuilder() + # to enable additional debug features please uncomment the next line + # builder.append_includes("-DDEBUG") + builder.append_includes("-I/workspace/finn/src/finn/qnn-data/cpp") + builder.append_includes("-I/workspace/cnpy/") + builder.append_includes("-I/workspace/finn-hlslib") + builder.append_includes("-I{}/include".format(os.environ["VIVADO_PATH"])) + builder.append_includes("--std=c++11") + builder.append_includes("-O3") + builder.append_sources(code_gen_dir + "/*.cpp") + builder.append_sources("/workspace/cnpy/cnpy.cpp") + builder.append_includes("-lz") + builder.set_executable_path(code_gen_dir + "/node_model") + builder.build(code_gen_dir) + self.set_nodeattr("executable_path", builder.executable_path) + + def dynamic_input_to_npy(self, context, count): + """Saves input (given context) into .npy files. + + Count indicates the number of inputs that have to be saved.""" + node = self.onnx_node + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + if code_gen_dir == "": + raise Exception( + """ +Found no codegen dir for this node, did you run the prepare_cppsim transformation? 
+ """ + ) + # create a npy file for each input of the node (in_ind is input index) + # assuming dynamic inputs start from 0 + for in_ind in range(count): + current_input_name = node.input[in_ind] + # make copy before saving array + input_array = context[current_input_name].copy() + np.save( + os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), input_array + ) + + def npy_to_dynamic_output(self, context): + """Reads the output from an output.npy file generated from cppsim and + places its content into the context dictionary.""" + node = self.onnx_node + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + output = np.load("{}/output.npy".format(code_gen_dir)) + context[node.output[0]] = output + + def npy_to_dynamic_outputs(self, context, npy_list): + """Reads the output from .npy files generated from cppsim and places + their content into the context dictionary. + npy_list is a list specifying which files to read, and its order must + match the order of node outputs.""" + node = self.onnx_node + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + for i in range(len(npy_list)): + output = np.load("{}/{}".format(code_gen_dir, npy_list[i])) + context[node.output[i]] = output + + def exec_precompiled_singlenode_model(self): + """Executes precompiled executable.""" + executable_path = self.get_nodeattr("executable_path") + if executable_path == "": + raise Exception( + """ +Found no executable for this node, did you run the codegen and +compilation transformations? 
+ """ + ) + process_execute = subprocess.Popen(executable_path, stdout=subprocess.PIPE) + process_execute.communicate() + + def reset_rtlsim(self, sim): + """Sets reset input in pyverilator to zero, toggles the clock and set it + back to one""" + sim.io.ap_rst_n = 0 + sim.io.ap_clk = 1 + sim.io.ap_clk = 0 + sim.io.ap_rst_n = 1 + + def toggle_clk(self, sim): + """Toggles the clock input in pyverilator once.""" + sim.io.ap_clk = 1 + sim.io.ap_clk = 0 + + def rtlsim(self, sim, inp, inp2=None): + """Runs the pyverilator simulation by passing the input values to the simulation, + toggle the clock and observing the execution time. Function contains also an + observation loop that can abort the simulation if no output value is produced + after 100 cycles.""" + + trace_file = self.get_nodeattr("rtlsim_trace") + if trace_file != "": + if trace_file == "default": + trace_file = self.onnx_node.name + ".vcd" + sim.start_vcd_trace(trace_file) + inputs = inp + outputs = [] + sim.io.out_V_V_TREADY = 1 + + # observe if output is completely calculated + # observation_count will contain the number of cycles the calculation ran + num_out_values = self.get_number_output_values() + output_observed = False + observation_count = 0 + + # avoid infinite looping of simulation by aborting when there is no change in + # output values after 100 cycles + no_change_count = 0 + old_outputs = outputs + liveness_threshold = pyverilate_get_liveness_threshold_cycles() + + while not (output_observed): + sim.io.in0_V_V_TVALID = 1 if len(inputs) > 0 else 0 + sim.io.in0_V_V_TDATA = inputs[0] if len(inputs) > 0 else 0 + if sim.io.in0_V_V_TREADY == 1 and sim.io.in0_V_V_TVALID == 1: + inputs = inputs[1:] + + if inp2 is not None: + sim.io.in1_V_V_TVALID = 1 if len(inp2) > 0 else 0 + sim.io.in1_V_V_TDATA = inp2[0] if len(inp2) > 0 else 0 + if sim.io.in1_V_V_TREADY == 1 and sim.io.in1_V_V_TVALID == 1: + inp2 = inp2[1:] + + if sim.io.out_V_V_TVALID == 1 and sim.io.out_V_V_TREADY == 1: + outputs = outputs + 
[sim.io.out_V_V_TDATA] + sim.io.ap_clk = 1 + sim.io.ap_clk = 0 + + observation_count = observation_count + 1 + no_change_count = no_change_count + 1 + + if len(outputs) == num_out_values: + self.set_nodeattr("cycles_rtlsim", observation_count) + output_observed = True + + if no_change_count == liveness_threshold: + if old_outputs == outputs: + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() + raise Exception( + "Error in simulation! Takes too long to produce output. " + "Consider setting the LIVENESS_THRESHOLD env.var. to a " + "larger value." + ) + else: + no_change_count = 0 + old_outputs = outputs + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() + return outputs + + def rtlsim_multi_io(self, sim, io_dict): + "Run rtlsim for this node, supports multiple i/o streams." + + trace_file = self.get_nodeattr("rtlsim_trace") + if trace_file == "default": + trace_file = self.onnx_node.name + ".vcd" + num_out_values = self.get_number_output_values() + total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file) + self.set_nodeattr("cycles_rtlsim", total_cycle_count) + + def execute_node(self, context, graph): + """Executes single node using cppsim or rtlsim.""" + mode = self.get_nodeattr("exec_mode") + if mode == "cppsim": + # save input(s) + self.dynamic_input_to_npy(context, 1) + # execute the precompiled model + self.exec_precompiled_singlenode_model() + # load output npy file + self.npy_to_dynamic_output(context) + elif mode == "rtlsim": + pass + + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + def generate_params(self, model, path): + """Function to generate parameters (i.e. 
weights and thresholds), + is member function of HLSCustomOp class but has to be filled + by every node.""" + pass + + @abstractmethod + def get_number_output_values(self): + """Function to get the number of expected output values, + is member function of HLSCustomOp class but has to be filled + by every node.""" + pass + + @abstractmethod + def global_includes(self): + """Function to set the global includes for c++ code that has to be generated + for cppsim or rtlsim, is member function of HLSCustomOp class but has to + be filled by every node.""" + pass + + @abstractmethod + def defines(self, var): + """Function to set the define commands for c++ code that has to be generated + for cppsim or rtlsim, is member function of HLSCustomOp class but has to + be filled by every node. + + var: makes it possible to reuse the function for different c++ code generation. + I.e. if set to "ipgen" in StreamingFCLayer_Batch additional PRAGMA defines are + added.""" + pass + + @abstractmethod + def read_npy_data(self): + """Function to generate the commands for reading data from .npy file in c++, + is member function of HLSCustomOp class but has to be filled by every node.""" + pass + + @abstractmethod + def strm_decl(self): + """Function to generate the commands for the stream declaration in c++, + is member function of HLSCustomOp class but has to be filled + by every node.""" + pass + + @abstractmethod + def docompute(self): + """Function to generate the commands for the computational part of the + c++ code, is member function of HLSCustomOp class but has to be filled + by every node.""" + pass + + @abstractmethod + def dataoutstrm(self): + """Function to generate the commands for reading out data from c++ and convert + into npy format, is member function of HLSCustomOp class but has to be filled + by every node.""" + pass + + @abstractmethod + def save_as_npy(self): + """Function to generate the commands for saving data in .npy file in c++, + is member function of HLSCustomOp 
class but has to be filled by every node.""" + pass + + @abstractmethod + def blackboxfunction(self): + """Function to generate a blackbock function in c++ from which an IP block + will be generated, is member function of HLSCustomOp class but has to be filled + by every node.""" + pass + + @abstractmethod + def pragmas(self): + """Function to generate the pragma commands in c++, is member function of + HLSCustomOp class but has to be filled by every node.""" + pass + + def get_normal_input_shape(self): + """Returns normal input shape if implemented.""" + raise Exception("get_normal_input_shape not implemented for this op") + + def get_normal_output_shape(self): + """Returns folded output shape if implemented.""" + raise Exception("get_normal_output_shape not implemented for this op") + + def get_folded_input_shape(self): + """Returns folded input shape (according to synapse folding), if implemented.""" + raise Exception("get_folded_input_shape not implemented for this op") + + def get_folded_output_shape(self): + """Returns folded output shape (according to neuron folding), if implemented.""" + raise Exception("get_folded_output_shape not implemented for this op") + + def get_instream_width(self): + """Returns input stream width, if implemented.""" + raise Exception("get_instream_width not implemented for this op") + + def get_outstream_width(self): + """Returns output stream width, if implemented.""" + raise Exception("get_outstream_width not implemented for this op") + + def get_instream_width_padded(self): + """Returns input stream width padded to a multiple of 8. This is required + by the AXI Stream spec.""" + in_width = self.get_instream_width() + return roundup_to_integer_multiple(in_width, 8) + + def get_outstream_width_padded(self): + """Returns output stream width padded to a multiple of 8. 
This is required + by the AXI Stream spec.""" + out_width = self.get_outstream_width() + return roundup_to_integer_multiple(out_width, 8) + + def get_ap_int_max_w(self): + "Return the maximum width of any ap_int used in this module." + instream = self.get_instream_width() + outstream = self.get_outstream_width() + return max([instream, outstream]) diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py index 67af0c5cb409c6deea9bacf247f803d119aa1b17..0ab8bf295927f233b5785f76a1d6894c7993f9ef 100644 --- a/src/finn/custom_op/fpgadataflow/iodma.py +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -30,7 +30,7 @@ import numpy as np import math from onnx import TensorProto, helper from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp # the IODMA inerfaces a memory-mapped AXI interface and an AXI stream @@ -87,8 +87,8 @@ class IODMA(HLSCustomOp): "streamWidth": ("i", False, 32), # DMA-specific parameters "intfWidth": ("i", False, 32), - "burstMode": ("s", False, "increment"), - "direction": ("s", False, "in"), + "burstMode": ("s", False, "increment", {"wrap", "increment"}), + "direction": ("s", False, "in", {"in", "out"}), # shape describing input vecs per execution "numInputVectors": ("ints", False, [1]), # name of axi-mm interface diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py index 6e206d2058076802a48b69f4c69cccf744489f31..39fa87baa08cb43ea7cb4f3d2aa2159b07b8522b 100644 --- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py +++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py @@ -31,7 +31,7 @@ import os import numpy as np from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from onnx import TensorProto, helper from finn.util.data_packing 
import npy_to_rtlsim_input, rtlsim_output_to_npy from finn.util.basic import roundup_to_integer_multiple @@ -128,13 +128,6 @@ class LabelSelect_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py index 4a2fa6889ae0ebb94976d50b0fc8362d01a63bea..edba084b5258de37198520257e438f90f8cc65e3 100644 --- a/src/finn/custom_op/fpgadataflow/pool_batch.py +++ b/src/finn/custom_op/fpgadataflow/pool_batch.py @@ -29,7 +29,7 @@ import os import numpy as np -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.core.datatype import DataType from onnx import TensorProto, helper from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -60,9 +60,9 @@ class Pool_Batch(HLSCustomOp): "KernelSize": ("i", True, 0), # Function: # - MaxPool - # - AvgPool (not yet supported, but HLSLIB does) - # - AccPool (not yet supported, but HLSLIB does) - "Function": ("s", True, ""), + # - QuantAvgPool + # TODO add support for AvgPool and AccPool + "Function": ("s", True, "", {"MaxPool", "QuantAvgPool"}), "OutImgDim": ("i", True, 0), # FINN DataTypes for inputs/outputs "InputDataType": ("s", True, ""), @@ -185,14 +185,6 @@ class Pool_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to 
"finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py index e80920551120e0e74aae217d9fe4e287e6cabd3d..e2d97a0eaa29604006790a542157639c5c776b22 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -30,7 +30,7 @@ import os import numpy as np import math -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.core.datatype import DataType from onnx import TensorProto, helper from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -55,7 +55,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): # Toggle between hls or IPI implementation # hls - use the hls generated IP during stitching # vivado - use the AXI Infrastructure DWC - "impl_style": ("s", False, "hls"), + "impl_style": ("s", False, "hls", {"hls", "vivado"}), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -186,14 +186,6 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index 9d63a6866269ddf6c5c7cf54de00b6dfd11505e6..10e0fbbde4f485a9fc9febb21308c9b0c49da041 100644 --- 
a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -32,7 +32,7 @@ import numpy as np from onnx import TensorProto, helper from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.basic import ( interleave_matrix_outer_dim_from_partitions, roundup_to_integer_multiple, @@ -68,7 +68,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): "SIMD": ("i", True, 0), "MW": ("i", True, 0), "MH": ("i", True, 0), - "resType": ("s", True, ""), + "resType": ("s", False, "lut", {"auto", "lut", "dsp"}), "ActVal": ("i", False, 0), # FINN DataTypes for inputs, weights, outputs "inputDataType": ("s", True, ""), @@ -78,9 +78,9 @@ class StreamingFCLayer_Batch(HLSCustomOp): "accDataType": ("s", False, "INT32"), # use xnor-popcount for binary weights/inputs, thus treating them # as bipolar - "binaryXnorMode": ("i", False, 0), + "binaryXnorMode": ("i", False, 0, {0, 1}), # no-activation mode (produce accumulators) - "noActivation": ("i", False, 0), + "noActivation": ("i", False, 0, {0, 1}), # number of input vectors, examples: # [1] is a single vector (like a FC layer with batch=1) # [4] is four vectors (like a FC layer with batch=4) @@ -90,13 +90,13 @@ class StreamingFCLayer_Batch(HLSCustomOp): # const -- embedded weights, default, long compile/synth times # decoupled -- streaming weights with weight streamer packaged inside IP # external -- streaming weights with external streamer - "mem_mode": ("s", False, "const"), + "mem_mode": ("s", False, "const", {"const", "decoupled", "external"}), # FPGA resource type for memories in decoupled mode # auto -- let Vivado decide # block -- use BRAM # distributed -- use LUTRAM # see also https://www.xilinx.com/support/answers/38070.html - "ram_style": ("s", False, "auto"), + "ram_style": ("s", False, "auto", {"auto", "block", "distributed"}), # (mem_mode = decoupled 
only) whether weights will be writable through # an AXI-lite interface during runtime # 1 for enabled, 0 for disabled. @@ -106,7 +106,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): # always "flush" the accelerator by first passing a dummy input # vector through the accelerator. This will get rid of any old # weight data from the weight FIFOs. - "runtime_writeable_weights": ("i", False, 0), + "runtime_writeable_weights": ("i", False, 0, {0, 1}), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -159,13 +159,6 @@ class StreamingFCLayer_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": @@ -238,7 +231,27 @@ class StreamingFCLayer_Batch(HLSCustomOp): D_in = self.get_nodeattr("MW") D_out = self.get_nodeattr("MH") omega = (D_in * D_out) / (Q * P) - return P * (math.ceil(omega / 512)) * (math.ceil((Q * W) / 36)) + mem_width = Q * W * P + mmode = self.get_nodeattr("mem_mode") + mstyle = self.get_nodeattr("ram_style") + if (mmode == "decoupled" and mstyle == "distributed") or ( + mmode == "const" and self.calc_wmem() <= 128 + ): + return 0 + # assuming SDP mode RAMB18s (see UG573 Table 1-10) + # assuming decoupled (RTL) memory, which is more efficient than const (HLS) + if mem_width == 1: + return math.ceil(omega / 16384) + elif mem_width == 2: + return math.ceil(omega / 8192) + elif mem_width <= 4: + return (math.ceil(omega / 4096)) * (math.ceil(mem_width / 4)) + elif mem_width <= 9: + return (math.ceil(omega / 2048)) * (math.ceil(mem_width / 9)) + elif mem_width <= 18 or omega > 512: + return (math.ceil(omega / 1024)) * (math.ceil(mem_width / 18)) + else: + return 
(math.ceil(omega / 512)) * (math.ceil(mem_width / 36)) def bram_efficiency_estimation(self): wdt = self.get_weight_datatype() @@ -246,6 +259,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): D_in = self.get_nodeattr("MW") D_out = self.get_nodeattr("MH") bram16_est = self.bram_estimation() + if bram16_est == 0: + return 1 wbits = W * D_in * D_out bram16_est_capacity = bram16_est * 36 * 512 return wbits / bram16_est_capacity @@ -261,6 +276,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): # TODO add in/out FIFO contributions P = self.get_nodeattr("PE") Q = self.get_nodeattr("SIMD") + MW = self.get_nodeattr("MW") wdt = self.get_weight_datatype() W = wdt.bitwidth() # determine tdt with input and weight data types @@ -269,8 +285,55 @@ class StreamingFCLayer_Batch(HLSCustomOp): # parameters from experiments in paper mentioned above c0 = 300 c1 = 1.1 + c2 = 0 + mmode = self.get_nodeattr("mem_mode") + mstyle = self.get_nodeattr("ram_style") + if (mmode == "decoupled" and mstyle == "distributed") or ( + mmode == "const" and self.calc_wmem() <= 128 + ): + c2 = (P * Q * W) * math.ceil(self.calc_wmem() / 64) + + # multiplication + res_type = self.get_nodeattr("resType") + if res_type == "dsp": + mult_luts = 0 + else: + mult_luts = Q * (2 * math.ceil((W + A) / 6) - 1) * (W + A) + # adder tree + addertree_luts = (W + A) * (2 * Q - 1) + # accumulator + acc_bits = W + A + np.ceil(math.log(MW, 2)) + acc_luts = acc_bits + # thresholds and threshold comparators + thr_luts = 0 + comp_luts = 0 + noact = self.get_nodeattr("noActivation") + if noact == 0: + odt = self.get_output_datatype() + B = odt.bitwidth() + thr_luts = (2 ** B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64) + comp_luts = (2 ** B - 1) * acc_bits + + return int( + c0 + + c1 * (P * (mult_luts + addertree_luts + acc_luts + thr_luts + comp_luts)) + + c2 + ) - return c0 + c1 * (P * Q) * (W * A) + def dsp_estimation(self): + # multiplication + P = self.get_nodeattr("PE") + res_type = self.get_nodeattr("resType") + Q = 
self.get_nodeattr("SIMD") + wdt = self.get_weight_datatype() + W = wdt.bitwidth() + idt = self.get_input_datatype() + A = idt.bitwidth() + if res_type == "dsp": + mult_dsp = P * Q * np.ceil((W + A) / 48) # TODO: more accurate modelling + else: + mult_dsp = 0 + return int(mult_dsp) def get_exp_cycles(self): pe = self.get_nodeattr("PE") @@ -934,6 +997,11 @@ class StreamingFCLayer_Batch(HLSCustomOp): def docompute(self): mem_mode = self.get_nodeattr("mem_mode") + map_to_hls_mult_style = { + "auto": "ap_resource_dflt()", + "lut": "ap_resource_lut()", + "dsp": "ap_resource_dsp()", + } tmpl_args = self.get_template_param_values() if self.calc_tmem() == 0: odtype_hls_str = self.get_output_datatype().get_hls_datatype_str() @@ -950,7 +1018,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): tmpl_args["TDstI"], tmpl_args["TWeightI"], threshs, - self.get_nodeattr("resType"), + map_to_hls_mult_style[self.get_nodeattr("resType")], ) ] elif mem_mode == "decoupled" or mem_mode == "external": @@ -968,7 +1036,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): tmpl_args["TWeightI"], wdtype_hls_str, threshs, - self.get_nodeattr("resType"), + map_to_hls_mult_style[self.get_nodeattr("resType")], ) ] diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 56a7e86854a903a608c253122880ce6ef2e68ef4..9063f018bdcf64c9664e92eeabec539ee2c721af 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -32,7 +32,7 @@ import subprocess import math import warnings -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.core.datatype import DataType from onnx import TensorProto, helper from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -56,13 +56,18 @@ class StreamingFIFO(HLSCustomOp): # Toggle between hls or IPI implementation # rtl - use the hls generated IP during stitching # 
vivado - use the AXI Infrastructure FIFO - "impl_style": ("s", False, "rtl"), + "impl_style": ("s", False, "rtl", {"rtl", "vivado"}), # FPGA resource type for FIFOs when impl_style is vivado # auto -- let Vivado decide # block -- use BRAM # distributed -- use LUTRAM # ultra -- use URAM (on UltraScale+) - "ram_style": ("s", False, "auto"), + "ram_style": ( + "s", + False, + "auto", + {"auto", "block", "distributed", "ultra"}, + ), } my_attrs.update(super().get_nodeattr_types()) diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index 53bcab993b25173c8620d7f4a6694a8efaf74c4d..7850a85ccf61c7e4a26c25b807d6613a1ad66c5a 100644 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -29,8 +29,8 @@ import os import numpy as np -from finn.custom_op.fpgadataflow import HLSCustomOp -from finn.custom_op.im2col import compute_conv_output_dim +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp +from finn.custom_op.general.im2col import compute_conv_output_dim from finn.core.datatype import DataType from onnx import TensorProto, helper from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -138,14 +138,6 @@ class StreamingMaxPool_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py index ccb065f62a8340b916bfa5f6cf96c23c65d19d12..8a944fe77dc938db4154bb0a2ffcff8fdaefbd72 100644 --- 
a/src/finn/custom_op/fpgadataflow/thresholding_batch.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py @@ -34,7 +34,7 @@ import numpy as np from onnx import TensorProto, helper from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.basic import ( interleave_matrix_outer_dim_from_partitions, roundup_to_integer_multiple, @@ -70,7 +70,7 @@ class Thresholding_Batch(HLSCustomOp): # number of steps in thresholding function "numSteps": ("i", True, 1), # string defining memory type - "ram_style": ("s", False, "distributed"), + "ram_style": ("s", False, "distributed", {"distributed", "block"}), # FINN DataTypes for inputs, outputs "inputDataType": ("s", True, ""), "weightDataType": ("s", True, ""), @@ -88,7 +88,7 @@ class Thresholding_Batch(HLSCustomOp): # memory mode for the thresholds # const -- embedded thresholds, default # decoupled -- streaming thresholds with streamer packaged inside IP - "mem_mode": ("s", False, "const"), + "mem_mode": ("s", False, "const", {"const", "decoupled"}), # (mem_mode = decoupled only) whether weights (thresholds) will be # writable through an AXI-lite interface during runtime # 1 for enabled, 0 for disabled. @@ -98,7 +98,7 @@ class Thresholding_Batch(HLSCustomOp): # always "flush" the accelerator by first passing a dummy input # vector through the accelerator. This will get rid of any old # weight data from the weight FIFOs. 
- "runtime_writeable_weights": ("i", False, 0), + "runtime_writeable_weights": ("i", False, 0, {0, 1}), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -137,13 +137,6 @@ class Thresholding_Batch(HLSCustomOp): def verify_node(self): info_messages = [] - # verify that "domain" is set to "finn" - domain_value = self.onnx_node.domain - if domain_value == "finn": - info_messages.append("Attribute domain is set correctly") - else: - info_messages.append('Attribute domain should be set to "finn"') - # verify that "backend" is set to "fpgadataflow" backend_value = self.get_nodeattr("backend") if backend_value == "fpgadataflow": diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index 38a139c279701ae7892f41b63c3c717a3e736691..bedaf0984c39ef7603e6829961d7a3efb6ff489f 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -26,7 +26,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp class TLastMarker(HLSCustomOp): @@ -47,14 +47,14 @@ class TLastMarker(HLSCustomOp): # whether static or dynamic (from AXI lite) number of iterations are used "DynIters": ("i", False, 1), # direction: whether to insert or remove TLAST - "Direction": ("s", False, "out"), + "Direction": ("s", False, "out", {"out", "in"}), # width of input-output data streams, in bits "StreamWidth": ("i", True, 0), # width of individual element in stream, in bits "ElemWidth": ("i", True, 0), # Protocol: external or internal # Vitis docs recommend using qdma_axis for external, ap_axiu for internal - "Protocol": ("s", False, "external"), + "Protocol": ("s", False, "external", {"external", "internal"}), } my_attrs.update(super().get_nodeattr_types()) return my_attrs diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py index c7c08d081a04ff72ae2a198e65091d042bd8d599..333884f361983e2a465715f3f4119c9c6384558e 100644 --- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py +++ b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py @@ -1,9 +1,10 @@ import os import numpy as np +import math from onnx import TensorProto, helper from finn.core.datatype import DataType -from finn.custom_op.fpgadataflow import HLSCustomOp +from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp from finn.util.basic import interleave_matrix_outer_dim_from_partitions from finn.util.data_packing import ( npy_to_rtlsim_input, @@ -24,14 +25,14 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): "Dim": ("i", True, 0), "Channels": ("i", True, 0), "Kernel": ("i", True, 0), - "resType": ("s", True, ""), + "resType": ("s", False, "auto", {"auto", "lut", "dsp"}), "ActVal": ("i", False, 0), # FINN DataTypes for inputs, weights, outputs "inputDataType": ("s", True, ""), "weightDataType": ("s", 
True, ""), "outputDataType": ("s", True, ""), # no-activation mode (produce accumulators) - "noActivation": ("i", False, 0), + "noActivation": ("i", False, 0, {0, 1}), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -408,6 +409,11 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): ) def docompute(self): + map_to_hls_mult_style = { + "auto": "ap_resource_dflt()", + "lut": "ap_resource_lut()", + "dsp": "ap_resource_dsp()", + } tmpl_args = self.get_template_param_values() if self.calc_tmem() == 0: odtype_hls_str = self.get_output_datatype().get_hls_datatype_str() @@ -423,7 +429,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): tmpl_args["TDstI"], tmpl_args["TWeightI"], threshs, - self.get_nodeattr("resType"), + map_to_hls_mult_style[self.get_nodeattr("resType")], ) ] @@ -504,3 +510,99 @@ class Vector_Vector_Activate_Batch(HLSCustomOp): "complete dim=3" ) ) + + def bram_estimation(self): + """Calculates resource estimation for BRAM""" + # TODO add in/out FIFO contributions + P = self.get_nodeattr("PE") + wdt = self.get_weight_datatype() + W = wdt.bitwidth() + omega = self.calc_wmem() + # assuming SDP mode RAMB18s (see UG573 Table 1-10) + # since this is HLS memory, not using the full width of a BRAM + # assuming memories up to 128 deep get implemented in LUTs + if self.calc_wmem() <= 128: + return 0 + + if W == 1: + return math.ceil(omega / 16384) * P + elif W == 2: + return math.ceil(omega / 8192) * P + elif W <= 4: + return (math.ceil(omega / 4096)) * (math.ceil(W / 4)) * P + elif W <= 9: + return (math.ceil(omega / 2048)) * (math.ceil(W / 8)) * P + elif W <= 18 or omega > 512: + return (math.ceil(omega / 1024)) * (math.ceil(W / 16)) * P + else: + return (math.ceil(omega / 512)) * (math.ceil(W / 32)) * P + + def bram_efficiency_estimation(self): + P = self.get_nodeattr("PE") + wdt = self.get_weight_datatype() + W = wdt.bitwidth() + omega = self.calc_wmem() + bram16_est = self.bram_estimation() + if bram16_est == 0: + return 1 + wbits = W * P * 
omega + bram16_est_capacity = bram16_est * 36 * 512 + return wbits / bram16_est_capacity + + def lut_estimation(self): + """Calculates resource estimations for LUTs based on: + - FINN-R: An End-to-End Deep-Learning Framework for Fast + Exploration of Quantized Neural Networks + - M. Blott, T. B. Preusser, N. J. Fraser, G. Gambardella, K. O'Brien, + Y. Umuroglu, M. Leeser and K. Vissers + - 12. Sep 2018 + """ + # TODO add in/out FIFO contributions + P = self.get_nodeattr("PE") + wdt = self.get_weight_datatype() + W = wdt.bitwidth() + # determine tdt with input and weight data types + idt = self.get_input_datatype() + A = idt.bitwidth() + # parameters from experiments in paper mentioned above + c0 = 300 + c1 = 1.1 + c2 = 0 + if self.calc_wmem() <= 128: + c2 = P * W * math.ceil(self.calc_wmem() / 64) + + # multiplication + res_type = self.get_nodeattr("resType") + if res_type == "dsp": + mult_luts = 0 + else: + mult_luts = (2 * math.ceil((W + A) / 6) - 1) * (W + A) + # accumulator + k = self.get_nodeattr("Kernel") + acc_bits = W + A + math.ceil(math.log(k * k, 2)) + acc_luts = acc_bits + # thresholds and threshold comparators + thr_luts = 0 + comp_luts = 0 + noact = self.get_nodeattr("noActivation") + if noact == 0: + odt = self.get_output_datatype() + B = odt.bitwidth() + thr_luts = (2 ** B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64) + comp_luts = (2 ** B - 1) * acc_bits + + return int(c0 + c1 * (P * (mult_luts + acc_luts + thr_luts + comp_luts)) + c2) + + def dsp_estimation(self): + # multiplication + P = self.get_nodeattr("PE") + res_type = self.get_nodeattr("resType") + wdt = self.get_weight_datatype() + W = wdt.bitwidth() + idt = self.get_input_datatype() + A = idt.bitwidth() + if res_type == "dsp": + mult_dsp = P * np.ceil((W + A) / 48) # TODO: more accurate modelling + else: + mult_dsp = 0 + return int(mult_dsp) diff --git a/src/finn/qnn-data/onnx/finn-hls-model/finn-hls-onnx-model.onnx b/src/finn/qnn-data/onnx/finn-hls-model/finn-hls-onnx-model.onnx 
deleted file mode 100644 index c2db9153f4a0269025da64f54b491ee6d511dbdd..0000000000000000000000000000000000000000 --- a/src/finn/qnn-data/onnx/finn-hls-model/finn-hls-onnx-model.onnx +++ /dev/null @@ -1,207 +0,0 @@ -finn-hls-onnx-model:º -R -inp memInStrm memInStrm"FIFO* -backend"fpgadataflow * -depth€ :finn -Ò - memInStrm -weights0 -thresh0out1"StreamingFCLayer_Batch* - -MH€ * - -MWÀ * -PE * -SIMD@ * -backend"fpgadataflow *! -resDataType"Recast<XnorMul> * -resType"ap_resource_lut() :finn -L -out1inter0inter0"FIFO* -backend"fpgadataflow * -depth :finn -Ï -inter0 -weights1 -thresh1out2"StreamingFCLayer_Batch* - -MH€ * - -MW€ * -PE@ * -SIMD * -backend"fpgadataflow *! -resDataType"Recast<XnorMul> * -resType"ap_resource_lut() :finn -L -out2inter1inter1"FIFO* -backend"fpgadataflow * -depth :finn -Ï -inter1 -weights2 -thresh2out3"StreamingFCLayer_Batch* - -MH€ * - -MW€ * -PE * -SIMD@ * -backend"fpgadataflow *! -resDataType"Recast<XnorMul> * -resType"ap_resource_lut() :finn -L -out3inter2inter2"FIFO* -backend"fpgadataflow * -depth :finn -Î -inter2 -weights3 -thresh3out4"StreamingFCLayer_Batch* -MH@ * - -MW€ * -PE * -SIMD * -backend"fpgadataflow *! 
-resDataType"Recast<XnorMul> * -resType"ap_resource_lut() :finn -O -out4outp -memOutStrm"FIFO* -backend"fpgadataflow * -depth€ :finnfinn_hls_onnx_graphZ -inp - - - -@b -outp - - - -@j - memInStrm - - - -@j -weights0 - -@ - - j% -thresh0 - - - - - -j -out1 - - - - j -inter0 - - - - j -weights1 - - -@ -€j% -thresh1 - - -@ - - -j -out2 - - - -@j -inter1 - - - -@j -weights2 - -@ - -€j% -thresh2 - - - - - -j -out3 - - - - j -inter2 - - - - j -weights3 - - - -€j% -thresh3 - - - - - -j -out4 - - - -@r -inp - finn_datatypeBIPOLARr -outp - finn_datatypeBIPOLARr% - memInStrm - finn_datatypeBIPOLARr$ -weights0 - finn_datatypeBIPOLARr# -thresh0 - finn_datatypeBIPOLARr -out1 - finn_datatypeBIPOLARr" -inter0 - finn_datatypeBIPOLARr$ -weights1 - finn_datatypeBIPOLARr# -thresh1 - finn_datatypeBIPOLARr -out2 - finn_datatypeBIPOLARr" -inter1 - finn_datatypeBIPOLARr$ -weights2 - finn_datatypeBIPOLARr# -thresh2 - finn_datatypeBIPOLARr -out3 - finn_datatypeBIPOLARr" -inter2 - finn_datatypeBIPOLARr$ -weights3 - finn_datatypeBIPOLARr# -thresh3 - finn_datatypeBIPOLARr -out4 - finn_datatypeBIPOLARB \ No newline at end of file diff --git a/src/finn/qnn-data/onnx/finn-hls-model/tfc_w1_a1_after_conv_to_hls.onnx b/src/finn/qnn-data/onnx/finn-hls-model/tfc_w1_a1_after_conv_to_hls.onnx deleted file mode 100644 index aada6f07e9d3910122d2eb357d8a8c1224e9fbab..0000000000000000000000000000000000000000 Binary files a/src/finn/qnn-data/onnx/finn-hls-model/tfc_w1_a1_after_conv_to_hls.onnx and /dev/null differ diff --git a/src/finn/transformation/fpgadataflow/cleanup.py b/src/finn/transformation/fpgadataflow/cleanup.py index f089317074eb2bded4675f6fd2e22fdaeb4b6a82..5dbe5f0517d07bef07e5ecff6e4c7afff0293d86 100644 --- a/src/finn/transformation/fpgadataflow/cleanup.py +++ b/src/finn/transformation/fpgadataflow/cleanup.py @@ -56,7 +56,7 @@ class CleanUp(Transformation): if is_fpgadataflow_node(node) is True: try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = 
registry.getCustomOp(node) # delete code_gen_dir from cppsim code_gen_dir = inst.get_nodeattr("code_gen_dir_cppsim") if os.path.isdir(code_gen_dir): diff --git a/src/finn/transformation/fpgadataflow/compile_cppsim.py b/src/finn/transformation/fpgadataflow/compile_cppsim.py index e17feb4683189ad2f8174f0564a877f84870b51d..6321b3335907948fb49de966c80eb21637e0a6ec 100644 --- a/src/finn/transformation/fpgadataflow/compile_cppsim.py +++ b/src/finn/transformation/fpgadataflow/compile_cppsim.py @@ -52,7 +52,7 @@ class CompileCppSim(NodeLocalTransformation): if is_fpgadataflow_node(node) is True: try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) # ensure that code is generated assert ( inst.get_nodeattr("code_gen_dir_cppsim") != "" diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index f27ebc645dbee20ff97b64aa942e375250f60cbd..749cf6c91a975a2ffaffedefa77b2f3fcb793e32 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -105,7 +105,7 @@ class InferConvInpGen(Transformation): "FMPadding_Batch", [i2c_input], [padding_out], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ImgDim=ifm_dim, Padding=2 * pad, @@ -121,7 +121,7 @@ class InferConvInpGen(Transformation): "DownSampler", [ConvInpGen_input], [i2c_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ImgDim=ConvInpGen_idim, NumChannels=ifm_ch, @@ -136,7 +136,7 @@ class InferConvInpGen(Transformation): "ConvolutionInputGenerator", [ConvInpGen_input], [i2c_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ConvKernelDim=k, IFMChannels=ifm_ch, @@ -187,7 +187,7 @@ class InferStreamingMaxPool(Transformation): "StreamingMaxPool_Batch", [mp_input], [mp_output], - 
domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", PoolDim=k, NumChannels=ifm_ch, @@ -314,7 +314,7 @@ class InferPool_Batch(Transformation): "Im2Col", [inp_trans_out], [im2col_out], - domain="finn", + domain="finn.custom_op.general", stride=stride, kernel_size=k, pad_amount=pad, @@ -331,7 +331,7 @@ class InferPool_Batch(Transformation): "Pool_Batch", [im2col_out], [pool_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", InputDataType=idt.name, OutputDataType=odt.name, @@ -440,9 +440,8 @@ class InferBinaryStreamingFCLayer(Transformation): "StreamingFCLayer_Batch", [mm_input, mm_weight, mt_thres], [mt_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, @@ -471,9 +470,8 @@ class InferBinaryStreamingFCLayer(Transformation): "StreamingFCLayer_Batch", [mm_input, mm_weight], [mm_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, @@ -575,9 +573,8 @@ class InferQuantizedStreamingFCLayer(Transformation): "StreamingFCLayer_Batch", [mm_input, mm_weight, mt_thres], [mt_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, @@ -606,9 +603,8 @@ class InferQuantizedStreamingFCLayer(Transformation): "StreamingFCLayer_Batch", [mm_input, mm_weight], [mm_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, @@ -726,9 +722,9 @@ class InferVVAU(Transformation): "Vector_Vector_Activate_Batch", [mm_input, mm_weight, mt_thres], [mt_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", + resType="lut", PE=pe, Dim=mm_in_shape[1], Channels=channels, @@ -754,9 +750,9 @@ class 
InferVVAU(Transformation): "Vector_Vector_Activate_Batch", [mm_input, mm_weight], [mm_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", + resType="lut", PE=pe, Dim=mm_in_shape[1], Channels=channels, @@ -842,7 +838,7 @@ class InferThresholdingLayer(Transformation): "Thresholding_Batch", [thl_input, thl_threshold], [thl_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=ifc, PE=pe, @@ -935,7 +931,7 @@ class InferAddStreamsLayer(Transformation): "AddStreams_Batch", [in0, in1], [result], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=num_channels, PE=pe, @@ -995,7 +991,7 @@ class InferDuplicateStreamsLayer(Transformation): "DuplicateStreams_Batch", [output_tensor], out_tensor_clones, - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=num_ch, PE=pe, @@ -1160,7 +1156,7 @@ class InferChannelwiseLinearLayer(Transformation): "ChannelwiseOp_Batch", [ll_input, ll_const], [ll_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", Func=func, NumChannels=ch, @@ -1221,7 +1217,7 @@ class InferLabelSelectLayer(Transformation): "LabelSelect_Batch", [fc_input], [idx_output], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", Labels=num_labels, PE=pe, @@ -1297,7 +1293,7 @@ class InferGlobalAccPoolLayer(Transformation): "GlobalAccPool_Batch", [in0], [pool_out], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=num_ch, PE=pe, diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py index 90a92d11ce621897e3e6c687f57b1cdf77d08fba..6df9e6d1e62270b13f31560a99109c9b108f8025 100644 --- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py +++ 
b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py @@ -126,7 +126,7 @@ class CreateDataflowPartition(Transformation): [df_out], # use the model attribute to mark the df model model=df_model_filename, - domain="finn", + domain="finn.custom_op.general", ) non_df_model.graph.node.insert(df_start_ind, df_node) model = non_df_model diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index f7643673a0ba326ab77e4379d524fc831fbbc9ca..cbd353e4ad9099d13f10deadb4c99c290713d370 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -32,7 +32,7 @@ import subprocess import json from finn.transformation.base import Transformation -from finn.util.basic import get_by_name, make_build_dir +from finn.util.basic import get_by_name, make_build_dir, is_finn_op from finn.custom_op.registry import getCustomOp from finn.util.basic import get_num_default_workers import multiprocessing as mp @@ -223,7 +223,7 @@ class CreateStitchedIP(Transformation): ip_dirs.append("/workspace/finn/finn-rtllib/memstream") # ensure that all nodes are fpgadataflow, and that IPs are generated for node in model.graph.node: - assert node.domain == "finn", 'Node domain is not set to "finn"' + assert is_finn_op(node.domain), "Found non-FINN node" backend_attribute = get_by_name(node.attribute, "backend") assert backend_attribute is not None, "Backend node attribute is not set." 
backend_value = backend_attribute.s.decode("UTF-8") diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ip.py b/src/finn/transformation/fpgadataflow/hlssynth_ip.py index e79d70544c5e8d2b9060e354d7713b8405ae9c7f..bbd012a715e49b61c19daad65f8de889112f92a7 100644 --- a/src/finn/transformation/fpgadataflow/hlssynth_ip.py +++ b/src/finn/transformation/fpgadataflow/hlssynth_ip.py @@ -56,7 +56,7 @@ class HLSSynthIP(NodeLocalTransformation): if is_fpgadataflow_node(node) is True: try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) # ensure that code is generated assert ( inst.get_nodeattr("code_gen_dir_ipgen") != "" diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py index 195a005ff87b43c6b64017354895693cd811a48e..e26e92391edd8ac420e89c72fb34c5554c601967 100644 --- a/src/finn/transformation/fpgadataflow/insert_dwc.py +++ b/src/finn/transformation/fpgadataflow/insert_dwc.py @@ -4,6 +4,7 @@ from onnx import helper as oh from finn.custom_op.registry import getCustomOp from finn.transformation.base import Transformation from finn.util.fpgadataflow import is_fpgadataflow_node +import warnings def _is_dwc_node(node): @@ -40,48 +41,59 @@ class InsertDWC(Transformation): for n in graph.node: node_ind += 1 if _suitable_node(n): - n_output = n.output[0] - consumer = model.find_consumer(n_output) - if _suitable_node(consumer) is True: - n0 = getCustomOp(n) - n1 = getCustomOp(consumer) - n0_out_shape = n0.get_folded_output_shape() - n1_in_shape = n1.get_folded_input_shape() - if n0_out_shape[-1] != n1_in_shape[-1]: - graph_modified = True - # determine dwc inwidth - dwc_in_width = n0.get_outstream_width() - # determine dwc outwidth - dwc_out_width = n1.get_instream_width() - - # determine shape for dwc - dwc_shape = n0.get_normal_output_shape() - - # determine dtype for dwc - dtype = n0.get_output_datatype() - - dwc_output_tensor = 
oh.make_tensor_value_info( - model.make_new_valueinfo_name(), - TensorProto.FLOAT, - dwc_shape, + for n_output in n.output: + consumers = model.find_consumers(n_output) + if consumers is None: + continue + if len(consumers) > 1: + warnings.warn( + n.name + + ": HLS node with fan-out higher than 1 cannot be stitched" ) - graph.value_info.append(dwc_output_tensor) - - dwc_node = oh.make_node( - "StreamingDataWidthConverter_Batch", - [n_output], - [dwc_output_tensor.name], - domain="finn", - backend="fpgadataflow", - shape=dwc_shape, - inWidth=dwc_in_width, - outWidth=dwc_out_width, - dataType=str(dtype.name), - ) - # insert dwc - graph.node.insert(node_ind + 1, dwc_node) - # set dwc output tensor as new input tensor of second node - consumer.input[0] = dwc_output_tensor.name + consumer = consumers[0] + if _suitable_node(consumer) is True: + n0 = getCustomOp(n) + n1 = getCustomOp(consumer) + n0_out_shape = n0.get_folded_output_shape() + n1_in_shape = n1.get_folded_input_shape() + if n0_out_shape[-1] != n1_in_shape[-1]: + graph_modified = True + # determine dwc inwidth + dwc_in_width = n0.get_outstream_width() + # determine dwc outwidth + dwc_out_width = n1.get_instream_width() + + # determine shape for dwc + dwc_shape = n0.get_normal_output_shape() + + # determine dtype for dwc + dtype = n0.get_output_datatype() + + dwc_output_tensor = oh.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + dwc_shape, + ) + graph.value_info.append(dwc_output_tensor) + + dwc_node = oh.make_node( + "StreamingDataWidthConverter_Batch", + [n_output], + [dwc_output_tensor.name], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + shape=dwc_shape, + inWidth=dwc_in_width, + outWidth=dwc_out_width, + dataType=str(dtype.name), + ) + # insert dwc + graph.node.insert(node_ind + 1, dwc_node) + + # set dwc output tensor as new input tensor of second node + for idx, inp in enumerate(consumer.input): + if inp == n_output: + consumer.input[idx] = 
dwc_output_tensor.name return (model, graph_modified) diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index a3056aaa15a5f00cdc7b33f5dba83820c76dfa10..def6babf82f8fb4bc290daa19efb4aeec074541c 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -116,7 +116,7 @@ class InsertFIFO(Transformation): "StreamingFIFO", [n_output], [fifo_output_tensor.name], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", depth=fifo_depth, folded_shape=fld_shape, @@ -164,7 +164,7 @@ class InsertFIFO(Transformation): "StreamingFIFO", [n_input], [fifo_output_tensor.name], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", depth=fifo_depth, folded_shape=fld_shape, @@ -210,7 +210,7 @@ class InsertFIFO(Transformation): "StreamingFIFO", [fifo_input_tensor.name], [graph_out_name], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", depth=fifo_depth, folded_shape=fld_shape, diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py index 1196035b22cf21c2de4901dc544875ebc80525d4..fe53bd39639462b8cebcdf5febe3b11e7eda96dc 100644 --- a/src/finn/transformation/fpgadataflow/insert_iodma.py +++ b/src/finn/transformation/fpgadataflow/insert_iodma.py @@ -139,7 +139,7 @@ class InsertIODMA(Transformation): intfWidth=intfwidth, streamWidth=streamWidth, direction="out", - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ) model.graph.node.append(dma_node) @@ -172,7 +172,7 @@ class InsertIODMA(Transformation): intfWidth=intfwidth, streamWidth=streamWidth, direction="in", - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ) model.graph.node.insert(0, dma_node) @@ -212,7 +212,7 @@ class InsertIODMA(Transformation): streamWidth=streamWidth, direction="in", 
burstMode="wrap", - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ) fc_node.input[1] = fc_node_in.name diff --git a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py index 8ffb083217bb3a7e379112b3da102487c0cd50c2..3ce9824b14a54f502c90650e7b3b75e9cdaab77f 100644 --- a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py +++ b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py @@ -84,7 +84,7 @@ class InsertTLastMarker(Transformation): DynIters=(1 if self.dyniters else 0), Direction="out", Protocol=("external" if self.external else "internal"), - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ) model.graph.node.append(tlast_node) @@ -159,7 +159,7 @@ class InsertTLastMarker(Transformation): DynIters=(1 if self.dyniters else 0), Direction="in", Protocol=("external" if self.external else "internal"), - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ) model.graph.node.insert(insert_idx, tlast_node) diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py index 26354bdf70e10bfcddfbaf732a214865c6feb8f5..653ec02ff306bf35d5fd3f7265404e61641077ac 100644 --- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py +++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py @@ -44,7 +44,7 @@ def _codegen_single_node(node, model): op_type = node.op_type try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) # get the path of the code generation directory code_gen_dir = inst.get_nodeattr("code_gen_dir_cppsim") # ensure that there is a directory diff --git a/src/finn/transformation/fpgadataflow/prepare_ip.py b/src/finn/transformation/fpgadataflow/prepare_ip.py index 53cb0af163b853c0a0352d8562cca66b3ecf6068..4ed5e80aa7baa585f83314ec42233d5885dff32d 100644 --- 
a/src/finn/transformation/fpgadataflow/prepare_ip.py +++ b/src/finn/transformation/fpgadataflow/prepare_ip.py @@ -41,7 +41,7 @@ def _codegen_single_node(node, model, fpgapart, clk): op_type = node.op_type try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) # get the path of the code generation directory code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen") # ensure that there is a directory diff --git a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py index d2ec5561a349d5fc83f02870c1a682dba8433e43..eaa85b9102b55bf8ecdf3a9f284f87468581e113 100644 --- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py +++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py @@ -65,7 +65,7 @@ class PrepareRTLSim(NodeLocalTransformation): if is_fpgadataflow_node(node) is True: try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) inst.prepare_rtlsim() # ensure that executable path is now set assert ( diff --git a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py index c577704129fa564f5e0e1e256623ff10125cf5ac..cc7c305b3ec94482e64235a1b1cf4eee543c46e1 100644 --- a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py +++ b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py @@ -41,11 +41,10 @@ class ReplaceVerilogRelPaths(Transformation): def apply(self, model): for node in model.graph.node: - op_type = node.op_type if is_fpgadataflow_node(node) is True: try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) # find the IP gen dir ipgen_path = inst.get_nodeattr("ipgen_path") if ipgen_path is not None and os.path.isdir(ipgen_path): diff --git a/src/finn/transformation/fpgadataflow/set_exec_mode.py 
b/src/finn/transformation/fpgadataflow/set_exec_mode.py index 6a76031f4c76831f514b77aee6cd3c560b3b9910..4677e59f7b35fec38aeaae65485ed16ba1e18f06 100644 --- a/src/finn/transformation/fpgadataflow/set_exec_mode.py +++ b/src/finn/transformation/fpgadataflow/set_exec_mode.py @@ -45,7 +45,7 @@ class SetExecMode(Transformation): if is_fpgadataflow_node(node) is True: try: # lookup op_type in registry of CustomOps - inst = registry.custom_op[op_type](node) + inst = registry.getCustomOp(node) # set sim_mode accordingly to argument mode inst.set_nodeattr("exec_mode", self.mode) # ensure that sim_mode is now set diff --git a/src/finn/transformation/move_reshape.py b/src/finn/transformation/move_reshape.py index a07eaf142293487237b3f2b93460ba492eb5368d..cb8deaeec4b79d3c47d7705ff8f9bf72a085dfc0 100644 --- a/src/finn/transformation/move_reshape.py +++ b/src/finn/transformation/move_reshape.py @@ -1,10 +1,10 @@ from finn.transformation.base import Transformation -from finn.util.basic import get_by_name +from finn.util.basic import get_by_name, is_finn_op def _is_fpgadataflow_node(node): if node is not None: - if node.domain == "finn": + if is_finn_op(node.domain): n_backend = get_by_name(node.attribute, "backend") if n_backend is None: return False diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index bae3c9f22f4e5b2a525f15d1d948e42a4087953a..08a01171364c6f9c1ecc36b9f12f7447ad24e56c 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -584,7 +584,7 @@ class MakeMaxPoolNHWC(Transformation): perms = list(get_by_name(consumer.attribute, "perm").ints) if perms == [0, 2, 3, 1]: n.op_type = "MaxPoolNHWC" - n.domain = "finn" + n.domain = "finn.custom_op.general" start_name = n.input[0] mid_name = consumer.input[0] end_name = consumer.output[0] diff --git a/src/finn/transformation/streamline/sign_to_thres.py b/src/finn/transformation/streamline/sign_to_thres.py index 
4e35012ceb4f84284ff2a96a60e4a9bd58a65cce..13f2e8524af7ce2d3457d0637f1c6d02733f504b 100644 --- a/src/finn/transformation/streamline/sign_to_thres.py +++ b/src/finn/transformation/streamline/sign_to_thres.py @@ -60,7 +60,7 @@ class ConvertSignToThres(Transformation): "MultiThreshold", [sign_in_name, thres_param_name], [sign_out_name], - domain="finn", + domain="finn.custom_op.general", out_scale=2.0, out_bias=-1.0, out_dtype="BIPOLAR", diff --git a/src/finn/util/create.py b/src/finn/util/create.py new file mode 100644 index 0000000000000000000000000000000000000000..d9c5d7b1b59916edfc8730992535f3ddb57c4d60 --- /dev/null +++ b/src/finn/util/create.py @@ -0,0 +1,178 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +from onnx import TensorProto, helper + +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor + + +def hls_random_mlp_maker(layer_spec): + """Create an MLP of given specification using HLSCustomOp instances. + Generate random weights/thresholds of appropriate size.""" + ret = [] + for lyr in layer_spec: + idt = lyr["idt"] + wdt = lyr["wdt"] + mw = lyr["mw"] + mh = lyr["mh"] + act = lyr["act"] + lyr["W"] = gen_finn_dt_tensor(wdt, (mw, mh)) + if act is None: + # no activation, produce accumulators + T = None + tdt = None + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + odt = DataType.UINT32 + else: + odt = DataType.INT32 + else: + odt = act + (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw) + n_steps = act.get_num_possible_values() - 1 + T = np.random.randint(min, max - 1, (mh, n_steps)).astype(np.float32) + # provide non-decreasing thresholds + T = np.sort(T, axis=1) + # generate thresholds for activation + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + tdt = DataType.UINT32 + # bias thresholds to be positive + T = np.ceil((T + mw) / 2) + assert (T >= 0).all() + else: + tdt = DataType.INT32 + lyr["T"] = T + lyr["tdt"] = tdt + lyr["odt"] = odt + ret.append(lyr) + + return hls_mlp_maker(ret) + + +def hls_mlp_maker(layer_spec): + """Create an MLP of given specification using 
HLSCustomOp instances.""" + + current_in_name = "" + current_out_name = "" + i = 0 + + graph = helper.make_graph(nodes=[], name="mlp", inputs=[], outputs=[]) + + model = helper.make_model(graph, producer_name="finn") + model = ModelWrapper(model) + + for lyr in layer_spec: + current_W_name = "W_%d" % i + current_T_name = "T_%d" % i + current_in_name = "act_%d" % i + current_out_name = "act_%d" % (i + 1) + + W = lyr["W"] + (mw, mh) = W.shape + T = lyr["T"] + pe = lyr["pe"] + simd = lyr["simd"] + wdt = lyr["wdt"] + idt = lyr["idt"] + tdt = lyr["tdt"] + odt = lyr["odt"] + + if i == 0: + global_in = helper.make_tensor_value_info( + current_in_name, TensorProto.FLOAT, [1, mw] + ) + model.graph.input.append(global_in) + + if i == len(layer_spec) - 1: + global_out = helper.make_tensor_value_info( + current_out_name, TensorProto.FLOAT, [1, mh] + ) + model.graph.output.append(global_out) + + # there are two ways to implement bipolar weights and inputs for + # StreamingFC: + # - specify their datatypes as such + # - specify their datatypes as BINARY as use binaryXnorMode + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + # we'll internally convert weights/inputs to binary and specify the + # datatypes as such, and also set the binaryXnorMode attribute to 1 + export_wdt = DataType.BINARY + export_idt = DataType.BINARY + binary_xnor_mode = 1 + else: + export_wdt = wdt + export_idt = idt + binary_xnor_mode = 0 + + if T is not None: + no_act = 0 + node_inp_list = [current_in_name, current_W_name, current_T_name] + if odt == DataType.BIPOLAR: + actval = 0 + else: + actval = odt.min() + else: + # no thresholds + node_inp_list = [current_in_name, current_W_name] + actval = 0 + no_act = 1 + FCLayer_node = helper.make_node( + "StreamingFCLayer_Batch", + node_inp_list, + [current_out_name], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + MW=mw, + MH=mh, + SIMD=simd, + PE=pe, + inputDataType=export_idt.name, + weightDataType=export_wdt.name, + 
outputDataType=odt.name, + ActVal=actval, + binaryXnorMode=binary_xnor_mode, + noActivation=no_act, + ) + + model.graph.node.append(FCLayer_node) + model.set_tensor_datatype(current_in_name, idt) + model.set_tensor_datatype(current_out_name, odt) + model.set_tensor_datatype(current_W_name, wdt) + if binary_xnor_mode: + # convert bipolar to binary + model.set_initializer(current_W_name, (W + 1) / 2) + else: + model.set_initializer(current_W_name, W) + if T is not None: + model.set_tensor_datatype(current_T_name, tdt) + model.set_initializer(current_T_name, T) + i += 1 + + return model diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py index 24933759830535dfcec768d47a6020b4f3e2de35..cf3e064804216e192909eae75f01880554f03d9f 100644 --- a/tests/fpgadataflow/test_code_gen_trafo.py +++ b/tests/fpgadataflow/test_code_gen_trafo.py @@ -51,11 +51,10 @@ def test_code_gen_trafo(): "StreamingFCLayer_Batch", node_inp_list, ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", code_gen_dir="", executable_path="", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py index 65894e02e490f6931e5b03a9aa67b8f22e32583a..a12c69285b7b335f075d8ffd7ba27e039ebc6f8c 100644 --- a/tests/fpgadataflow/test_compilation_trafo.py +++ b/tests/fpgadataflow/test_compilation_trafo.py @@ -53,11 +53,10 @@ def test_compilation_trafo(): "StreamingFCLayer_Batch", node_inp_list, ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", code_gen_dir="", executable_path="", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py index aaffa3f7ed28116a9c1de9dd3b9dacba19954ee1..9d350a9342e3de56cbbb5b3fc4abec69bfc254dc 100644 --- 
a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py @@ -47,7 +47,7 @@ import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode -from finn.custom_op.im2col import compute_conv_output_dim +from finn.custom_op.general.im2col import compute_conv_output_dim from finn.custom_op.registry import getCustomOp from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py index 86409feffd120b1baeeee471415e93f29d9e655a..e8f3c3ae3290b5bdc23e46f7e9991222fdfac000 100644 --- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py +++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py @@ -90,7 +90,7 @@ def make_single_quantavpool_modelwrapper(k, stride, ifm_ch, ifm_dim, ofm_dim, id "QuantAvgPool2d", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.general", stride=stride, kernel=k, ibits=idt.bitwidth(), diff --git a/tests/fpgadataflow/test_create_dataflow_partition.py b/tests/fpgadataflow/test_create_dataflow_partition.py deleted file mode 100644 index 6732b92ae0865e390002bd3c65dfefe3890610e2..0000000000000000000000000000000000000000 --- a/tests/fpgadataflow/test_create_dataflow_partition.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2020, Xilinx -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. 
-# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of FINN nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import os.path -from pkgutil import get_data - - -from finn.core.modelwrapper import ModelWrapper -from finn.custom_op.registry import getCustomOp -from finn.transformation.fpgadataflow.create_dataflow_partition import ( - CreateDataflowPartition, -) -from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker -from finn.util.basic import make_build_dir -from finn.util.test import load_test_checkpoint_or_skip - -build_dir = make_build_dir("test_dataflow_partition_") - - -def test_dataflow_partition_create(): - # load the onnx model - raw_m = get_data( - "finn.qnn-data", "onnx/finn-hls-model/tfc_w1_a1_after_conv_to_hls.onnx" - ) - model = ModelWrapper(raw_m) - model = model.transform(CreateDataflowPartition()) - assert model.graph.node[2].op_type == "StreamingDataflowPartition" - sdp_node = getCustomOp(model.graph.node[2]) - assert sdp_node.__class__.__name__ == "StreamingDataflowPartition" - assert os.path.isfile(sdp_node.get_nodeattr("model")) - model.save(build_dir + "/test_dataflow_partition_create.onnx") - - -def test_dataflow_partition_tlastmarker(): - model = load_test_checkpoint_or_skip( - build_dir + "/test_dataflow_partition_create.onnx" - ) - model_path = getCustomOp(model.graph.node[2]).get_nodeattr("model") - model = ModelWrapper(model_path) - model = model.transform(InsertTLastMarker()) - assert model.graph.node[-1].op_type == "TLastMarker" - assert model.graph.node[-1].domain == "finn" - tl_node = getCustomOp(model.graph.node[-1]) - assert tl_node.get_nodeattr("NumIters") == 1 - assert tl_node.get_nodeattr("StreamWidth") == 320 - assert tl_node.get_nodeattr("ElemWidth") == 32 - model.save(build_dir + "/test_dataflow_partition_tlastmarker.onnx") - model = model.transform(InsertTLastMarker()) - model.save(build_dir + "/test_dataflow_partition_tlastmarker2.onnx") diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py index 
f269a1ed7247503f561425b97115694503522171..7c608fc3863ab72d1097f49b793af73664b2be48 100644 --- a/tests/fpgadataflow/test_depthwise_convolution.py +++ b/tests/fpgadataflow/test_depthwise_convolution.py @@ -43,7 +43,7 @@ from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode import finn.core.onnx_exec as oxe -from finn.custom_op.im2col import compute_conv_output_dim +from finn.custom_op.general.im2col import compute_conv_output_dim from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor from finn.custom_op.registry import getCustomOp @@ -70,7 +70,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding): tdt = DataType.INT32 thresh_node = oh.make_node( "MultiThreshold", - domain="finn", + domain="finn.custom_op.general", inputs=["outp", "T"], outputs=["out_act"], data_layout="NHWC", @@ -93,7 +93,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding): im2col_node = oh.make_node( "Im2Col", - domain="finn", + domain="finn.custom_op.general", inputs=["inp"], outputs=["im2col_out"], kernel_size=k, diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py index e4191c75d6249d22b36e41fed50c5f7896f13c22..0fa156e23b4a01270297e4e8e1fdc13a75eb5a59 100644 --- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py @@ -55,7 +55,7 @@ def make_addstreams_modelwrapper(ch, pe, idt): "AddStreams_Batch", ["inp1", "inp2"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=ch, PE=pe, diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py index 0e0ce7d542f605cc6af5df13b45d670cfcafa5a9..e45dfe07c3abc0ce218dee0563055acb4458ccd0 100644 --- 
a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py +++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -61,7 +61,7 @@ def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs): "ChannelwiseOp_Batch", node_inp_list, ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=NumChannels, Func=func, diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py index 020a2a545dadaf32c469789c90d0ea530688812c..0e2e60534bcc871592128fdbbd5ca52b3cc0fe4f 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py @@ -60,7 +60,7 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i "Im2Col", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.general", backend="fpgadataflow", stride=stride, kernel_size=k, @@ -96,7 +96,7 @@ def make_single_slidingwindow_modelwrapper( "ConvolutionInputGenerator", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ConvKernelDim=k, IFMChannels=ifm_ch, diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py index 47401ddb9546d5b32a5d36c6731981aabe0ca7cd..12505fdf456aa55f881fb5f3d2d609080cc97074 100644 --- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py @@ -58,7 +58,7 @@ def make_dupstreams_modelwrapper(ch, pe, idim, idt): "DuplicateStreams_Batch", ["inp"], ["outp0", "outp1"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=ch, PE=pe, diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index 90b3145805f0c1ba59c7225b121b14b124ffe878..34930e672f3ff9816d3328da102b1bc1daa8a3b1 100644 --- 
a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -50,7 +50,7 @@ def make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype): "StreamingDataWidthConverter_Batch", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", shape=Shape, inWidth=INWidth, diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py index feff580002c317a3a8754dba2b6a9f291e408ac5..00f1ba5d59288b1a463fadbd684ff872269d6970 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py +++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py @@ -33,11 +33,11 @@ from onnx import TensorProto, helper from finn.custom_op.registry import getCustomOp import finn.core.onnx_exec as oxe -import finn.custom_op.xnorpopcount as xp +import finn.custom_op.general.xnorpopcount as xp from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper -from finn.custom_op.multithreshold import multithreshold +from finn.custom_op.general.multithreshold import multithreshold from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim @@ -88,9 +88,8 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non "StreamingFCLayer_Batch", node_inp_list, ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py index 77c518966c15ae002b6e88c244c1ee9e853c29aa..1f1c9936139df4160bd08a0e168d1f4b7e639077 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fifo.py +++ 
b/tests/fpgadataflow/test_fpgadataflow_fifo.py @@ -55,7 +55,7 @@ def make_single_fifo_modelwrapper(Shape, Depth, fld_shape, finn_dtype): "StreamingFIFO", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", depth=Depth, folded_shape=fld_shape, diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py index 8b38b2520c2239be822093da70fb29f6545c0b43..b2835d578b03ee689330d53a9a7b233c9b9f4222 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py +++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py @@ -69,7 +69,7 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty "FMPadding_Batch", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ImgDim=idim, Padding=padding, diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py index 191e00022a0b0ab11fcf4d1a476442cbd824408d..7fca91925a63a5da4294adb002a3cc97831a88ca 100644 --- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py +++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py @@ -54,7 +54,7 @@ def make_accpool_modelwrapper(ch, pe, idim, idt): "GlobalAccPool_Batch", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=ch, PE=pe, diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index 2e9d25778147b1aa774c56f877c35d094c62e2bc..306844c7ef3828d8483d3b0006491864f1525e21 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -88,9 +88,8 @@ def create_one_fc_model(mem_mode="const"): "StreamingFCLayer_Batch", ["inp", "w0"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=m, MH=m, SIMD=simd, @@ -143,9 +142,8 
@@ def create_two_fc_model(mem_mode="decoupled"): "StreamingFCLayer_Batch", ["inp", "w0"], ["mid"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=m, MH=m, SIMD=simd, @@ -163,9 +161,8 @@ def create_two_fc_model(mem_mode="decoupled"): "StreamingFCLayer_Batch", ["mid", "w1"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=m, MH=m, SIMD=simd, @@ -263,23 +260,23 @@ def test_fpgadataflow_ipstitch_rtlsim(mem_mode): "m_axis_0_tlast", "m_axis_0_tready", "m_axis_0_tvalid", - "s_axi_control_araddr", - "s_axi_control_arready", - "s_axi_control_arvalid", - "s_axi_control_awaddr", - "s_axi_control_awready", - "s_axi_control_awvalid", - "s_axi_control_bready", - "s_axi_control_bresp", - "s_axi_control_bvalid", - "s_axi_control_rdata", - "s_axi_control_rready", - "s_axi_control_rresp", - "s_axi_control_rvalid", - "s_axi_control_wdata", - "s_axi_control_wready", - "s_axi_control_wstrb", - "s_axi_control_wvalid", + "s_axi_control_0_araddr", + "s_axi_control_0_arready", + "s_axi_control_0_arvalid", + "s_axi_control_0_awaddr", + "s_axi_control_0_awready", + "s_axi_control_0_awvalid", + "s_axi_control_0_bready", + "s_axi_control_0_bresp", + "s_axi_control_0_bvalid", + "s_axi_control_0_rdata", + "s_axi_control_0_rready", + "s_axi_control_0_rresp", + "s_axi_control_0_rvalid", + "s_axi_control_0_wdata", + "s_axi_control_0_wready", + "s_axi_control_0_wstrb", + "s_axi_control_0_wvalid", ] assert sorted(dir(sim.io)) == sorted(exp_io) model.set_metadata_prop("exec_mode", "rtlsim") diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py index dae91b94120e94eb86bbc4b958decd581f36e671..5d496dbb33d21c9092fb2076cac75b3ccbbaa1e9 100644 --- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py +++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py @@ -53,7 +53,7 @@ def 
make_labelselect_modelwrapper(labels, pe, k, idt): "LabelSelect_Batch", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", Labels=labels, PE=pe, diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py index 398a17132a2ef6c92e600102ff5c0b71a1f65aaa..06ebd90000e7466b2781d3284c5a0a0e56733dea 100644 --- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py +++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py @@ -28,7 +28,10 @@ from onnx import TensorProto, helper -from finn.analysis.fpgadataflow.res_estimation import res_estimation +from finn.analysis.fpgadataflow.res_estimation import ( + res_estimation, + res_estimation_complete, +) from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper from finn.transformation.general import GiveUniqueNodeNames @@ -53,7 +56,7 @@ def test_res_estimate(): pe = 1 idt = DataType.INT2 wdt = DataType.INT2 - odt = DataType.INT32 + odt = DataType.INT2 actval = odt.min() inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw]) @@ -64,9 +67,8 @@ def test_res_estimate(): "StreamingFCLayer_Batch", node_inp_list, ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - resType="ap_resource_lut()", MW=mw, MH=mh, SIMD=simd, @@ -92,10 +94,29 @@ def test_res_estimate(): model = model.transform(GiveUniqueNodeNames()) prod_resource_estimation = model.analysis(res_estimation) expect_resource_estimation = { - "StreamingFCLayer_Batch_0": {"BRAM_18K": 1, 'BRAM_efficiency': 0.001736111111111111, "LUT": 304.4} + "StreamingFCLayer_Batch_0": { + "BRAM_18K": 0, + "BRAM_efficiency": 1, + "LUT": 357, + "DSP": 0, + "URAM": 0, + } } assert check_two_dict_for_equality( prod_resource_estimation, expect_resource_estimation ), """The produced output of - the resource estimation analysis pass is not equal to the expected one""" + the res_estimation analysis pass is not 
equal to the expected one""" + + prod_resource_estimation = model.analysis(res_estimation_complete) + expect_resource_estimation = { + "StreamingFCLayer_Batch_0": [ + {"BRAM_18K": 0, "BRAM_efficiency": 1, "LUT": 352, "DSP": 1, "URAM": 0}, + {"BRAM_18K": 0, "BRAM_efficiency": 1, "LUT": 357, "DSP": 0, "URAM": 0}, + ] + } + + assert check_two_dict_for_equality( + prod_resource_estimation, expect_resource_estimation + ), """The produced output of + the res_estimation_complete analysis pass is not equal to the expected one""" diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index 8461efd15576fc04906b7f48b2629ad83835de38..5d46f4c3db35c159458dfc9e0eb8aae8ee89cb20 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -35,7 +35,7 @@ import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper -from finn.custom_op.multithreshold import multithreshold +from finn.custom_op.general.multithreshold import multithreshold from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim @@ -67,7 +67,7 @@ def make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode): "Thresholding_Batch", node_inp_list, ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", NumChannels=NumChannels, PE=pe, diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py index d61edc86dd6b5669c334e6b7f78ea9a8550cae93..ff88536f477e80e5c92a2c352f0af81488997c7f 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ 
b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py @@ -59,7 +59,7 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): "MaxPoolNHWC", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.general", kernel_shape=[k, k], strides=[k, k], pads=[0, 0, 0, 0], @@ -90,7 +90,7 @@ def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt): "StreamingMaxPool_Batch", ["inp"], ["outp"], - domain="finn", + domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", PoolDim=k, NumChannels=ifm_ch, diff --git a/tests/transformation/streamline/test_move_chw_add_past_conv.py b/tests/transformation/streamline/test_move_chw_add_past_conv.py index b626f7e5b8564739ec383aaddfc262d642bf47cc..fc64a04e40036eae7057c15f4e628155bd563e51 100644 --- a/tests/transformation/streamline/test_move_chw_add_past_conv.py +++ b/tests/transformation/streamline/test_move_chw_add_past_conv.py @@ -34,7 +34,7 @@ from onnx import helper, TensorProto from finn.core.modelwrapper import ModelWrapper from finn.transformation.infer_shapes import InferShapes from finn.transformation.streamline.reorder import MoveAddPastConv -from finn.custom_op.im2col import compute_conv_output_dim +from finn.custom_op.general.im2col import compute_conv_output_dim import finn.core.onnx_exec as oxe diff --git a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py index 2fc19debf8d6fc89d15e3d731f1e54daa491c321..7c49baf8cd9d5b85b3b76f3513d42483d3bbeb0c 100644 --- a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py +++ b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py @@ -52,7 +52,7 @@ def test_move_maxpool_past_multithreshold(): "MultiThreshold", ["t1", "thres1"], ["t2"], - domain="finn", + domain="finn.custom_op.general", out_dtype="BIPOLAR", out_bias=-1.0, out_scale=1.0, @@ -64,7 +64,7 @@ def 
test_move_maxpool_past_multithreshold(): "MultiThreshold", ["t3", "thres2"], ["top_out"], - domain="finn", + domain="finn.custom_op.general", out_dtype="UINT4", ) ] diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py index 1ae8fbfe89986d58d3d71f5f8735a98469d9d1e3..5e96d15867b087fbb5f4f1b467aea34cb33e3ff4 100644 --- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py +++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py @@ -1,7 +1,7 @@ import pytest from onnx import helper, TensorProto -from finn.custom_op.im2col import compute_conv_output_dim +from finn.custom_op.general.im2col import compute_conv_output_dim import finn.core.onnx_exec as oxe from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py index d59aba996201f8c2fc67cf6e40497b5d43611d39..f9259908a2b4e4d716e3fb9ae7ec28cd9ec85d03 100644 --- a/tests/transformation/streamline/test_round_thresholds.py +++ b/tests/transformation/streamline/test_round_thresholds.py @@ -40,7 +40,7 @@ def test_round_thresholds(): thresholds = helper.make_tensor_value_info("thresholds", TensorProto.FLOAT, [4, 1]) out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, 4]) node_def = helper.make_node( - "MultiThreshold", ["v", "thresholds"], ["out"], domain="finn" + "MultiThreshold", ["v", "thresholds"], ["out"], domain="finn.custom_op.general" ) graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out]) model_def = helper.make_model(graph_def) diff --git a/tests/util/test_create.py b/tests/util/test_create.py new file mode 100644 index 0000000000000000000000000000000000000000..42a288b74ecda9746296519b1b86563c75b2752e --- /dev/null +++ b/tests/util/test_create.py @@ -0,0 +1,65 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +import finn.util.create as create +from finn.core.datatype import DataType + + +@pytest.mark.parametrize("bitwidth", [DataType.BIPOLAR, DataType.INT2, DataType.INT4]) +def test_hls_random_mlp_maker(bitwidth): + w = bitwidth + a = bitwidth + layer_spec = [ + { + "mw": 185, + "mh": 100, + "simd": 185, + "pe": 100, + "idt": DataType.BIPOLAR, + "wdt": w, + "act": a, + }, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + { + "mw": 100, + "mh": 1, + "simd": 100, + "pe": 1, + "idt": a, + "wdt": w, + "act": DataType.BIPOLAR, + }, + ] + + ret = create.hls_random_mlp_maker(layer_spec) + assert len(ret.graph.node) == 5 + # ret.save("mlp-%s.onnx" % str(bitwidth))