diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev
index f8e15f34fb4da3dc4ee353a29d26866b68879144..db49dceb2d06670dfc43059d3a4fa6160a8ded58 100644
--- a/docker/Dockerfile.finn_dev
+++ b/docker/Dockerfile.finn_dev
@@ -49,13 +49,14 @@ RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 RUN rm requirements.txt
-RUN pip install jupyter
-RUN pip install matplotlib
-RUN pip install pytest-dependency
-RUN pip install sphinx
-RUN pip install sphinx_rtd_theme
-RUN pip install pytest-xdist
-RUN pip install pytest-parallel
+RUN pip install jupyter==1.0.0
+RUN pip install matplotlib==3.3.1 --ignore-installed certifi
+RUN pip install pytest-dependency==0.5.1
+RUN pip install sphinx==3.1.2
+RUN pip install sphinx_rtd_theme==0.5.0
+RUN pip install pytest-xdist==2.0.0
+RUN pip install pytest-parallel==0.1.0
+RUN pip install netron==4.4.7

 # switch user
 RUN groupadd -g $GID $GNAME
@@ -80,19 +81,6 @@ RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator
 RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-HelloWorld
 # oh-my-xilinx
 RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx
-# netron
-RUN git clone https://github.com/lutzroeder/netron.git /workspace/netron
-
-# build and install netron
-USER root
-RUN curl -sL https://deb.nodesource.com/setup_12.x | bash -
-RUN apt-get install -y nodejs
-WORKDIR /workspace/netron
-RUN git checkout 376e9d33733a3eacfe3c432808fd46e6cd1460cb
-RUN npm install
-RUN python setup.py build
-RUN pip install /workspace/netron
-USER $UNAME

 # for this developer-oriented Docker container we assume the FINN repo is cloned and mounted from the host
 # at /workspace/finn -- see run-docker.sh for an example of how to do this.
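To keep the dev container reproducible, every ad-hoc `pip install` is now pinned, and netron comes from PyPI instead of a from-source Node.js build. A quick sanity check that the pins took effect inside the container could look like this sketch (the version map simply mirrors the `RUN` lines above; `importlib.metadata` is standard library from Python 3.8 onward):

```python
from importlib.metadata import version

# expected pins, mirroring the RUN pip install lines above
expected = {
    "jupyter": "1.0.0",
    "matplotlib": "3.3.1",
    "pytest-dependency": "0.5.1",
    "sphinx": "3.1.2",
    "pytest-xdist": "2.0.0",
    "netron": "4.4.7",
}
for pkg, want in expected.items():
    got = version(pkg)
    assert got == want, "%s: expected %s, found %s" % (pkg, want, got)
print("all pinned packages present")
```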
diff --git a/docker/quicktest.sh b/docker/quicktest.sh
index 02e014cd3cc7bb88eebd02f03ff599913079152b..b06feccdc578a59c8ef00531871e1211c2a407e5 100755
--- a/docker/quicktest.sh
+++ b/docker/quicktest.sh
@@ -16,6 +16,11 @@ elif [ $1 = "rtlsim" ]; then
 elif [ $1 = "end2end" ]; then
   echo "Running end2end test suite with no parallelism"
   python setup.py test --addopts "-k end2end"
+elif [ $1 = "full" ]; then
+  echo "Running full test suite, each step with appropriate parallelism"
+  $0 main;
+  $0 rtlsim;
+  $0 end2end;
 else
   echo "Unrecognized argument to quicktest.sh"
 fi
diff --git a/requirements.txt b/requirements.txt
index b15d86ed89f7b0e76b772ce42aba6481937310b0..4aa1cbe3484a3447851879d7da9ce9d48b066592 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,11 @@
-bitstring
-docrep
-future
+bitstring==3.1.7
+docrep==0.2.7
+future==0.18.2
 numpy==1.18.0
 onnx==1.6.0
 onnxruntime==1.2.0
-pre-commit
-pyverilator
-scipy
-sphinx
-toposort
-vcdvcd
-wget
+pre-commit==2.6.0
+scipy==1.5.2
+toposort==1.5
+vcdvcd==1.0.5
+wget==3.2
diff --git a/src/finn/core/datatype.py b/src/finn/core/datatype.py
index 222d11a8872f9be757fd60fbfa5f8abea683311a..df895a1ad446d6b2cc3ebb24f1179944f4cfe9ab 100644
--- a/src/finn/core/datatype.py
+++ b/src/finn/core/datatype.py
@@ -50,17 +50,69 @@ class DataType(Enum):
     UINT2 = auto()
     UINT3 = auto()
     UINT4 = auto()
+    UINT5 = auto()
+    UINT6 = auto()
+    UINT7 = auto()
     UINT8 = auto()
+    UINT9 = auto()
+    UINT10 = auto()
+    UINT11 = auto()
+    UINT12 = auto()
+    UINT13 = auto()
+    UINT14 = auto()
+    UINT15 = auto()
     UINT16 = auto()
+    UINT17 = auto()
+    UINT18 = auto()
+    UINT19 = auto()
+    UINT20 = auto()
+    UINT21 = auto()
+    UINT22 = auto()
+    UINT23 = auto()
+    UINT24 = auto()
+    UINT25 = auto()
+    UINT26 = auto()
+    UINT27 = auto()
+    UINT28 = auto()
+    UINT29 = auto()
+    UINT30 = auto()
+    UINT31 = auto()
     UINT32 = auto()
+    UINT64 = auto()
     BIPOLAR = auto()
     TERNARY = auto()
     INT2 = auto()
     INT3 = auto()
     INT4 = auto()
+    INT5 = auto()
+    INT6 = auto()
+    INT7 = auto()
     INT8 = auto()
+    INT9 = auto()
+    INT10 = auto()
+    INT11 = auto()
+    INT12 = auto()
+    INT13 = auto()
+    INT14 = auto()
+    INT15 = auto()
     INT16 = auto()
+    INT17 = auto()
+    INT18 = auto()
+    INT19 = auto()
+    INT20 = auto()
+    INT21 = auto()
+    INT22 = auto()
+    INT23 = auto()
+    INT24 = auto()
+    INT25 = auto()
+    INT26 = auto()
+    INT27 = auto()
+    INT28 = auto()
+    INT29 = auto()
+    INT30 = auto()
+    INT31 = auto()
     INT32 = auto()
+    INT64 = auto()
     FLOAT32 = auto()

     def bitwidth(self):
diff --git a/src/finn/core/modelwrapper.py b/src/finn/core/modelwrapper.py
index 646add188c5d475cf37ccd33cf24d29d61754ae1..98b234592ebe0c704fafd1eed980325d8566e7e2 100644
--- a/src/finn/core/modelwrapper.py
+++ b/src/finn/core/modelwrapper.py
@@ -36,6 +36,11 @@ from onnx import TensorProto
 import finn.util.basic as util
 import finn.util.onnx as onnxutil
 from finn.core.datatype import DataType
+from finn.transformation.general import (
+    RemoveUnusedTensors,
+    RemoveStaticGraphInputs,
+    SortGraph,
+)


 class ModelWrapper:
@@ -87,7 +92,7 @@ class ModelWrapper:
         """Runs given analysis_fxn on this model and returns the resulting dict."""
         return analysis_fxn(self)

-    def transform(self, transformation, make_deepcopy=True):
+    def transform(self, transformation, make_deepcopy=True, cleanup=True):
         """Applies given Transformation repeatedly until no more changes
         can be made and returns a transformed ModelWrapper instance.
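`transform()` now tidies the model after every pass by default; the new `cleanup` flag exists so that passes composed of other passes (like the `cleanup()` method defined in the next hunk) can opt out and avoid recursion. A usage sketch, assuming an existing exported model (`model.onnx` is a placeholder path):

```python
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.infer_shapes import InferShapes

model = ModelWrapper("model.onnx")  # placeholder path

# default: unused tensors removed, static inputs dropped, graph re-sorted
model = model.transform(InferShapes())

# opt out, e.g. when chaining many cheap passes inside another transformation
model = model.transform(InferShapes(), cleanup=False, make_deepcopy=False)

# or invoke the cleanup pass directly
model = model.cleanup()
```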
@@ -101,6 +106,22 @@ class ModelWrapper:
             (transformed_model, model_was_changed) = transformation.apply(
                 transformed_model
             )
+        if cleanup:
+            transformed_model.cleanup()
+        return transformed_model
+
+    def cleanup(self):
+        "Run cleanup transformations on the model."
+        transformed_model = self
+        cleanup_transforms = [
+            RemoveUnusedTensors(),
+            RemoveStaticGraphInputs(),
+            SortGraph(),
+        ]
+        for trn in cleanup_transforms:
+            transformed_model = transformed_model.transform(
+                trn, cleanup=False, make_deepcopy=False
+            )
         return transformed_model

     def check_compatibility(self):
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 87f52eeea591ba42bf5374df3c93bcc3e4f8e944..ea6922123a1334a7ea0d0568e09c043e06490f38 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -36,6 +36,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
+    calculate_matvec_accumulator_range,
 )
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
@@ -72,6 +73,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
+            # FINN DataType for accumulator -- auto-computed and updated
+            "accDataType": ("s", False, "INT32"),
             # use xnor-popcount for binary weights/inputs, thus treating them
             # as bipolar
             "binaryXnorMode": ("i", False, 0),
@@ -424,6 +427,51 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             ret = np.flip(ret, axis=-1)
         return ret

+    def minimize_accumulator_width(self, model):
+        weights = model.get_initializer(self.onnx_node.input[1])
+        if len(self.onnx_node.input) > 2:
+            thresholds = model.get_initializer(self.onnx_node.input[2])
+        else:
+            thresholds = None
+        idt = self.get_input_datatype()
+        # calculate minimum and maximum values of accumulator
+        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
+        if thresholds is not None:
+            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
+            # set threshold datatype (and accumulator datatype implicitly)
+            min_threshold = thresholds.min()
+            max_threshold = thresholds.max()
+            # get range required by threshold values
+            tdt_min = min(acc_min, min_threshold)
+            tdt_max = max(acc_max, max_threshold)
+            if tdt_min < 0:
+                if abs(tdt_min) > tdt_max:
+                    tdt = DataType.get_smallest_possible(tdt_min)
+                else:
+                    tdt = DataType.get_smallest_possible(0 - tdt_max)
+            else:
+                tdt = DataType.get_smallest_possible(tdt_max)
+            assert np.vectorize(tdt.allowed)(
+                threshold_tensor
+            ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+            self.set_nodeattr("accDataType", tdt.name)
+        else:
+            if acc_min < 0:
+                if abs(acc_min) > acc_max:
+                    adt = DataType.get_smallest_possible(acc_min)
+                else:
+                    adt = DataType.get_smallest_possible(0 - acc_max)
+            else:
+                adt = DataType.get_smallest_possible(acc_max)
+            # ensure a datatype divisible by 8-bits in case this is the last node
+            bw = roundup_to_integer_multiple(adt.bitwidth(), 8)
+            new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw))
+            adt = DataType[new_adt_name]
+            self.set_nodeattr("accDataType", adt.name)
+            # for no-activation nodes, output dt = acc dt
+            self.set_nodeattr("outputDataType", adt.name)
+        return DataType[self.get_nodeattr("accDataType")]
+
     def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
         """Convert the original numpy weight matrix orig_weight_matrix
         into a form suitable for passing to the hlslib call:
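`minimize_accumulator_width` shrinks the accumulator to the narrowest FINN type whose range covers the worst-case dot-product result (and any thresholds), rounding up to a byte multiple when the accumulator is also the layer output. The selection logic, restated as a standalone sketch without FINN imports (`smallest_int_type` is a stand-in for `DataType.get_smallest_possible`, not the library routine itself):

```python
import math

def smallest_int_type(v_min: int, v_max: int) -> str:
    """Name of the narrowest INT<N>/UINT<N> type covering [v_min, v_max]."""
    if v_min >= 0:
        bits = max(1, math.ceil(math.log2(v_max + 1)))
        return "UINT%d" % bits
    # signed: size for the more demanding of the two bounds
    mag = max(abs(v_min), v_max + 1)
    bits = math.ceil(math.log2(mag)) + 1  # +1 for the sign bit
    return "INT%d" % bits

# example: 64 inputs of UINT8 (0..255) times weights in [-3, 5]
corners = [-3 * 0, -3 * 255, 5 * 0, 5 * 255]
acc_min, acc_max = 64 * min(corners), 64 * max(corners)   # -48960, 81600
print(smallest_int_type(acc_min, acc_max))  # INT18 covers [-131072, 131071]
```

In the no-threshold branch above, the result would then be widened to the next byte multiple (INT18 becomes INT24) via `roundup_to_integer_multiple`.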
@@ -573,7 +621,6 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         thresholds = model.get_initializer(self.onnx_node.input[2])
         if thresholds is not None:
             threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
-            tdt = DataType.INT32
             # use UINT32 threshold export for bipolar times bipolar
             inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
             wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
@@ -583,11 +630,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
             inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
             wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
-            if inp_is_bipolar and wt_is_bipolar:
-                tdt = DataType.UINT32
+            # get computed threshold datatype from attribute
+            tdt = DataType[self.get_nodeattr("accDataType")]
+
             assert np.vectorize(tdt.allowed)(
                 threshold_tensor
-            ).all(), "Thresholds are not int"
+            ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
             thresholds_hls_code = numpy_to_hls_code(
                 threshold_tensor, tdt, "thresholds", False, True
             )
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 379ebd92d86d54c6bc621c7f89b01eacba2b5d3f..562bab0f18990096f7364b3a4e2bcbbbf4ce2b58 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -283,10 +283,25 @@ class Thresholding_Batch(HLSCustomOp):
         thresholds = model.get_initializer(self.onnx_node.input[1])
         threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
-        tdt = DataType.INT32
+
+        min_threshold = thresholds.min()
+        max_threshold = thresholds.max()
+        min_input = self.get_input_datatype().min()
+        max_input = self.get_input_datatype().max()
+        # get range required by threshold values
+        tdt_min = min(min_input, min_threshold)
+        tdt_max = max(max_input, max_threshold)
+        if tdt_min < 0:
+            if abs(tdt_min) > tdt_max:
+                tdt = DataType.get_smallest_possible(tdt_min)
+            else:
+                tdt = DataType.get_smallest_possible(0 - tdt_max - 1)
+        else:
+            tdt = DataType.get_smallest_possible(tdt_max)
         assert np.vectorize(tdt.allowed)(
             threshold_tensor
-        ).all(), "Thresholds are not int"
+        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+
         thresholds_hls_code = numpy_to_hls_code(
             threshold_tensor, tdt, "thresholds", False, True
         )
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index e6dca0e4b05f943c971bc0f97af03f5038fd0dab..88f5fa926f73d5cb1919a02c83153cb8d1894711 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -40,6 +40,9 @@ from finn.transformation.general import SortGraph
 import finn.core.data_layout as DataLayout
 from finn.util.onnx import nchw_to_nhwc
 from finn.util.basic import get_by_name
+from finn.transformation.fpgadataflow.minimize_accumulator_width import (
+    MinimizeAccumulatorWidth,
+)


 class InferConvInpGen(Transformation):
@@ -489,6 +492,7 @@ class InferBinaryStreamingFCLayer(Transformation):
                     graph.node.remove(n)
                     graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
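The threshold exports in both layers above now assert that every threshold value is representable in the computed datatype, instead of the old blanket "Thresholds are not int". `DataType.allowed()` checks a single scalar, hence the `np.vectorize` wrapper; an illustration with a plain function standing in for `DataType.INT8.allowed`:

```python
import numpy as np

def allowed(x):  # stand-in for DataType.INT8.allowed
    return float(x).is_integer() and -128 <= x <= 127

thresholds = np.array([[-100.0, 0.0, 99.0], [1.0, 127.0, -128.0]])
ok = np.vectorize(allowed)(thresholds)
assert ok.all(), "Thresholds can't be expressed with type INT8"
print(ok)  # elementwise boolean mask, True everywhere here
```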
@@ -623,6 +627,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
                     graph.node.remove(n)
                     graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
index 5ec4ab14d65d63523856a6bb107bf75c1ca5a261..fb8b4358abd772d13c355f797649dc3b51975b4d 100644
--- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
+++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
@@ -112,6 +112,7 @@ class CreateDataflowPartition(Transformation):
                     "dataflow_partition" + str(target_partition_id) + "_"
                 )
                 df_model_filename = df_model_dir + "/df_model.onnx"
+                df_model.cleanup()
                 df_model.save(df_model_filename)
                 # remove all dataflow nodes from the non-dataflow model
                 # keep track of where the dataflow part starts
diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
index 1558d7399fe5399053c3f347cd06c4e0d76753e7..095327be0d3c36f201bcf343d8aea61aa069b8e1 100644
--- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -294,11 +294,12 @@ class ZynqBuild(Transformation):
         # Build each kernel individually
         sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
         for sdp_node in sdp_nodes:
+            prefix = sdp_node.name + "_"
             sdp_node = getCustomOp(sdp_node)
             dataflow_model_filename = sdp_node.get_nodeattr("model")
             kernel_model = ModelWrapper(dataflow_model_filename)
             kernel_model = kernel_model.transform(InsertFIFO())
-            kernel_model = kernel_model.transform(GiveUniqueNodeNames())
+            kernel_model = kernel_model.transform(GiveUniqueNodeNames(prefix))
             kernel_model.save(dataflow_model_filename)
             kernel_model = kernel_model.transform(
                 PrepareIP(self.fpga_part, self.period_ns)
             )
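Prefixing node names per `StreamingDataflowPartition` keeps instance names globally unique when several kernels are later stitched into one design. The enumeration scheme (defined in `general.py` further down) behaves like this small re-implementation:

```python
def unique_names(op_types, prefix=""):
    """Mirrors GiveUniqueNodeNames' per-op-type enumeration."""
    count, names = {}, []
    for op in op_types:
        count.setdefault(op, 0)
        names.append("%s%s_%d" % (prefix, op, count[op]))
        count[op] += 1
    return names

print(unique_names(["FCLayer", "FCLayer", "FIFO"], prefix="partition0_"))
# ['partition0_FCLayer_0', 'partition0_FCLayer_1', 'partition0_FIFO_0']
```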
diff --git a/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c54a5efbd3b28f0fbfd074b512929edab234e78
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
+
+
+class MinimizeAccumulatorWidth(Transformation):
+    """For relevant nodes, call the accumulator width minimization
+    functions to save on resources. May alter tensor DataType for
+    certain nodes if they produce an accumulator as result."""
+
+    def __init__(self):
+        super().__init__()
+
+    def apply(self, model):
+        for node in model.graph.node:
+            if is_fpgadataflow_node(node) is True:
+                inst = getCustomOp(node)
+                if hasattr(inst, "minimize_accumulator_width"):
+                    inst.minimize_accumulator_width(model)
+        return (model, False)
diff --git a/src/finn/transformation/general.py b/src/finn/transformation/general.py
index 4303eb17f39a9949f5729e895e449bbb6a633033..8ad59d2baf3015cfebffeff88a059f48d9428371 100644
--- a/src/finn/transformation/general.py
+++ b/src/finn/transformation/general.py
@@ -81,14 +81,19 @@ class RemoveStaticGraphInputs(Transformation):


 class GiveUniqueNodeNames(Transformation):
-    """Give unique names to each node in the graph using enumeration."""
+    """Give unique names to each node in the graph using enumeration, starting
+    with given prefix (if specified in the constructor)."""
+
+    def __init__(self, prefix=""):
+        super().__init__()
+        self.prefix = prefix

     def apply(self, model):
         optype_count = {}
         for n in model.graph.node:
             if n.op_type not in optype_count.keys():
                 optype_count[n.op_type] = 0
-            n.name = "%s_%d" % (n.op_type, optype_count[n.op_type])
+            n.name = "%s%s_%d" % (self.prefix, n.op_type, optype_count[n.op_type])
             optype_count[n.op_type] += 1
         # return model_was_changed = False as single iteration is always enough
         return (model, False)
@@ -189,6 +194,9 @@ class SortGraph(Transformation):
     # Probably this is faster than copying initializers and more robust in general

     def apply(self, model):
+        if len(model.graph.node) == 1:
+            # single-node graph, nothing to sort
+            return (model, False)
         # Gather graph structure
         graph_dependencies = {}
         node_list = [
@@ -214,7 +222,7 @@ class SortGraph(Transformation):
         for new_idx, sorted_idx in enumerate(sorted_node_indexes):
             model.graph.node.insert(new_idx, node_list[sorted_idx])

-        return model, False
+        return (model, False)


 class ConvertSubToAdd(Transformation):
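`MinimizeAccumulatorWidth` follows the usual FINN `Transformation` contract: `apply` returns `(model, model_was_changed)`, and returning `False` stops `transform()` from re-applying the pass (the `SortGraph` return fix above brings it in line with the same tuple convention). For reference, a minimal custom transformation under that contract (illustrative, not part of this diff; `set_metadata_prop` is an existing `ModelWrapper` method):

```python
from finn.transformation import Transformation

class AnnotateNodeCount(Transformation):
    """Toy pass: store the node count as a model metadata property."""

    def apply(self, model):
        model.set_metadata_prop("node_count", str(len(model.graph.node)))
        # False -> transform() applies this exactly once
        return (model, False)
```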
diff --git a/src/finn/transformation/merge_onnx_models.py b/src/finn/transformation/merge_onnx_models.py
index 5dc6127ed189311c72a119932394aca4745e3608..ceacab197150fe6d32e3a9eda268aed186b1a8bc 100644
--- a/src/finn/transformation/merge_onnx_models.py
+++ b/src/finn/transformation/merge_onnx_models.py
@@ -31,12 +31,12 @@ from onnx import helper

 from finn.transformation import Transformation
 from finn.core.modelwrapper import ModelWrapper
-import finn.util.basic as util
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.transformation.general import (
     GiveReadableTensorNames,
+    GiveRandomTensorNames,
     GiveUniqueNodeNames,
     GiveUniqueParameterTensors,
 )
@@ -59,6 +59,9 @@ class MergeONNXModels(Transformation):
         graph_modified = False
         pre_model = self.pre_model
         post_model = copy.deepcopy(model)
+        # to avoid mix-ups, start by giving all tensors random names
+        pre_model = pre_model.transform(GiveRandomTensorNames())
+        post_model = post_model.transform(GiveRandomTensorNames())

         # check for dynamic outputs of pre model
         dyn_outp = []
@@ -94,27 +97,6 @@ class MergeONNXModels(Transformation):
         for n in post_model.graph.node:
             n.name = ""

-        # randomize all tensor names
-        names1 = pre_model.get_all_tensor_names()
-        names2 = post_model.get_all_tensor_names()
-        used_names = names1 + names2
-
-        # pre_model
-        for tensor_name in names1:
-            new_name = util.random_string()
-            while new_name in used_names:
-                new_name = util.random_string()
-            pre_model.rename_tensor(tensor_name, new_name)
-            used_names.append(new_name)
-
-        # post_model
-        for tensor in names2:
-            new_name = util.random_string()
-            while new_name in used_names:
-                new_name = util.random_string()
-            post_model.rename_tensor(tensor_name, new_name)
-            used_names.append(new_name)
-
         # check if models can be merged
         output_model_a = dyn_outp[0].name
         input_model_b = dyn_inp[0].name
@@ -124,6 +106,9 @@ class MergeONNXModels(Transformation):
             output_a_shape == input_b_shape
         ), "Models can't be merged! Shapes don't match."

+        pre_model.save("pre.onnx")
+        post_model.save("post.onnx")
+
         # connect output of one model to input of the other
         for n in pre_model.graph.node:
             if output_model_a == n.output[0]:
@@ -132,83 +117,43 @@

         # extract information for new model
         # nodes
-        node_list_a = pre_model.graph.node
-        node_list_b = post_model.graph.node
-
-        node_list = node_list_a
-        for node in node_list_b:
-            node_list.append(node)
+        node_pre = [node for node in pre_model.graph.node]
+        node_post = [node for node in post_model.graph.node]
+        node_new = node_pre + node_post

         # in and output
         inp = pre_model.graph.input[0]
         outp = post_model.graph.output[0]

+        vi_pre = [x for x in pre_model.graph.value_info]
+        out_pre = [x for x in pre_model.graph.output]
+        qa_pre = [x for x in pre_model.graph.quantization_annotation]
+        init_pre = [x for x in pre_model.graph.initializer]
+
+        vi_post = [x for x in post_model.graph.value_info]
+        qa_post = [x for x in post_model.graph.quantization_annotation]
+        init_post = [x for x in post_model.graph.initializer]
+
+        vi_new = vi_pre + vi_post + out_pre
+        qa_new = qa_pre + qa_post
+        init_new = init_pre + init_post
+
         # create new graph and model
         new_graph = helper.make_graph(
-            nodes=node_list,
+            nodes=node_new,
             name="fuse-graph",
             inputs=[inp],
             outputs=[outp],
-            value_info=[],
+            value_info=vi_new,
         )

         new_model = helper.make_model(new_graph, producer_name="fuse_model")
         new_model = ModelWrapper(new_model)

-        # add value info from both models to new model
-        # pre model
-        vi_pre = [x for x in pre_model.graph.input]
-        vi_pre += [x for x in pre_model.graph.output]
-        vi_pre += [x for x in pre_model.graph.value_info]
-        for vi in vi_pre:
-            # preserve intializers, quantization/sparsity annotation, etc.
-            # initializer
-            init_val = pre_model.get_initializer(vi.name)
-            if init_val is not None:
-                new_model.set_initializer(vi.name, init_val)
-            # FINN datatype
-            dtype = pre_model.get_tensor_datatype(vi.name)
-            new_model.set_tensor_datatype(vi.name, dtype)
-            # data layout
-            data_layout = pre_model.get_tensor_layout(vi.name)
-            if data_layout is not None:
-                new_model.set_tensor_layout(vi.name, data_layout)
-            # sparsity
-            sparsity = pre_model.get_tensor_sparsity(vi.name)
-            if sparsity is not None:
-                new_model.set_tensor_sparsity(vi.name, sparsity)
-            # graph input should not be part of graph.value_info, so don't insert
-            # if current vi == inp, but the quantization annotation is preserved
-            if vi == inp:
-                continue
-            new_model.graph.value_info.append(vi)
-
-        # post model
-        vi_model = [x for x in post_model.graph.input]
-        vi_model += [x for x in post_model.graph.output]
-        vi_model += [x for x in post_model.graph.value_info]
-        for vi in vi_model:
-            # preserve intializers, quantization/sparsity annotation, etc.
-            # initializer
-            init_val = post_model.get_initializer(vi.name)
-            if init_val is not None:
-                new_model.set_initializer(vi.name, init_val)
-            # FINN datatype
-            dtype = post_model.get_tensor_datatype(vi.name)
-            new_model.set_tensor_datatype(vi.name, dtype)
-            # data layout
-            data_layout = post_model.get_tensor_layout(vi.name)
-            if data_layout is not None:
-                new_model.set_tensor_layout(vi.name, data_layout)
-            # sparsity
-            sparsity = post_model.get_tensor_sparsity(vi.name)
-            if sparsity is not None:
-                new_model.set_tensor_sparsity(vi.name, sparsity)
-            # graph output should not be part of graph.value_info, so don't insert
-            # if current vi == outp, but the quantization annotation is preserved
-            if vi == outp:
-                continue
-            new_model.graph.value_info.append(vi)
+        for i in init_new:
+            new_model.graph.initializer.append(i)
+        for qa in qa_new:
+            new_model.graph.quantization_annotation.append(qa)

         # tidy-up new model
         model = new_model
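The rewrite drops the per-tensor copying loops (the removed post-model loop even renamed via the stale `tensor_name` variable left over from the pre-model loop) in favor of concatenating the two graphs' protobuf containers wholesale. Stripped of FINN specifics, the core move is just this (self-contained sketch with toy single-node graphs):

```python
import onnx
from onnx import helper, TensorProto

# two toy graphs: x -> Relu -> y and y -> Neg -> z
x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 4])
y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, 4])
z = helper.make_tensor_value_info("z", TensorProto.FLOAT, [1, 4])
g_pre = helper.make_graph([helper.make_node("Relu", ["x"], ["y"])], "pre", [x], [y])
g_post = helper.make_graph([helper.make_node("Neg", ["y"], ["z"])], "post", [y], [z])

# fuse: concatenate nodes, keep pre's input and post's output,
# and carry pre's old output along as value_info
fused = helper.make_graph(
    nodes=list(g_pre.node) + list(g_post.node),
    name="fuse-graph",
    inputs=[g_pre.input[0]],
    outputs=[g_post.output[0]],
    value_info=list(g_pre.value_info) + list(g_post.value_info) + list(g_pre.output),
)
model = helper.make_model(fused, producer_name="fuse_model")
onnx.checker.check_model(model)  # passes: tensors line up by name
```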
diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py
index c33281d85449c173a4631297fd1d67ac0aed8c81..8626ef40619b067c6672c9017ddcb747998c3f2c 100644
--- a/src/finn/transformation/streamline/round_thresholds.py
+++ b/src/finn/transformation/streamline/round_thresholds.py
@@ -51,10 +51,20 @@ class RoundAndClipThresholds(Transformation):
                     model.set_tensor_datatype(n.input[1], idtype)
                     graph_modified = True
                 if idtype.is_integer() and not idtype.signed() and (Tnew < 0).any():
-                    # clip any negative thresholds
+                    # clip any negative thresholds if input is unsigned
                     Tnew = np.clip(Tnew, 0, None)
                     model.set_initializer(n.input[1], Tnew)
                     # use same datatype as inputs for thresholds
                     model.set_tensor_datatype(n.input[1], idtype)
                     graph_modified = True
+                if idtype.is_integer() and (
+                    (Tnew < (idtype.min() - 1)).any()
+                    or (Tnew > (idtype.max() + 1)).any()
+                ):
+                    # clip any large thresholds to input range + 1
+                    Tnew = np.clip(Tnew, idtype.min() - 1, idtype.max() + 1)
+                    model.set_initializer(n.input[1], Tnew)
+                    # use same datatype as inputs for thresholds
+                    model.set_tensor_datatype(n.input[1], idtype)
+                    graph_modified = True
         return (model, graph_modified)
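Thresholds far outside the input range behave identically to ones just past it (the comparison either always or never fires), so the new clause saturates them to one step beyond the range. Numerically, for a 4-bit signed input (plain-numpy sketch of the intended clipping):

```python
import numpy as np

idt_min, idt_max = -8, 7  # e.g. a 4-bit signed input (INT4)
T = np.array([-100.0, -9.0, -8.0, 0.0, 7.0, 8.0, 500.0])

# any threshold beyond the input range acts like "never" / "always";
# clipping to one past the range keeps that behavior with small values
T_clipped = np.clip(T, idt_min - 1, idt_max + 1)
print(T_clipped)  # [-9. -9. -8.  0.  7.  8.  8.]
```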
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 6c92e9b2765b1c2be6f95ee148964bccfb3cd7be..cc759bebb1b856a84e25978d442e460332092d23 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -156,13 +156,19 @@ def make_build_dir(prefix=""):

 def get_by_name(container, name, name_field="name"):
-    """Return item from container by .name field if it exists, None otherwise"""
+    """Return item from container by .name field if it exists, None otherwise.
+    Will throw an Exception if multiple items are found, since this violates the
+    ONNX standard."""
     names = [getattr(x, name_field) for x in container]
-    try:
-        ind = names.index(name)
-        return container[ind]
-    except ValueError:
+
+    inds = [i for i, e in enumerate(names) if e == name]
+    if len(inds) > 1:
+        raise Exception("Found multiple get_by_name matches, undefined behavior")
+    elif len(inds) == 0:
         return None
+    else:
+        ind = inds[0]
+        return container[ind]


 def remove_by_name(container, name, name_field="name"):
@@ -259,6 +265,33 @@ def pad_tensor_to_multiple_of(ndarray, pad_to_dims, val=0, distr_pad=False):
     return ret


+def calculate_matvec_accumulator_range(matrix, vec_dt):
+    """Calculate the minimum and maximum possible result (accumulator) values
+    for a dot product x * A, given matrix A of dims (MW, MH), and vector (1, MW)
+    with datatype vec_dt. Returns (acc_min, acc_max).
+    """
+    min_weight = matrix.min()
+    max_weight = matrix.max()
+    perceptive_field_elems = matrix.shape[0]
+    min_input = vec_dt.min()
+    max_input = vec_dt.max()
+    # calculate minimum and maximum values of accumulator
+    # assume inputs span the whole range of the input datatype
+    acc_min = perceptive_field_elems * min(
+        min_weight * max_input,
+        min_weight * min_input,
+        max_weight * max_input,
+        max_weight * min_input,
+    )
+    acc_max = perceptive_field_elems * max(
+        min_weight * max_input,
+        min_weight * min_input,
+        max_weight * max_input,
+        max_weight * min_input,
+    )
+    return (acc_min, acc_max)
+
+
 def gen_finn_dt_tensor(finn_dt, tensor_shape):
     """Generates random tensor in given shape and with given FINN DataType."""
     if type(tensor_shape) == list:
diff --git a/tests/end2end/test_end2end_cnv_w1a1.py b/tests/end2end/test_end2end_cnv_w1a1.py
index ebca224389550929cebd542cf4201cf62481a169..f931f91c89f738899ff9e6584be81a3b2d542227 100644
--- a/tests/end2end/test_end2end_cnv_w1a1.py
+++ b/tests/end2end/test_end2end_cnv_w1a1.py
@@ -78,6 +78,7 @@ from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.core.throughput_test import throughput_test_rtlsim
+import warnings

 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
@@ -317,6 +318,10 @@ def test_end2end_cnv_w1a1_synth_pynq_project():
     )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
+    warnings.warn(
+        "Post-synthesis resources (excluding shell): "
+        + model.get_metadata_prop("res_total_synth")
+    )
     model.save(build_dir + "/end2end_cnv_w1a1_synth.onnx")
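A worked example of the new range helper: the extreme accumulator values come from pairing the extreme weights with the extreme inputs, scaled by the number of summed elements. Restated with an explicit input range instead of a FINN `DataType` (hedged re-implementation, not the library function):

```python
import numpy as np

def matvec_acc_range(matrix, vec_min, vec_max):
    """Same math as calculate_matvec_accumulator_range, with an explicit
    input range instead of a FINN DataType."""
    w_min, w_max = matrix.min(), matrix.max()
    n = matrix.shape[0]  # elements summed per output
    corners = [w_min * vec_min, w_min * vec_max, w_max * vec_min, w_max * vec_max]
    return (n * min(corners), n * max(corners))

W = np.array([[-2, 4], [1, 0], [3, -1]])  # (MW=3, MH=2)
print(matvec_acc_range(W, 0, 3))          # UINT2 inputs: (-18, 36)
```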
diff --git a/tests/end2end/test_end2end_cnv_w2a2.py b/tests/end2end/test_end2end_cnv_w2a2.py
index 2e34990007677ce1b8e0a9ae4a1781d4527ee040..239094a3c931c16b3afe8d1874345e4dc90334ef 100644
--- a/tests/end2end/test_end2end_cnv_w2a2.py
+++ b/tests/end2end/test_end2end_cnv_w2a2.py
@@ -77,6 +77,7 @@ from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.core.throughput_test import throughput_test_rtlsim
+import warnings

 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
@@ -315,6 +316,10 @@ def test_end2end_cnv_w2a2_synth_pynq_project():
     )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
+    warnings.warn(
+        "Post-synthesis resources (excluding shell): "
+        + model.get_metadata_prop("res_total_synth")
+    )
     model.save(build_dir + "/end2end_cnv_w2a2_synth.onnx")
diff --git a/tests/end2end/test_end2end_tfc_w1a1.py b/tests/end2end/test_end2end_tfc_w1a1.py
index b827cbb1c31cc84de9fa5d4df4d6b23e02a02a5f..1a3cc4f1bb9232809e864bb0c784498534f63631 100644
--- a/tests/end2end/test_end2end_tfc_w1a1.py
+++ b/tests/end2end/test_end2end_tfc_w1a1.py
@@ -79,6 +79,7 @@ from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.core.throughput_test import throughput_test_rtlsim
 import finn.util.vcd as vcd
+import warnings

 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
@@ -241,11 +242,11 @@ def test_end2end_tfc_w1a1_throughput_test_rtlsim():
     # run through IP-stitched rtlsim with increasing batch sizes and
     # check the number of cycles it takes to execute
     ret = throughput_test_rtlsim(model, 1)
-    assert ret["cycles"] == 205
+    assert np.isclose(ret["cycles"], 205, atol=5)
     ret = throughput_test_rtlsim(model, 10)
-    assert ret["cycles"] == 844
+    assert np.isclose(ret["cycles"], 844, atol=10)
     ret = throughput_test_rtlsim(model, 100)
-    assert ret["cycles"] == 7234
+    assert np.isclose(ret["cycles"], 7234, atol=100)


 @pytest.mark.vivado
@@ -314,6 +315,10 @@ def test_end2end_tfc_w1a1_synth_pynq_project():
     )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
+    warnings.warn(
+        "Post-synthesis resources (excluding shell): "
+        + model.get_metadata_prop("res_total_synth")
+    )
     model.save(build_dir + "/end2end_tfc_w1a1_synth.onnx")
diff --git a/tests/end2end/test_end2end_tfc_w1a2.py b/tests/end2end/test_end2end_tfc_w1a2.py
index 755650e3d4da6947a93495fd5bbe0464cf485193..0f066cb06c53ce118d0a357fce0999299d7f3305 100644
--- a/tests/end2end/test_end2end_tfc_w1a2.py
+++ b/tests/end2end/test_end2end_tfc_w1a2.py
@@ -74,6 +74,7 @@ from finn.util.basic import pynq_part_map
 from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
 from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+import warnings

 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
@@ -270,6 +271,10 @@ def test_end2end_tfc_w1a2_synth_pynq_project():
     )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
+    warnings.warn(
+        "Post-synthesis resources (excluding shell): "
+        + model.get_metadata_prop("res_total_synth")
+    )
     model.save(build_dir + "/end2end_tfc_w1a2_synth.onnx")
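Exact rtlsim cycle counts proved brittle, so the assertions now tolerate small deviations. With integer cycle counts, `np.isclose(a, b, atol=k)` accepts `|a - b| <= k` (the default `rtol=1e-05` contributes negligibly at these magnitudes):

```python
import numpy as np

measured = 849  # hypothetical rtlsim cycle count for batch size 10
assert np.isclose(measured, 844, atol=10)      # passes: |849 - 844| <= 10
assert not np.isclose(measured, 844, atol=4)   # 5 > 4, this would fail the suite
```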
os.environ["FINN_INST_NAME"] test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -270,6 +271,10 @@ def test_end2end_tfc_w2a2_synth_pynq_project(): ) model = model.transform(SynthPYNQProject()) model = model.transform(AnnotateResources("synth")) + warnings.warn( + "Post-synthesis resources (excluding shell): " + + model.get_metadata_prop("res_total_synth") + ) model.save(build_dir + "/end2end_tfc_w2a2_synth.onnx") diff --git a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py index 25cafcfd4c552fb368cbaca2d1d2714cf2d14011..a272fadc12f095034693e555e4d791e9e73262ab 100644 --- a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py +++ b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py @@ -63,6 +63,7 @@ from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul from finn.transformation.streamline.reorder import MakeMaxPoolNHWC from finn.transformation.infer_data_layouts import InferDataLayouts from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles +import warnings build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] @@ -178,6 +179,10 @@ def test_end2end_zynqbuild_cnv_w1a1_build(): ) model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns)) model = model.transform(AnnotateResources("synth")) + warnings.warn( + "Post-synthesis resources (excluding shell): " + + model.get_metadata_prop("res_total_synth") + ) model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_build.onnx") diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py index ff2af70731d9248dd2593db5be9e465fa86157dd..8b298d5644d6d6cda038e8ca1757be7538ba9804 100644 --- a/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py +++ b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py @@ -64,6 +64,7 @@ from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources from finn.transformation.infer_data_layouts import InferDataLayouts from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild +import warnings build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -160,6 +161,10 @@ def test_end2end_zynqbuild_tfc_w1a1_build(): ) model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns)) model = model.transform(AnnotateResources("synth")) + warnings.warn( + "Post-synthesis resources (excluding shell): " + + model.get_metadata_prop("res_total_synth") + ) model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_build.onnx") diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py index 7b28090854adbbcb6f400f73c2b6f6557f540e5e..bdb24d82dd639abe52aac9688b0b98430f72cabd 100644 --- a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py +++ b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py @@ -58,6 +58,7 @@ from finn.util.basic import pynq_part_map from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild +import warnings build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -149,6 +150,10 @@ def test_end2end_zynqbuild_tfc_w2a2_build(): ) model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns)) model = model.transform(AnnotateResources("synth")) + 
diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
index 7b28090854adbbcb6f400f73c2b6f6557f540e5e..bdb24d82dd639abe52aac9688b0b98430f72cabd 100644
--- a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
+++ b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
@@ -58,6 +58,7 @@ from finn.util.basic import pynq_part_map
 from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
 from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+import warnings

 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
@@ -149,6 +150,10 @@ def test_end2end_zynqbuild_tfc_w2a2_build():
     )
     model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns))
     model = model.transform(AnnotateResources("synth"))
+    warnings.warn(
+        "Post-synthesis resources (excluding shell): "
+        + model.get_metadata_prop("res_total_synth")
+    )
     model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_build.onnx")
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index d77065ad9396d0cc8dd57a39ed823fffcb30ee47..bd600c6c57d00d5fc03152f75b9f2f8c6beeeb2c 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -89,7 +89,6 @@ def test_convert_to_hls_layers_tfc_w1a1():
     assert fc3.op_type == "StreamingFCLayer_Batch"
     assert model.get_tensor_shape(fc3.input[0]) == [1, 64]
     assert model.get_tensor_shape(fc3.input[1]) == [64, 10]
-    os.remove(export_onnx_path)

     fc0w = getCustomOp(fc0)
     fc0w.set_nodeattr("SIMD", 784)
@@ -123,6 +122,7 @@ def test_convert_to_hls_layers_tfc_w1a1():
     # do forward pass in PyTorch/Brevitas
     expected = tfc.forward(input_tensor).detach().numpy()
     assert np.isclose(produced, expected, atol=1e-3).all()
+    os.remove(export_onnx_path)


 @pytest.mark.vivado
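Deferring `os.remove(export_onnx_path)` until after the final assertion means a failing test leaves the exported model behind for inspection instead of deleting it mid-test. If unconditional cleanup were wanted instead, the usual pattern is `try`/`finally` (illustrative sketch, not part of the diff):

```python
import os

def run_checks(export_onnx_path):
    try:
        pass  # ... assertions against the exported model go here ...
    finally:
        # cleanup runs whether or not an assertion fired
        if os.path.exists(export_onnx_path):
            os.remove(export_onnx_path)
```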