diff --git a/src/finn/core/datatype.py b/src/finn/core/datatype.py
index 222d11a8872f9be757fd60fbfa5f8abea683311a..df895a1ad446d6b2cc3ebb24f1179944f4cfe9ab 100644
--- a/src/finn/core/datatype.py
+++ b/src/finn/core/datatype.py
@@ -50,17 +50,69 @@ class DataType(Enum):
     UINT2 = auto()
     UINT3 = auto()
     UINT4 = auto()
+    UINT5 = auto()
+    UINT6 = auto()
+    UINT7 = auto()
     UINT8 = auto()
+    UINT9 = auto()
+    UINT10 = auto()
+    UINT11 = auto()
+    UINT12 = auto()
+    UINT13 = auto()
+    UINT14 = auto()
+    UINT15 = auto()
     UINT16 = auto()
+    UINT17 = auto()
+    UINT18 = auto()
+    UINT19 = auto()
+    UINT20 = auto()
+    UINT21 = auto()
+    UINT22 = auto()
+    UINT23 = auto()
+    UINT24 = auto()
+    UINT25 = auto()
+    UINT26 = auto()
+    UINT27 = auto()
+    UINT28 = auto()
+    UINT29 = auto()
+    UINT30 = auto()
+    UINT31 = auto()
     UINT32 = auto()
+    UINT64 = auto()
     BIPOLAR = auto()
     TERNARY = auto()
     INT2 = auto()
     INT3 = auto()
     INT4 = auto()
+    INT5 = auto()
+    INT6 = auto()
+    INT7 = auto()
     INT8 = auto()
+    INT9 = auto()
+    INT10 = auto()
+    INT11 = auto()
+    INT12 = auto()
+    INT13 = auto()
+    INT14 = auto()
+    INT15 = auto()
     INT16 = auto()
+    INT17 = auto()
+    INT18 = auto()
+    INT19 = auto()
+    INT20 = auto()
+    INT21 = auto()
+    INT22 = auto()
+    INT23 = auto()
+    INT24 = auto()
+    INT25 = auto()
+    INT26 = auto()
+    INT27 = auto()
+    INT28 = auto()
+    INT29 = auto()
+    INT30 = auto()
+    INT31 = auto()
     INT32 = auto()
+    INT64 = auto()
     FLOAT32 = auto()
 
     def bitwidth(self):
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 72aa322e0e44a6f4a5c11025d94bdfeb820338a3..bc266e4934c41d6f5f1261e0a30e90cb72ba83a8 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -39,6 +39,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
+    calculate_matvec_accumulator_range,
 )
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
@@ -75,6 +76,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
+            # FINN DataType for accumulator -- auto-computed and updated
+            "accDataType": ("s", False, "INT32"),
             # use xnor-popcount for binary weights/inputs, thus treating them
             # as bipolar
             "binaryXnorMode": ("i", False, 0),
@@ -444,6 +447,55 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         ret = np.flip(ret, axis=-1)
         return ret
 
+    def minimize_accumulator_width(self, model):
+        """Compute the smallest DataType that covers the accumulator range
+        (and the thresholds, if present), store it in accDataType and return
+        it. For nodes without thresholds, outputDataType is set to match.
+        model is used to look up the weight/threshold initializers."""
+        weights = model.get_initializer(self.onnx_node.input[1])
+        if len(self.onnx_node.input) > 2:
+            thresholds = model.get_initializer(self.onnx_node.input[2])
+        else:
+            thresholds = None
+        idt = self.get_input_datatype()
+        # calculate minimum and maximum values of accumulator
+        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
+        if thresholds is not None:
+            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
+            # set threshold datatype (and accumulator datatype implicitly)
+            min_threshold = thresholds.min()
+            max_threshold = thresholds.max()
+            # get range required by threshold values
+            tdt_min = min(acc_min, min_threshold)
+            tdt_max = max(acc_max, max_threshold)
+            if tdt_min < 0:
+                if abs(tdt_min) > tdt_max:
+                    tdt = DataType.get_smallest_possible(tdt_min)
+                else:
+                    # request -tdt_max - 1 so that +tdt_max is covered as
+                    # well; a signed type reaching down to -tdt_max tops
+                    # out at tdt_max - 1 (same as in Thresholding_Batch)
+                    tdt = DataType.get_smallest_possible(0 - tdt_max - 1)
+            else:
+                tdt = DataType.get_smallest_possible(tdt_max)
+            assert np.vectorize(tdt.allowed)(
+                threshold_tensor
+            ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+            self.set_nodeattr("accDataType", tdt.name)
+        else:
+            if acc_min < 0:
+                if abs(acc_min) > acc_max:
+                    adt = DataType.get_smallest_possible(acc_min)
+                else:
+                    # as above: -acc_max - 1 makes room for +acc_max
+                    adt = DataType.get_smallest_possible(0 - acc_max - 1)
+            else:
+                adt = DataType.get_smallest_possible(acc_max)
+            self.set_nodeattr("accDataType", adt.name)
+            # for no-activation nodes, output dt = acc dt
+            self.set_nodeattr("outputDataType", adt.name)
+        return DataType[self.get_nodeattr("accDataType")]
+
     def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
         """Convert the original numpy weight matrix orig_weight_matrix into
         a form suitable for passing to the hlslib call:
@@ -605,7 +657,6 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             thresholds = model.get_initializer(self.onnx_node.input[2])
             if thresholds is not None:
                 threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
-                tdt = DataType.INT32
                 # use UINT32 threshold export for bipolar times bipolar
                 inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
                 wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
@@ -615,11 +666,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
                 inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
                 wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
-                if inp_is_bipolar and wt_is_bipolar:
-                    tdt = DataType.UINT32
+                # get computed threshold datatype from attribute
+                tdt = DataType[self.get_nodeattr("accDataType")]
+
                 assert np.vectorize(tdt.allowed)(
                     threshold_tensor
-                ).all(), "Thresholds are not int"
+                ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
                 thresholds_hls_code = numpy_to_hls_code(
                     threshold_tensor, tdt, "thresholds", False, True
                 )
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 379ebd92d86d54c6bc621c7f89b01eacba2b5d3f..562bab0f18990096f7364b3a4e2bcbbbf4ce2b58 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -283,10 +283,25 @@ class Thresholding_Batch(HLSCustomOp):
         thresholds = model.get_initializer(self.onnx_node.input[1])
 
         threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
-        tdt = DataType.INT32
+
+        min_threshold = thresholds.min()
+        max_threshold = thresholds.max()
+        min_input = self.get_input_datatype().min()
+        max_input = self.get_input_datatype().max()
+        # get range required by threshold values
+        tdt_min = min(min_input, min_threshold)
+        tdt_max = max(max_input, max_threshold)
+        if tdt_min < 0:
+            if abs(tdt_min) > tdt_max:
+                tdt = DataType.get_smallest_possible(tdt_min)
+            else:
+                tdt = DataType.get_smallest_possible(0 - tdt_max - 1)
+        else:
+            tdt = DataType.get_smallest_possible(tdt_max)
         assert np.vectorize(tdt.allowed)(
             threshold_tensor
-        ).all(), "Thresholds are not int"
+        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+
         thresholds_hls_code = numpy_to_hls_code(
             threshold_tensor, tdt, "thresholds", False, True
         )
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index e6dca0e4b05f943c971bc0f97af03f5038fd0dab..88f5fa926f73d5cb1919a02c83153cb8d1894711 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -40,6 +40,9 @@ from finn.transformation.general import SortGraph
 import finn.core.data_layout as DataLayout
 from finn.util.onnx import nchw_to_nhwc
 from finn.util.basic import get_by_name
+from finn.transformation.fpgadataflow.minimize_accumulator_width import (
+    MinimizeAccumulatorWidth,
+)
 
 
 class InferConvInpGen(Transformation):
@@ -489,6 +492,7 @@ class InferBinaryStreamingFCLayer(Transformation):
                     graph.node.remove(n)
                     graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
@@ -623,6 +627,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
                         graph.node.remove(n)
                         graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c54a5efbd3b28f0fbfd074b512929edab234e78
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
+
+
+class MinimizeAccumulatorWidth(Transformation):
+    """For relevant nodes, call the accumulator width minimization
+    functions to save on resources. May alter tensor DataType for
+    certain nodes if they produce an accumulator as result."""
+
+    def __init__(self):
+        super().__init__()
+
+    def apply(self, model):
+        for node in model.graph.node:
+            if is_fpgadataflow_node(node) is True:
+                inst = getCustomOp(node)
+                if hasattr(inst, "minimize_accumulator_width"):
+                    inst.minimize_accumulator_width(model)
+        return (model, False)
diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py
index c33281d85449c173a4631297fd1d67ac0aed8c81..8626ef40619b067c6672c9017ddcb747998c3f2c 100644
--- a/src/finn/transformation/streamline/round_thresholds.py
+++ b/src/finn/transformation/streamline/round_thresholds.py
@@ -51,10 +51,20 @@ class RoundAndClipThresholds(Transformation):
                     model.set_tensor_datatype(n.input[1], idtype)
                     graph_modified = True
                 if idtype.is_integer() and not idtype.signed() and (Tnew < 0).any():
-                    # clip any negative thresholds
+                    # clip any negative thresholds if input is unsigned
                     Tnew = np.clip(Tnew, 0, None)
                     model.set_initializer(n.input[1], Tnew)
                     # use same datatype as inputs for thresholds
                     model.set_tensor_datatype(n.input[1], idtype)
                     graph_modified = True
+                if idtype.is_integer() and (
+                    (Tnew < (idtype.min() - 1)).any()
+                    or (Tnew > (idtype.max() + 1)).any()
+                ):
+                    # clip any large thresholds to input range + 1
+                    Tnew = np.clip(Tnew, idtype.min() - 1, idtype.max() + 1)
+                    model.set_initializer(n.input[1], Tnew)
+                    # use same datatype as inputs for thresholds
+                    model.set_tensor_datatype(n.input[1], idtype)
+                    graph_modified = True
         return (model, graph_modified)
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 6c92e9b2765b1c2be6f95ee148964bccfb3cd7be..62d5947e3b7e06375cc9d48a2cf32b4f685e7861 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -259,6 +259,33 @@ def pad_tensor_to_multiple_of(ndarray, pad_to_dims, val=0, distr_pad=False):
     return ret
 
 
+def calculate_matvec_accumulator_range(matrix, vec_dt):
+    """Calculate the minimum and maximum possible result (accumulator) values
+    for a dot product x * A, given matrix A of dims (MW, MH), and vector (1, MW)
+    with datatype vec_dt. Returns (acc_min, acc_max).
+    """
+    min_weight = matrix.min()
+    max_weight = matrix.max()
+    perceptive_field_elems = matrix.shape[0]
+    min_input = vec_dt.min()
+    max_input = vec_dt.max()
+    # calculate minimum and maximum values of accumulator
+    # assume inputs span the whole range of the input datatype
+    acc_min = perceptive_field_elems * min(
+        min_weight * max_input,
+        min_weight * min_input,
+        max_weight * max_input,
+        max_weight * min_input,
+    )
+    acc_max = perceptive_field_elems * max(
+        min_weight * max_input,
+        min_weight * min_input,
+        max_weight * max_input,
+        max_weight * min_input,
+    )
+    return (acc_min, acc_max)
+
+
 def gen_finn_dt_tensor(finn_dt, tensor_shape):
     """Generates random tensor in given shape and with given FINN DataType."""
     if type(tensor_shape) == list:
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index d77065ad9396d0cc8dd57a39ed823fffcb30ee47..30d5ae64cfec84e089426d389a8cb607cd71c12f 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -123,6 +123,7 @@ def test_convert_to_hls_layers_tfc_w1a1():
     # do forward pass in PyTorch/Brevitas
     expected = tfc.forward(input_tensor).detach().numpy()
     assert np.isclose(produced, expected, atol=1e-3).all()
+    os.remove(export_onnx_path)
 
 
 @pytest.mark.vivado