diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 73bc573c5e01d332bb731d00d3b051a4fc583dbd..bc266e4934c41d6f5f1261e0a30e90cb72ba83a8 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -39,6 +39,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
+    calculate_matvec_accumulator_range,
 )
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
@@ -75,6 +76,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
+            # FINN DataType for accumulator -- auto-computed and updated
+            "accDataType": ("s", False, "DataType.INT32"),
             # use xnor-popcount for binary weights/inputs, thus treating them
             # as bipolar
             "binaryXnorMode": ("i", False, 0),
@@ -444,6 +447,47 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         ret = np.flip(ret, axis=-1)
         return ret
 
+    def minimize_accumulator_width(self, model):
+        weights = model.get_initializer(self.onnx_node.input[1])
+        if len(self.onnx_node.input) > 2:
+            thresholds = model.get_initializer(self.onnx_node.input[2])
+        else:
+            thresholds = None
+        idt = self.get_input_datatype()
+        # calculate minimum and maximum values of accumulator
+        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
+        if thresholds is not None:
+            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
+            # set threshold datatype (and accumulator datatype implicitly)
+            min_threshold = thresholds.min()
+            max_threshold = thresholds.max()
+            # get range required by threshold values
+            tdt_min = min(acc_min, min_threshold)
+            tdt_max = max(acc_max, max_threshold)
+            if tdt_min < 0:
+                if abs(tdt_min) > tdt_max:
+                    tdt = DataType.get_smallest_possible(tdt_min)
+                else:
+                    tdt = DataType.get_smallest_possible(0 - tdt_max)
+            else:
+                tdt = DataType.get_smallest_possible(tdt_max)
+            assert np.vectorize(tdt.allowed)(
+                threshold_tensor
+            ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+            self.set_nodeattr("accDataType", tdt.name)
+        else:
+            if acc_min < 0:
+                if abs(acc_min) > acc_max:
+                    adt = DataType.get_smallest_possible(acc_min)
+                else:
+                    adt = DataType.get_smallest_possible(0 - acc_max)
+            else:
+                adt = DataType.get_smallest_possible(acc_max)
+            self.set_nodeattr("accDataType", adt.name)
+            # for no-activation nodes, output dt = acc dt
+            self.set_nodeattr("outputDataType", adt.name)
+        return DataType[self.get_nodeattr("accDataType")]
+
     def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
         """Convert the original numpy weight matrix orig_weight_matrix into
         a form suitable for passing to the hlslib call:
@@ -614,44 +658,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
                 inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
                 wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
-                # set threshold datatype (and accumulator datatype implicitly)
-                min_threshold = thresholds.min()
-                max_threshold = thresholds.max()
-                min_weight = weights.min()
-                max_weight = weights.max()
-                perceptive_field_elems = self.get_nodeattr("MW")
-                min_input = self.get_input_datatype().min()
-                max_input = self.get_input_datatype().max()
-                # calculate minimum and maximum values of accumulator
-                # assume inputs span the whole range of the input datatype
-                acc_min = perceptive_field_elems * min(
-                    min_weight * max_input,
-                    min_weight * min_input,
-                    max_weight * max_input,
-                    max_weight * min_input,
-                )
-                acc_max = perceptive_field_elems * max(
-                    min_weight * max_input,
-                    min_weight * min_input,
-                    max_weight * max_input,
-                    max_weight * min_input,
-                )
-
-                # get range required by threshold values
-                tdt_min = min(acc_min, min_threshold)
-                tdt_max = max(acc_max, max_threshold)
-                if tdt_min < 0:
-                    if abs(tdt_min) > tdt_max:
-                        tdt = DataType.get_smallest_possible(tdt_min)
-                    else:
-                        tdt = DataType.get_smallest_possible(0 - tdt_max)
-                else:
-                    tdt = DataType.get_smallest_possible(tdt_max)
+                # get computed threshold datatype from attribute
+                tdt = DataType[self.get_nodeattr("accDataType")]
                 assert np.vectorize(tdt.allowed)(
                     threshold_tensor
                 ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
-
                 thresholds_hls_code = numpy_to_hls_code(
                     threshold_tensor, tdt, "thresholds", False, True
                 )
 
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index e6dca0e4b05f943c971bc0f97af03f5038fd0dab..88f5fa926f73d5cb1919a02c83153cb8d1894711 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -40,6 +40,9 @@ from finn.transformation.general import SortGraph
 import finn.core.data_layout as DataLayout
 from finn.util.onnx import nchw_to_nhwc
 from finn.util.basic import get_by_name
+from finn.transformation.fpgadataflow.minimize_accumulator_width import (
+    MinimizeAccumulatorWidth,
+)
 
 
 class InferConvInpGen(Transformation):
@@ -489,6 +492,7 @@ class InferBinaryStreamingFCLayer(Transformation):
                 graph.node.remove(n)
                 graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
@@ -623,6 +627,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
                 graph.node.remove(n)
                 graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c54a5efbd3b28f0fbfd074b512929edab234e78
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
+
+
+class MinimizeAccumulatorWidth(Transformation):
+    """For relevant nodes, call the accumulator width minimization
+    functions to save on resources. May alter tensor DataType for
+    certain nodes if they produce an accumulator as result."""
+
+    def __init__(self):
+        super().__init__()
+
+    def apply(self, model):
+        for node in model.graph.node:
+            if is_fpgadataflow_node(node) is True:
+                inst = getCustomOp(node)
+                if hasattr(inst, "minimize_accumulator_width"):
+                    inst.minimize_accumulator_width(model)
+        return (model, False)
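Note on the range bound: calculate_matvec_accumulator_range is imported from finn.util.basic but its body is not part of this diff. The sketch below only re-packages the inline min/max computation that generate_params used to perform (and that this patch removes); the helper's real implementation may differ in details, and the function and parameter names here are illustrative, not part of the change.

def matvec_accumulator_range_sketch(weights, input_dt):
    # weights: (MW, MH) numpy array; input_dt: a FINN DataType-like object
    # exposing .min()/.max(), mirroring the removed inline code.
    min_weight = weights.min()
    max_weight = weights.max()
    # MW elements are accumulated per output column
    num_acc_elems = weights.shape[0]
    min_input = input_dt.min()
    max_input = input_dt.max()
    # each term w_i * x_i is bounded by the four extreme products,
    # so scaling the extremes by the accumulation length bounds the sum
    extremes = [
        min_weight * max_input,
        min_weight * min_input,
        max_weight * max_input,
        max_weight * min_input,
    ]
    acc_min = num_acc_elems * min(extremes)
    acc_max = num_acc_elems * max(extremes)
    return (acc_min, acc_max)

The resulting (acc_min, acc_max) pair is what minimize_accumulator_width widens with the threshold extremes (when thresholds are present) before calling DataType.get_smallest_possible.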
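For trying the change out in isolation, here is a minimal usage sketch of the new transformation outside InferBinaryStreamingFCLayer / InferQuantizedStreamingFCLayer (which now invoke it automatically). The ONNX file name is hypothetical and the inspection step is only for illustration.

from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.minimize_accumulator_width import (
    MinimizeAccumulatorWidth,
)

# load a dataflow model that already contains StreamingFCLayer_Batch nodes
model = ModelWrapper("dataflow_model.onnx")  # hypothetical path
# shrink accDataType (and outputDataType for no-activation nodes) in place
model = model.transform(MinimizeAccumulatorWidth())
# check which datatype was chosen for the first FC layer
fc_node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
acc_dt = DataType[getCustomOp(fc_node).get_nodeattr("accDataType")]
print("accumulator datatype:", acc_dt)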