Skip to content
Snippets Groups Projects
Commit a53f9374 authored by Yaman Umuroglu's avatar Yaman Umuroglu
Browse files

[Transform] introduce and use MinimizeAccumulatorWidth transform

parent 75b414b0
No related branches found
No related tags found
No related merge requests found
......@@ -39,6 +39,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
from finn.util.basic import (
interleave_matrix_outer_dim_from_partitions,
roundup_to_integer_multiple,
calculate_matvec_accumulator_range,
)
from finn.util.data_packing import (
npy_to_rtlsim_input,
......@@ -75,6 +76,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
"inputDataType": ("s", True, ""),
"weightDataType": ("s", True, ""),
"outputDataType": ("s", True, ""),
# FINN DataType for accumulator -- auto-computed and updated
"accDataType": ("s", False, "DataType.INT32"),
# use xnor-popcount for binary weights/inputs, thus treating them
# as bipolar
"binaryXnorMode": ("i", False, 0),
......@@ -444,6 +447,47 @@ class StreamingFCLayer_Batch(HLSCustomOp):
ret = np.flip(ret, axis=-1)
return ret
def minimize_accumulator_width(self, model):
    """Compute the narrowest FINN DataType able to hold this layer's
    accumulator values and store it in the accDataType node attribute.

    The accumulator range is derived from the weight initializer and the
    input datatype via calculate_matvec_accumulator_range. If the node has
    a thresholding activation (a third input), the range is widened to also
    cover all threshold values, and the thresholds are asserted to be
    representable in the chosen type. For no-activation nodes the
    accumulator is emitted directly, so outputDataType is set to match.

    Returns the chosen accumulator DataType.

    Raises AssertionError if the threshold values cannot be expressed in
    the computed datatype.
    """

    def _smallest_covering_dtype(range_min, range_max):
        # Pick the narrowest DataType covering [range_min, range_max].
        # A negative lower bound forces a signed type; when the positive
        # bound dominates in magnitude, request -range_max so the signed
        # type is wide enough on the positive side as well.
        if range_min < 0:
            if abs(range_min) > range_max:
                return DataType.get_smallest_possible(range_min)
            return DataType.get_smallest_possible(-range_max)
        return DataType.get_smallest_possible(range_max)

    weights = model.get_initializer(self.onnx_node.input[1])
    # optional third input carries the activation thresholds
    if len(self.onnx_node.input) > 2:
        thresholds = model.get_initializer(self.onnx_node.input[2])
    else:
        thresholds = None
    idt = self.get_input_datatype()
    # worst-case accumulator range, assuming inputs span the full idt range
    (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
    if thresholds is not None:
        threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
        # widen the accumulator range so every threshold is representable;
        # the threshold datatype implicitly fixes the accumulator datatype
        tdt_min = min(acc_min, thresholds.min())
        tdt_max = max(acc_max, thresholds.max())
        tdt = _smallest_covering_dtype(tdt_min, tdt_max)
        assert np.vectorize(tdt.allowed)(
            threshold_tensor
        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
        self.set_nodeattr("accDataType", tdt.name)
    else:
        adt = _smallest_covering_dtype(acc_min, acc_max)
        self.set_nodeattr("accDataType", adt.name)
        # for no-activation nodes, output dt = acc dt
        self.set_nodeattr("outputDataType", adt.name)
    return DataType[self.get_nodeattr("accDataType")]
def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
"""Convert the original numpy weight matrix orig_weight_matrix into
a form suitable for passing to the hlslib call:
......@@ -614,44 +658,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
# set threshold datatype (and accumulator datatype implicitly)
min_threshold = thresholds.min()
max_threshold = thresholds.max()
min_weight = weights.min()
max_weight = weights.max()
perceptive_field_elems = self.get_nodeattr("MW")
min_input = self.get_input_datatype().min()
max_input = self.get_input_datatype().max()
# calculate minimum and maximum values of accumulator
# assume inputs span the whole range of the input datatype
acc_min = perceptive_field_elems * min(
min_weight * max_input,
min_weight * min_input,
max_weight * max_input,
max_weight * min_input,
)
acc_max = perceptive_field_elems * max(
min_weight * max_input,
min_weight * min_input,
max_weight * max_input,
max_weight * min_input,
)
# get range required by threshold values
tdt_min = min(acc_min, min_threshold)
tdt_max = max(acc_max, max_threshold)
if tdt_min < 0:
if abs(tdt_min) > tdt_max:
tdt = DataType.get_smallest_possible(tdt_min)
else:
tdt = DataType.get_smallest_possible(0 - tdt_max)
else:
tdt = DataType.get_smallest_possible(tdt_max)
# get computed threshold datatype from attribute
tdt = DataType[self.get_nodeattr("accDataType")]
assert np.vectorize(tdt.allowed)(
threshold_tensor
).all(), "Thresholds can't be expressed with type %s" % str(tdt)
thresholds_hls_code = numpy_to_hls_code(
threshold_tensor, tdt, "thresholds", False, True
)
......
......@@ -40,6 +40,9 @@ from finn.transformation.general import SortGraph
import finn.core.data_layout as DataLayout
from finn.util.onnx import nchw_to_nhwc
from finn.util.basic import get_by_name
from finn.transformation.fpgadataflow.minimize_accumulator_width import (
MinimizeAccumulatorWidth,
)
class InferConvInpGen(Transformation):
......@@ -489,6 +492,7 @@ class InferBinaryStreamingFCLayer(Transformation):
graph.node.remove(n)
graph_modified = True
if graph_modified:
model = model.transform(MinimizeAccumulatorWidth())
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
return (model, graph_modified)
......@@ -623,6 +627,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
graph.node.remove(n)
graph_modified = True
if graph_modified:
model = model.transform(MinimizeAccumulatorWidth())
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
return (model, graph_modified)
......
# Copyright (c) 2020, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from finn.custom_op.registry import getCustomOp
from finn.transformation import Transformation
from finn.util.fpgadataflow import is_fpgadataflow_node
class MinimizeAccumulatorWidth(Transformation):
    """For relevant nodes, call the accumulator width minimization
    functions to save on resources. May alter tensor DataType for
    certain nodes if they produce an accumulator as result."""

    def apply(self, model):
        """Walk all graph nodes; for each fpgadataflow node whose CustomOp
        wrapper implements minimize_accumulator_width, invoke it so the
        node can shrink its accumulator datatype attribute in place.

        Always reports graph_modified=False, so the transformation runs a
        single pass (attribute updates do not require re-application).
        """
        for node in model.graph.node:
            # only fpgadataflow (HLS) nodes carry accumulator attributes
            if is_fpgadataflow_node(node):
                inst = getCustomOp(node)
                # not every fpgadataflow op has an accumulator to minimize
                if hasattr(inst, "minimize_accumulator_width"):
                    inst.minimize_accumulator_width(model)
        return (model, False)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment