[Core] allow disabling quantized tensor sanitization with SANITIZE_QUANT_TENSORS=0

79bff3ba · Yaman Umuroglu · b628d1a8 · 79bff3ba · 79bff3ba · 79bff3ba
Commit 79bff3ba authored 4 years ago by Yaman Umuroglu
--- a/docs/finn/internals.rst
+++ b/docs/finn/internals.rst
@@ -18,6 +18,7 @@ ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we

 Note that FINN uses floating point tensors as a carrier data type to represent integers. Floating point arithmetic can introduce rounding errors, e.g. (int_num * float_scale) / float_scale is not always equal to int_num.
 When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`finn.util.basic.sanitize_quant_values`) for more information.
+This behavior can be disabled (not recommended!) by setting the environment variable ``SANITIZE_QUANT_TENSORS=0``, which speeds up execution but may give incorrect results.

 Custom Operations/Nodes
 =======================

--- a/src/finn/core/onnx_exec.py
+++ b/src/finn/core/onnx_exec.py
@@ -39,7 +39,7 @@ from finn.core.remote_exec import remote_exec
 from finn.core.rtlsim_exec import rtlsim_exec
 from finn.custom_op.registry import getCustomOp
 import finn.analysis.topology as ta
-from finn.util.basic import sanitize_quant_values
+from finn.util.basic import sanitize_quant_values, get_sanitize_quant_tensors


 def execute_node(node, context, graph):
@@ -160,14 +160,17 @@ def execute_onnx(model, input_dict, return_full_exec_context=False):
        # we can simply walk down the list since the ONNX spec guarantees that it is
        # topologically sorted
        for node in graph.node:
-            # call util function match input values to quantization annotation
-            execution_context = sanitize_quant_values(
-                model, node.input, execution_context
-            )
+            if get_sanitize_quant_tensors() != 0:
+                # round input values to match quantization annotation
+                execution_context = sanitize_quant_values(
+                    model, node.input, execution_context
+                )
            execute_node(node, execution_context, graph)
-            execution_context = sanitize_quant_values(
-                model, node.output, execution_context
-            )
+            if get_sanitize_quant_tensors() != 0:
+                # round output values to match quantization annotation
+                execution_context = sanitize_quant_values(
+                    model, node.output, execution_context
+                )
    elif model_exec_mode == "remote_pynq":
        # use remote exec metadata built into model to execute on a remote PYNQ
        remote_exec(model, execution_context)

--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -114,6 +114,17 @@ def get_execution_error_thresh():
        return 1e-2


+def get_sanitize_quant_tensors():
+    """Return nonzero if quantization-annotated tensors should be sanitized.
+    Reads the integer env var SANITIZE_QUANT_TENSORS; defaults to 1 (enabled)
+    when it is unset or not a valid integer. Disabling (0) yields faster ONNX
+    execution but may give incorrect results. Use with caution."""
+    try:
+        return int(os.environ["SANITIZE_QUANT_TENSORS"])
+    except (KeyError, ValueError):
+        return 1
+
+
 def make_build_dir(prefix=""):
    """Creates a temporary folder with given prefix to be used as a build dir.
    Use this function instead of tempfile.mkdtemp to ensure any generated files