diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst
index 010cdece978cde078c3df4c64177fa1c5455aa0a..dee62f09a9253380e05300dac8fa34915c20dab5 100644
--- a/docs/finn/internals.rst
+++ b/docs/finn/internals.rst
@@ -18,6 +18,7 @@ ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we
 
 Note that FINN uses floating point tensors as a carrier data type to represent integers. Floating point arithmetic can introduce rounding errors, e.g. (int_num * float_scale) / float_scale is not always equal to int_num.
 When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`finn.util.basic.sanitize_quant_values`) for more information.
+This behavior can be disabled (not recommended!) by setting the environment variable ``SANITIZE_QUANT_TENSORS=0``.
 
 Custom Operations/Nodes
 =======================
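To make the rounding issue described in the docs hunk above concrete, here is a small illustrative sketch (plain numpy, not part of this patch); the scale value is arbitrary and only chosen because it has no exact float32 representation.

import numpy as np

# integers carried in a float32 tensor, as FINN does
int_vals = np.asarray([7.0, 13.0, 255.0], dtype=np.float32)
scale = np.float32(1.0 / 3.0)  # arbitrary scale with no exact float32 value

# scale and unscale: the result may drift slightly off the integer grid
round_trip = (int_vals * scale) / scale
print(round_trip - int_vals)  # tiny nonzero residuals are possible

# sanitizing rounds the values back onto the annotated integer datatype
sanitized = np.round(round_trip)
assert np.array_equal(sanitized, int_vals)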
diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py
index 218df22e07537034b377abc077aa7902bc0c4cfc..efdfaa19d9f9e5dfa41911a2184e989337b3d9c2 100644
--- a/src/finn/core/onnx_exec.py
+++ b/src/finn/core/onnx_exec.py
@@ -39,7 +39,7 @@ from finn.core.remote_exec import remote_exec
 from finn.core.rtlsim_exec import rtlsim_exec
 from finn.custom_op.registry import getCustomOp
 import finn.analysis.topology as ta
-from finn.util.basic import sanitize_quant_values
+from finn.util.basic import sanitize_quant_values, get_sanitize_quant_tensors
 
 
 def execute_node(node, context, graph):
@@ -160,14 +160,17 @@ def execute_onnx(model, input_dict, return_full_exec_context=False):
         # we can simply walk down the list since the ONNX spec guarantees that it is
         # topologically sorted
         for node in graph.node:
-            # call util function match input values to quantization annotation
-            execution_context = sanitize_quant_values(
-                model, node.input, execution_context
-            )
+            if get_sanitize_quant_tensors() != 0:
+                # round input values to match quantization annotation
+                execution_context = sanitize_quant_values(
+                    model, node.input, execution_context
+                )
             execute_node(node, execution_context, graph)
-            execution_context = sanitize_quant_values(
-                model, node.output, execution_context
-            )
+            if get_sanitize_quant_tensors() != 0:
+                # round output values to match quantization annotation
+                execution_context = sanitize_quant_values(
+                    model, node.output, execution_context
+                )
     elif model_exec_mode == "remote_pynq":
         # use remote exec metadata built into model to execute on a remote PYNQ
         remote_exec(model, execution_context)
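For callers, a minimal usage sketch of the switch this hunk introduces, assuming the patch is applied; the model filename, input tensor name, and input shape below are hypothetical placeholders, while ModelWrapper and execute_onnx are used as in the existing FINN execution flow.

import os
import numpy as np
from finn.core.modelwrapper import ModelWrapper
from finn.core.onnx_exec import execute_onnx

# hypothetical model and input name, purely for illustration
model = ModelWrapper("some_model.onnx")
input_dict = {"global_in": np.zeros((1, 64), dtype=np.float32)}

# skip sanitize_quant_values on node inputs/outputs (faster, use with caution)
os.environ["SANITIZE_QUANT_TENSORS"] = "0"
output_dict = execute_onnx(model, input_dict)

# restore the default behavior
os.environ["SANITIZE_QUANT_TENSORS"] = "1"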
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 809b34157ee4b7890a4155bd09f33dcc85c6ceec..4a8277e08d3fc21e0b20668edf2ecad947b36647 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -114,6 +114,17 @@ def get_execution_error_thresh():
         return 1e-2
 
 
+def get_sanitize_quant_tensors():
+    """Return whether tensors with quantization annotations should be sanitized.
+    Enabled by default; disabling it yields faster ONNX execution but may give
+    incorrect results. Use with caution."""
+    try:
+        return int(os.environ["SANITIZE_QUANT_TENSORS"])
+    except KeyError:
+        # enabled by default
+        return 1
+
+
 def make_build_dir(prefix=""):
     """Creates a temporary folder with given prefix to be used as a build dir.
     Use this function instead of tempfile.mkdtemp to ensure any generated files
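A quick usage sketch for the new helper (again assuming the patch above is applied): it simply reflects the SANITIZE_QUANT_TENSORS environment variable, defaulting to enabled when the variable is unset.

import os
from finn.util.basic import get_sanitize_quant_tensors

# unset: sanitization is enabled by default
os.environ.pop("SANITIZE_QUANT_TENSORS", None)
assert get_sanitize_quant_tensors() == 1

# explicitly disabled
os.environ["SANITIZE_QUANT_TENSORS"] = "0"
assert get_sanitize_quant_tensors() == 0

# explicitly re-enabled
os.environ["SANITIZE_QUANT_TENSORS"] = "1"
assert get_sanitize_quant_tensors() == 1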