diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst
index 010cdece978cde078c3df4c64177fa1c5455aa0a..dee62f09a9253380e05300dac8fa34915c20dab5 100644
--- a/docs/finn/internals.rst
+++ b/docs/finn/internals.rst
@@ -18,6 +18,7 @@ ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we
 
 
 Note that FINN uses floating point tensors as a carrier data type to represent integers. Floating point arithmetic can introduce rounding errors, e.g. (int_num * float_scale) / float_scale is not always equal to int_num. When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`finn.util.basic.sanitize_quant_values`) for more information.
+This behavior can be disabled (not recommended!) by setting the environment variable SANITIZE_QUANT_TENSORS=0.
 
 Custom Operations/Nodes
 =======================
diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py
index 218df22e07537034b377abc077aa7902bc0c4cfc..efdfaa19d9f9e5dfa41911a2184e989337b3d9c2 100644
--- a/src/finn/core/onnx_exec.py
+++ b/src/finn/core/onnx_exec.py
@@ -39,7 +39,7 @@ from finn.core.remote_exec import remote_exec
 from finn.core.rtlsim_exec import rtlsim_exec
 from finn.custom_op.registry import getCustomOp
 import finn.analysis.topology as ta
-from finn.util.basic import sanitize_quant_values
+from finn.util.basic import sanitize_quant_values, get_sanitize_quant_tensors
 
 
 def execute_node(node, context, graph):
@@ -160,14 +160,17 @@ def execute_onnx(model, input_dict, return_full_exec_context=False):
         # we can simply walk down the list since the ONNX spec guarantees that it is
         # topologically sorted
         for node in graph.node:
-            # call util function match input values to quantization annotation
-            execution_context = sanitize_quant_values(
-                model, node.input, execution_context
-            )
+            if get_sanitize_quant_tensors() != 0:
+                # round input values to match quantization annotation
+                execution_context = sanitize_quant_values(
+                    model, node.input, execution_context
+                )
             execute_node(node, execution_context, graph)
-            execution_context = sanitize_quant_values(
-                model, node.output, execution_context
-            )
+            if get_sanitize_quant_tensors() != 0:
+                # round output values to quantization annotation
+                execution_context = sanitize_quant_values(
+                    model, node.output, execution_context
+                )
     elif model_exec_mode == "remote_pynq":
         # use remote exec metadata built into model to execute on a remote PYNQ
         remote_exec(model, execution_context)
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 809b34157ee4b7890a4155bd09f33dcc85c6ceec..4a8277e08d3fc21e0b20668edf2ecad947b36647 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -114,6 +114,17 @@ def get_execution_error_thresh():
         return 1e-2
 
 
+def get_sanitize_quant_tensors():
+    """Return whether tensors with quantization annotations should be sanitized.
+    Enabled by default, disabling will yield faster ONNX execution but may give
+    incorrect results. Use with caution."""
+    try:
+        return int(os.environ["SANITIZE_QUANT_TENSORS"])
+    except KeyError:
+        # enabled by default
+        return 1
+
+
 def make_build_dir(prefix=""):
     """Creates a temporary folder with given prefix to be used as a build dir.
     Use this function instead of tempfile.mkdtemp to ensure any generated files
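
Below is a minimal usage sketch, not part of the patch itself, assuming a FINN environment with this change applied. It illustrates the float-carrier rounding issue that `sanitize_quant_values` guards against, and how the new `SANITIZE_QUANT_TENSORS` variable is read by `get_sanitize_quant_tensors` (and honored by `execute_onnx`). The concrete numbers are made up for illustration.

```python
import os

# Plain-Python illustration of the rounding issue described in internals.rst:
# (int_num * float_scale) / float_scale is not always exactly int_num.
int_num = 3
float_scale = 0.1
roundtripped = (int_num * float_scale) / float_scale
print(roundtripped == int_num)  # can be False due to float rounding

# The new helper reads SANITIZE_QUANT_TENSORS; unset means "enabled" (returns 1).
from finn.util.basic import get_sanitize_quant_tensors

print(get_sanitize_quant_tensors())  # 1 when the variable is unset

# Opting out (not recommended): execute_onnx() will then skip the per-node
# sanitize_quant_values() calls, trading the rounding safety net for speed.
os.environ["SANITIZE_QUANT_TENSORS"] = "0"
print(get_sanitize_quant_tensors())  # 0
```

Unsetting the variable (or setting it to any nonzero value) restores the default sanitizing behavior, since execute_onnx only skips sanitization when `get_sanitize_quant_tensors()` returns 0.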