diff --git a/AUTHORS.rst b/AUTHORS.rst
index 533ed62e1dbda2799f74805f2100769f9c4fecfc..1d42d35a3b269176fcab79d8239b84ac8442fa43 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -13,3 +13,12 @@ Contributors
 * Suranga Mahesh (@surangamh)
 * Peter Lehnhardt (@pete-lennart)
 * Neil Kim Nielsen (@neilkimn)
+* Jon Ander Lezeta (@jalezeta)
+* John Terry (@jterry-x)
+* Alina Vasilciuc (@alinavalinav)
+* Alessandro Pappalardo (@volcacius)
+* Giuseppe Franco (@Giuseppe5)
+* Syed Asad Alam (@asadalam)
+* Javier Duarte (@jmduarte)
+* Uma Maheshwari (@umav1511)
+* José Rosa (@pinxau1000)
diff --git a/README.md b/README.md
index 10ac25cb8f9e23520830efa4f2f7a58a21370e29..f36eac3a911315c260f1849a0406a9a467f0d53f 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,9 @@ Please see the [Getting Started](https://finn.readthedocs.io/en/latest/getting_s
 
 ## What's New in FINN?
 
+* **2021-11-05:** v0.7 is released, introducing QONNX support, three new example networks and many other improvements. Read more on the [v0.7 release blog post](https://xilinx.github.io/finn//2021/11/05/finn-v07-is-released.html).
 * **2021-06-15:** v0.6 is released, with ResNet-50 on U250 and ZCU104 MobileNet-v1 in finn-examples showcasing new features plus a lot more. Read more on the [v0.6 release blog post](https://xilinx.github.io/finn//2021/06/15/finn-v06-is-released.html).
 * **2020-12-17:** v0.5b (beta) is released, with a new [examples repo](https://github.com/Xilinx/finn-examples) including MobileNet-v1. Read more on the <a href="https://xilinx.github.io/finn/2020/12/17/finn-v05b-beta-is-released.html">release blog post</a>.
-* **2020-09-21:** v0.4b (beta) is released. Read more on the <a href="https://xilinx.github.io/finn/2020/09/21/finn-v04b-beta-is-released.html">release blog post</a>.
 
 ## Documentation
 
diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn
index 1572ba2872a46a8eaa9331a601a77aada0b0aa1c..4d03e2fbb5c4cce7dbda6a757aea8dce3e15e569 100644
--- a/docker/Dockerfile.finn
+++ b/docker/Dockerfile.finn
@@ -86,18 +86,24 @@ RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg
 
 # git-based Python repo dependencies
 # these are installed in editable mode for easier co-development
-ARG FINN_BASE_COMMIT="7c2603a95e90e4de2575020e575c24eab6a15889"
-ARG FINN_EXP_COMMIT="f82c0d9868bb88ea045dfadb28508d327d287221"
-ARG BREVITAS_COMMIT="462f86cdc60f9915baf13afd1676fb21da44c2ee"
+ARG FINN_BASE_COMMIT="e8facdd719b55839cca46da2cc4f4a4a372afb41"
+ARG QONNX_COMMIT="9f9eff95227cc57aadc6eafcbd44b7acda89f067"
+ARG FINN_EXP_COMMIT="af6102769226b82b639f243dc36f065340991513"
+ARG BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03"
 ARG PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e"
 ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
-ARG HLSLIB_COMMIT="fbb07135b3d991602e8abe3f2c51212c11fd392b"
+ARG HLSLIB_COMMIT="966d17d3fddd801927b2167627d23a9a15ed1461"
 ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e"
 ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
+
 # finn-base
 RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
 RUN git -C /workspace/finn-base checkout $FINN_BASE_COMMIT
 RUN pip install -e /workspace/finn-base
+# Install QONNX without dependencies; currently its only dependency is finn-base, which is installed above
+RUN git clone https://github.com/fastmachinelearning/qonnx.git /workspace/qonnx
+RUN git -C /workspace/qonnx checkout $QONNX_COMMIT
+RUN pip install --no-dependencies -e /workspace/qonnx
 # finn-experimental
 RUN git clone https://github.com/Xilinx/finn-experimental.git /workspace/finn-experimental
 RUN git -C /workspace/finn-experimental checkout $FINN_EXP_COMMIT
diff --git a/docs/finn/brevitas_export.rst b/docs/finn/brevitas_export.rst
index 65f6ab6b3053d9f11239b3c048143b3d2f346808..408b14fd2b6c99ce3ec128a0361a25b3f2c193a5 100644
--- a/docs/finn/brevitas_export.rst
+++ b/docs/finn/brevitas_export.rst
@@ -8,7 +8,13 @@ Brevitas Export
    :scale: 70%
    :align: center
 
-FINN expects an ONNX model as input. This can be a model trained with `Brevitas <https://github.com/Xilinx/brevitas>`_. Brevitas is a PyTorch library for quantization-aware training and the FINN Docker image comes with several `example Brevitas networks <https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq>`_. Brevitas provides an export of a quantized network in ONNX representation. The resulting model consists only of `ONNX standard nodes <https://github.com/onnx/onnx/blob/master/docs/Operators.md>`_, but also contains additional attributes for the ONNX nodes to represent low precision datatypes. To work with the model it is wrapped into :ref:`modelwrapper` provided by FINN.
+FINN expects an ONNX model as input. This can be a model trained with `Brevitas <https://github.com/Xilinx/brevitas>`_. Brevitas is a PyTorch library for quantization-aware training and the FINN Docker image comes with several `example Brevitas networks <https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq>`_. Brevitas can export a quantized network to ONNX in several flavors.
+Two of the Brevitas-exported ONNX variants can be ingested by FINN:
+
+   * FINN-ONNX: Quantized weights are exported as tensors with additional attributes that mark low-precision datatypes, while quantized activations are exported as MultiThreshold nodes.
+   * QONNX: All quantization is represented using Quant, BinaryQuant or Trunc nodes. QONNX models must first be converted to FINN-ONNX by :py:mod:`finn.transformation.qonnx.convert_qonnx_to_finn`.
+
+To work with either type of ONNX model, it is loaded into a :ref:`modelwrapper` provided by FINN.
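+
+As a minimal sketch (the filename ``model_qonnx.onnx`` below is hypothetical), the QONNX-to-FINN-ONNX conversion can be applied as follows::
+
+   from finn.core.modelwrapper import ModelWrapper
+   from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+
+   # load the exported QONNX model and convert it to the FINN-ONNX dialect
+   model = ModelWrapper("model_qonnx.onnx")
+   model = model.transform(ConvertQONNXtoFINN())
+   model.save("model_finn_onnx.onnx")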
 
 At this stage we can already use the functional verification flow to simulate the model using Python, this is marked in the graphic with the dotted arrow. For more details please have look at :ref:`verification`.
 
diff --git a/docs/finn/developers.rst b/docs/finn/developers.rst
index 6e7fa0d920a943e468fd70464b050ab74cf8ec7d..508cd86a31b6284e072499987ae45864d3942e16 100644
--- a/docs/finn/developers.rst
+++ b/docs/finn/developers.rst
@@ -7,7 +7,7 @@ Developer documentation
 This page is intended to serve as a starting point for new FINN developers.
 Power users may also find this information useful.
 
-Getting started
+Prerequisites
 ================
 
 Before starting to do development on FINN it's a good idea to start
diff --git a/docs/finn/faq.rst b/docs/finn/faq.rst
index 87e36e0722e4db6b2efd5de5df343b7bdf68a719..e426bdb4e28dd02c83b47d59b59c318840815f78 100644
--- a/docs/finn/faq.rst
+++ b/docs/finn/faq.rst
@@ -4,68 +4,109 @@
 Frequently Asked Questions
 ***********************
 
-.. note:: **This page is under construction.**
+Can't find the answer to your question here? Check `FINN GitHub Discussions <https://github.com/Xilinx/finn/discussions>`_.
 
-Can I install FINN out of the Docker container?
-===============================================
 
-We do not support out of the Docker implementations at the moment. This is due
-to the high complexity of the FINN project dependencies.
+Can I install FINN out of the Docker container?
+    We do not support out of the Docker implementations at the moment. This is due
+    to the high complexity of the FINN project dependencies.
 
 Since FINN uses ONNX, can I compile any model from the ONNX Model Zoo to an FPGA accelerator?
-=============================================================================================
+    The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
+    types and quantization annotations. Networks must be first quantized using Brevitas and exported
+    to FINN-ONNX to be converted to FPGA accelerators.
 
-The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
-types and quantization annotations. Networks must be first quantized using Brevitas and exported
-to FINN-ONNX to be converted to FPGA accelerators.
 
 
-Can I deploy custom NNs with arbitrary precisions and layers using FINN?
-=========================================================================
 
-Yes, though the effort required and quality of results will vary.
-Although we do support arbitrary
-precision, the way we create the hardware isn't typically practical for more than
-4 bits, or very large networks for a single FPGA.
-In terms of layers, only a subset of quantized layers covered by the various FINN examples
-are currently supported.
-It is possible to add support for new layers, though we don't have tutorials for this in place
-just yet.
 
-Does FINN only work with the example networks?
-==============================================
+Can I deploy custom NNs with arbitrary precisions and layers using FINN?
+    Yes, though the effort required and quality of results will vary.
+    Although we do support arbitrary
+    precision, the way we create the hardware isn't typically practical for more than
+    4 bits, or for very large networks on a single FPGA.
+    In terms of layers, only a subset of quantized layers covered by the various FINN examples
+    are currently supported.
+    It is possible to add support for new layers, though we don't have tutorials for this in place
+    just yet.
 
-FINN isn't restricted to the example networks;
-rather, it's restricted to certain patterns (e.g. certain layer types and their combinations).
-The current best practice for custom networks is to take a working network and gradually modify it.
+Does FINN only work with the example networks?
+    FINN isn't restricted to the example networks;
+    rather, it's restricted to certain patterns (e.g. certain layer types and their combinations).
+    The current best practice for custom networks is to take a working network and gradually modify it.
 
 What is the expected background for using FINN?
-===============================================
-
-Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
-is expected.
-Our goal is to make the tool in a shape and form so that no hardware/FPGA background
-should be necessary, although having some knowledge would give better results.
+    Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
+    is expected.
+    Our goal is to shape the tool so that no hardware/FPGA background
+    is necessary, although having some knowledge will give better results.
 
 What operating systems are supported by FINN?
-=============================================
-
-FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long
-as you install Docker (``docker-ce``) on your machine .
+    FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long
+    as you install Docker (``docker-ce``) on your machine.
 
 
 I am getting DocNav and Model_Composer errors when launching the Docker image.
-==============================================================================
-
-We do not mount those particular directories into the Docker container because they are not
-used. The errors are Vivado related but you can safely ignore them.
+    We do not mount those particular directories into the Docker container because they are not
+    used. The errors are Vivado related but you can safely ignore them.
 
 What board do you recommend to start working with FINN?
-=======================================================
-
-Our preferred target platforms are those supported by  `PYNQ <http://www.pynq.io/board.html>`_.
-For those boards we can offer end-to-end (DNN-to-bitstream) deployment,
-see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples.
-However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO)
-in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board,
-though you will have to do the system integration manually.
+    Our preferred target platforms are those supported by `PYNQ <http://www.pynq.io/board.html>`_.
+    For those boards we can offer end-to-end (DNN-to-bitstream) deployment;
+    see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples.
+    However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO)
+    in-and-out interfaces. This means the generated accelerator can be integrated onto any Xilinx FPGA board,
+    though you will have to do the system integration manually.
+
+FINN-generated builds break after I restart my computer, because ``/tmp`` gets wiped.
+    See https://github.com/Xilinx/finn/discussions/404
+
+How can I target an arbitrary Xilinx FPGA without PYNQ support?
+    See https://github.com/Xilinx/finn/discussions/387
+
+Why do FINN-generated architectures need FIFOs between layers?
+    See https://github.com/Xilinx/finn/discussions/383
+
+How do I tell FINN to utilize DSPs instead of LUTs for MAC operations in particular layers?
+    This is done with the ``resType="dsp"`` attribute on ``StreamingFCLayer`` and ``Vector_Vector_Activate`` instances.
+    When using the ``build_dataflow`` system, this can be specified on a per-layer basis as part of one or more layers’
+    folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`); see the sketch under the next question.
+    This is a good idea for layers with wider weight/input activation bitwidths and high PE*SIMD.
+    See the `MobileNet-v1 build config for ZCU104 in finn-examples <https://github.com/Xilinx/finn-examples/blob/main/build/mobilenet-v1/folding_config/ZCU104_folding_config.json#L15>`_ for reference.
+
+
+How do I tell FINN to utilize a particular type of memory resource in particular layers?
+    This is done with the ``ram_style`` attribute. Check the particular ``HLSCustomOp`` attribute definition to see
+    which modes are supported (`example for StreamingFCLayer <https://github.com/Xilinx/finn/blob/dev/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py#L95>`_).
+    When using the ``build_dataflow`` system, this can be specified on a per-layer basis as part of one or more layers’
+    folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`).
+    See the `MobileNet-v1 build config for ZCU104 in finn-examples <https://github.com/Xilinx/finn-examples/blob/main/build/mobilenet-v1/folding_config/ZCU104_folding_config.json#L15>`_ for reference.
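+
+    As a rough sketch, a folding config file that combines ``resType`` and ``ram_style`` overrides could be generated like this (the layer names and folding factors below are hypothetical and must match your own graph)::
+
+        import json
+
+        # per-layer overrides; the "Defaults" section can stay empty
+        folding_config = {
+            "Defaults": {},
+            "StreamingFCLayer_Batch_0": {"PE": 16, "SIMD": 16, "resType": "dsp", "ram_style": "ultra"},
+            "StreamingFCLayer_Batch_1": {"PE": 8, "SIMD": 8, "resType": "lut", "ram_style": "block"},
+        }
+        with open("folding_config.json", "w") as f:
+            json.dump(folding_config, f, indent=2)
+
+    The resulting file can then be passed in via ``folding_config_file`` in the build config.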
+
+Which data layout do FINN-generated accelerators use? Big-endian? Little-endian?
+    The data layout used by FINN does not correspond to system-level big or little endian due to difficulties in defining what
+    the “word size” is and bit packing for smaller datatypes. FINN’s “word size” is dependent on the parallelization of the
+    first/last layers. For instance, if the first HLS layer is using SIMD=3 this means the “innermost dimension” in the
+    data packing functions will be of size 3.
+    When you use the verification infrastructure or the generated PYNQ Python drivers that FINN provides, the tool normally
+    takes care of any required data layout conversion on standard numpy arrays before presenting the data to the accelerator,
+    and vice versa on the output side. Doing this data packing and layout conversion by hand can be messy at the moment.
+    If you do need to do it manually, first examine how the `FINN PYNQ Python drivers <https://github.com/Xilinx/finn-examples/blob/main/finn_examples/driver.py#L379>`_ do this: the input data is
+    first reshaped to create the “folded input shape” that reflects the word size of the first layer based on how much it
+    was parallelized, then data packing is applied to obtain a raw byte array (with some reversals going on) that can be
+    fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input <https://github.com/Xilinx/finn-base/blob/dev/src/finn/util/data_packing.py#L289>`_ function, which converts npy arrays into lists of Python arbitrary-precision integers that are fed into pyverilator for RTL simulation.
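+
+    As a rough illustration of the manual route (the shapes, datatype and stream width below are hypothetical and depend on the first layer of your own network)::
+
+        import numpy as np
+        from finn.core.datatype import DataType
+        from finn.util.data_packing import npy_to_rtlsim_input
+
+        # suppose the first layer consumes 147 UINT8 values per image with SIMD=3:
+        # the folded input shape is then (1, 49, 3), with the innermost dim equal to SIMD
+        ibuf = np.random.randint(0, 256, size=(1, 147)).astype(np.float32)
+        ibuf_folded = ibuf.reshape(1, 49, 3)
+        # pack each group of SIMD elements into one stream word (SIMD * 8 = 24 bits wide)
+        packed_words = npy_to_rtlsim_input(ibuf_folded, DataType["UINT8"], 24)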
+
+Why does FIFO sizing take so long for my network? Is something wrong?
+    The automatic FIFO sizing in FINN can take quite a long time. Unfortunately it doesn't really parallelize over multiple cores, since
+    it's based on running an RTL simulation with lots of inputs and very large FIFOs, then observing the maximum occupancy/count
+    in each FIFO.
+
+What's a good starting point for the folding configuration if I want to make manual changes?
+    First, enable the automatic folding options in ``build_dataflow``, such as ``target_fps``. This should find a decent set of
+    folding factors and save them to ``output_folder/auto_folding_config.json``, which you can use as a basis for creating the desired config.
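+
+    A minimal sketch of such an estimate-only build (the model file, FPGA part and ``target_fps`` below are placeholders)::
+
+        import finn.builder.build_dataflow as build
+        import finn.builder.build_dataflow_config as build_cfg
+
+        cfg = build_cfg.DataflowBuildConfig(
+            output_dir="output_estimates",
+            target_fps=100000,  # lets FINN pick an initial set of folding factors
+            synth_clk_period_ns=10.0,
+            fpga_part="xc7z020clg400-1",
+            steps=build_cfg.estimate_only_dataflow_steps,
+            generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
+        )
+        build.build_dataflow_cfg("model.onnx", cfg)
+        # output_estimates/auto_folding_config.json can then serve as the basis for a
+        # manual folding config, passed back in via folding_config_file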
diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst
index 14a1ec44a00fc9448b067bae6480091897f47472..af7a05751b0b2f9c991849e2c808e089aaac68d9 100644
--- a/docs/finn/getting_started.rst
+++ b/docs/finn/getting_started.rst
@@ -12,7 +12,8 @@ Quickstart
 3. Clone the FINN compiler from the repo: ``git clone https://github.com/Xilinx/finn/`` and go into the directory where it is cloned
 4. Execute ``./run-docker.sh quicktest`` to verify your installation.
 5. Optionally, follow the instructions on :ref:`PYNQ board first-time setup` or :ref:`Alveo first-time setup` for board setup.
-6. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler.
+6. Optionally, set up a `Vivado/Vitis license`_.
+7. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler.
 
 
 How do I use FINN?
@@ -28,7 +29,7 @@ In general, the approach for using the FINN framework is as follows:
 
 1. Train your own quantized neural network (QNN) in `Brevitas <https://github.com/Xilinx/brevitas>`_. We have some `guidelines <https://bit.ly/finn-hls4ml-qat-guidelines>`_ on quantization-aware training (QAT).
 2. Export to FINN-ONNX by following `this tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/basics/1_brevitas_network_import.ipynb>`_ .
-3. Use FINN's ``build_dataflow`` system on the exported model by following `this tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb>`_
+3. Use FINN's ``build_dataflow`` system on the exported model by following this `tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb>`_
 4. Adjust your QNN topology, quantization settings and ``build_dataflow`` configuration to get the desired results.
 
 Please note that the framework is still under development, and how well this works will depend on how similar your custom network is to the examples we provide.
@@ -111,6 +112,7 @@ These are summarized below:
 * (optional) ``FINN_DOCKER_TAG`` (autogenerated) specifies the Docker image tag to use.
 * (optional) ``FINN_DOCKER_RUN_AS_ROOT`` (default 0) if set to 1 then run Docker container as root, default is the current user.
 * (optional) ``FINN_DOCKER_GPU`` (autodetected) if not 0 then expose all Nvidia GPUs or those selected by ``NVIDIA_VISIBLE_DEVICES`` to Docker container for accelerated DNN training. Requires `Nvidia Container Toolkit <https://github.com/NVIDIA/nvidia-docker>`_
+* (optional) ``FINN_DOCKER_EXTRA`` (default "") specifies extra arguments to pass to the ``docker run`` command when executing ``./run-docker.sh``
 * (optional) ``NVIDIA_VISIBLE_DEVICES`` (default "") specifies specific Nvidia GPUs to use in Docker container. Possible values are a comma-separated list of GPU UUID(s) or index(es) e.g. ``0,1,2``, ``all``, ``none``, or void/empty/unset.
 * (optional) ``DOCKER_BUILDKIT`` (default "1") enables `Docker BuildKit <https://docs.docker.com/develop/develop-images/build_enhancements/>`_ for faster Docker image rebuilding (recommended).
 
@@ -181,15 +183,26 @@ On the host side:
 5. `Set up public key authentication <https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server>`_. Copy your private key to the ``finn/ssh_keys`` folder on the host to get password-less deployment and remote execution.
 6. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time.
 
+Vivado/Vitis license
+*********************
+If you are targeting Xilinx FPGA parts that need specific licenses (non-WebPack), you can make these available to the
+FINN Docker container by passing extra arguments. To do this, use the ``FINN_DOCKER_EXTRA`` environment variable as follows:
 
+::
+
+  export FINN_DOCKER_EXTRA=" -v /path/to/licenses:/path/to/licenses -e XILINXD_LICENSE_FILE=/path/to/licenses "
+
+The above example mounts ``/path/to/licenses`` from the host into the same path on the Docker container, and sets the
+value of the ``XILINXD_LICENSE_FILE`` environment variable.
 
 System Requirements
 ====================
 
 * Ubuntu 18.04 with ``bash`` installed
 * Docker `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_
-* A working Vivado 2019.1 or 2020.1 installation
-* A ``VIVADO_PATH`` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located)
+* A working Vivado 2020.1 installation
+* ``FINN_XILINX_PATH`` and ``FINN_XILINX_VERSION`` environment variables correctly set (see `Quickstart`_)
+* *(optional)* `Vivado/Vitis license`_ if targeting non-WebPack FPGA parts.
 * *(optional)* A PYNQ board with a network connection, see `PYNQ board first-time setup`_
 * *(optional)* An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_ )
 
diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst
index 0fbc3cf72795005591994ddca0fa0d58b72622a8..9305f7840216f6d076a11337ddb3cfa588f1a062 100644
--- a/docs/finn/internals.rst
+++ b/docs/finn/internals.rst
@@ -4,12 +4,12 @@
 Internals
 *********
 
-Intermediate Representation: FINN-ONNX
-======================================
+Intermediate Representation: QONNX and FINN-ONNX
+================================================
 
 FINN uses `ONNX <https://github.com/onnx/onnx>`_ as an intermediate representation (IR) for neural networks. As such, almost every component inside FINN uses ONNX and its `Python API <https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md>`_, so you may want to familiarize yourself with how ONNX represents DNNs. Specifically, the `ONNX protobuf description <https://github.com/onnx/onnx/blob/master/onnx/onnx.proto>`_ (or its `human-readable documentation <https://github.com/onnx/onnx/blob/master/docs/IR.md>`_ and the `operator schemas <https://github.com/onnx/onnx/blob/master/docs/Operators.md>`_ are useful as reference documents. We also provide a Jupyter notebook that can help to get familiar with ONNX by showing how to work with a simple ONNX model in FINN, see chapter :ref:`tutorials` for details.
 
-.. note:: FINN uses ONNX is a specific way that we refer to as FINN-ONNX, and not all ONNX graphs are supported by FINN (and vice versa).
+.. note:: FINN uses two specialized variants of ONNX, called QONNX and FINN-ONNX; not every ONNX graph is supported by FINN (and vice versa).
 
 Custom Quantization Annotations
 ===============================
diff --git a/docs/finn/source_code/finn.analysis.rst b/docs/finn/source_code/finn.analysis.rst
index 7312150657c86976638e73fdf2c0450160989a6a..1de42ac32bc62ce71e039f63168302b22711f454 100644
--- a/docs/finn/source_code/finn.analysis.rst
+++ b/docs/finn/source_code/finn.analysis.rst
@@ -23,6 +23,13 @@ finn.analysis.base
    :undoc-members:
    :show-inheritance:
 
+finn.analysis.inference\_cost
+-----------------------------
+
+.. automodule:: finn.analysis.inference_cost
+   :members:
+   :undoc-members:
+   :show-inheritance:
 
 finn.analysis.topology
 -----------------------------
diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
index 7b4e7bfa05f895cd03aed2859576e07db28bd9f9..34a6285f227690c87c568855e7ca70ddb9b2764c 100644
--- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
@@ -13,6 +13,23 @@ Base Class
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.addstreams\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.addstreams_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.channelwise\_op\_batch
+----------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.channelwise_op_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.custom\_op.fpgadataflow.convolutioninputgenerator
 -------------------------------------------------------------
 
@@ -21,6 +38,87 @@ finn.custom\_op.fpgadataflow.convolutioninputgenerator
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.convolutioninputgenerator1d
+-------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.convolutioninputgenerator1d
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.downsampler
+-----------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.downsampler
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.duplicatestreams\_batch
+-----------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.duplicatestreams_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.fmpadding\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.fmpadding_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.globalaccpool\_batch
+--------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.globalaccpool_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.iodma
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.iodma
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.labelselect\_batch
+------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.labelselect_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.lookup
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.lookup
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.pool\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.pool_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.streamingdataflowpartition
+--------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.streamingdataflowpartition
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.custom\_op.fpgadataflow.streamingdatawidthconverter\_batch
 ----------------------------------------------------------------------
 
@@ -61,6 +159,15 @@ finn.custom\_op.fpgadataflow.templates
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.thresholding\_batch
+--------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.thresholding_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.custom\_op.fpgadataflow.tlastmarker
 -----------------------------------------------
 
@@ -68,3 +175,19 @@ finn.custom\_op.fpgadataflow.tlastmarker
    :members:
    :undoc-members:
    :show-inheritance:
+
+finn.custom\_op.fpgadataflow.upsampler
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.upsampler
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.vector\_vector\_activate\_batch
+-------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.vector_vector_activate_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/finn/source_code/finn.custom_op.general.rst b/docs/finn/source_code/finn.custom_op.general.rst
index e86774a48e22b5af9e4d2995a4287a740b1c08e5..87749fd69e541e628436aa904c180338418addc1 100644
--- a/docs/finn/source_code/finn.custom_op.general.rst
+++ b/docs/finn/source_code/finn.custom_op.general.rst
@@ -5,6 +5,14 @@ Custom Op - General
 General Custom Ops
 ===================
 
+finn.custom\_op.general.bipolar_quant
+--------------------------------------
+
+.. automodule:: finn.custom_op.general.bipolar_quant
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.custom\_op.general.debugmarker
 -----------------------------------
 
@@ -13,6 +21,14 @@ finn.custom\_op.general.debugmarker
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.general.genericpartition
+-----------------------------------------
+
+.. automodule:: finn.custom_op.general.genericpartition
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.custom\_op.general.im2col
 ------------------------------
 
@@ -37,6 +53,14 @@ finn.custom\_op.general.multithreshold
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.general.quant
+------------------------------
+
+.. automodule:: finn.custom_op.general.quant
+  :members:
+  :undoc-members:
+  :show-inheritance:
+
 finn.custom\_op.general.quantavgpool2d
 --------------------------------------
 
@@ -45,13 +69,13 @@ finn.custom\_op.general.quantavgpool2d
   :undoc-members:
   :show-inheritance:
 
-finn.custom\_op.general.streamingdataflowpartition
----------------------------------------------------
+finn.custom\_op.general.trunc
+------------------------------
 
-.. automodule:: finn.custom_op.general.streamingdataflowpartition
-   :members:
-   :undoc-members:
-   :show-inheritance:
+.. automodule:: finn.custom_op.general.trunc
+  :members:
+  :undoc-members:
+  :show-inheritance:
 
 finn.custom\_op.general.xnorpopcount
 -------------------------------------
diff --git a/docs/finn/source_code/finn.transformation.fpgadataflow.rst b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
index 42bc7fb5315756b924e0d1cce58ca4e110bda824..b1e7075bdcfb675a894f3e66b61d59117e4f078d 100644
--- a/docs/finn/source_code/finn.transformation.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
@@ -62,6 +62,14 @@ finn.transformation.fpgadataflow.create\_stitched\_ip
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.fpgadataflow.externalize\_params
+------------------------------------------------------------
+
+.. automodule:: finn.transformation.fpgadataflow.externalize_params
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.transformation.fpgadataflow.floorplan
 ----------------------------------------------------
 
diff --git a/docs/finn/source_code/finn.transformation.qonnx.rst b/docs/finn/source_code/finn.transformation.qonnx.rst
new file mode 100644
index 0000000000000000000000000000000000000000..8320e19efb81dd5a52f750e22e280f41070bf48c
--- /dev/null
+++ b/docs/finn/source_code/finn.transformation.qonnx.rst
@@ -0,0 +1,51 @@
+************************
+Transformation - QONNX
+************************
+
+Transformation (QONNX)
+===========================
+
+.. automodule:: finn.transformation.qonnx
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.convert\_qonnx\_to\_finn
+---------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.convert_qonnx_to_finn
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.fold\_quant\_weights
+-----------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.fold_quant_weights
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.infer\_quant\_avg\_pool\_2d
+------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.infer_quant_avg_pool_2d
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.qonnx\_activation\_handlers
+-------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.qonnx_activation_handlers
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.quant\_act\_to\_multithreshold
+---------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.quant_act_to_multithreshold
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/finn/source_code/finn.transformation.rst b/docs/finn/source_code/finn.transformation.rst
index aeb0d7614222740315633f7658cab9cc7e75490b..cffb0fd0f9e963c02a6986e47e1654951ad3bab0 100644
--- a/docs/finn/source_code/finn.transformation.rst
+++ b/docs/finn/source_code/finn.transformation.rst
@@ -11,6 +11,7 @@ Submodules
    :maxdepth: 2
 
    finn.transformation.fpgadataflow
+   finn.transformation.qonnx
    finn.transformation.streamline
 
 Transformation Passes
@@ -40,6 +41,14 @@ finn.transformation.bipolar\_to\_xnor
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.change\_3d\_tensors\_to\_4d
+------------------------------------------------
+
+.. automodule:: finn.transformation.change_3d_tensors_to_4d
+  :members:
+  :undoc-members:
+  :show-inheritance:
+
 finn.transformation.change\_datalayout
 --------------------------------------------
 
@@ -48,6 +57,13 @@ finn.transformation.change\_datalayout
   :undoc-members:
   :show-inheritance:
 
+finn.transformation.create\_generic\_partitions
+------------------------------------------------
+
+.. automodule:: finn.transformation.create_generic_partitions
+  :members:
+  :undoc-members:
+  :show-inheritance:
 
 finn.transformation.double\_to\_single\_float
 ----------------------------------------------------
@@ -57,6 +73,23 @@ finn.transformation.double\_to\_single\_float
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.extend\_partition
+------------------------------------------
+
+.. automodule:: finn.transformation.extend_partition
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.extract\_conv\_bias
+------------------------------------------
+
+.. automodule:: finn.transformation.extract_conv_bias
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.transformation.fold\_constants
 ------------------------------------------
 
@@ -65,6 +98,14 @@ finn.transformation.fold\_constants
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.gemm\_to\_matmul
+------------------------------------------
+
+.. automodule:: finn.transformation.gemm_to_matmul
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.transformation.general
 ----------------------------------
 
@@ -113,6 +154,13 @@ finn.transformation.lower\_convs\_to\_matmul
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.make\_input\_chanlast
+------------------------------------------
+
+.. automodule:: finn.transformation.make_input_chanlast
+  :members:
+  :undoc-members:
+  :show-inheritance:
 
 finn.transformation.merge\_onnx\_models
 ----------------------------------------
@@ -130,3 +178,11 @@ finn.transformation.move\_reshape
    :members:
    :undoc-members:
    :show-inheritance:
+
+finn.transformation.remove
+-------------------------------------
+
+.. automodule:: finn.transformation.remove
+  :members:
+  :undoc-members:
+  :show-inheritance:
diff --git a/docs/finn/source_code/finn.transformation.streamline.rst b/docs/finn/source_code/finn.transformation.streamline.rst
index f43d6d12314d3bad38f189d2831e21447f10cf10..9ed4bbe1d8c6b12c67e1c0c2927e8b7067410a9c 100644
--- a/docs/finn/source_code/finn.transformation.streamline.rst
+++ b/docs/finn/source_code/finn.transformation.streamline.rst
@@ -26,13 +26,6 @@ finn.transformation.streamline.collapse\_repeated
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.streamline.remove
--------------------------------------
-
-.. automodule:: finn.transformation.streamline.remove
-  :members:
-  :undoc-members:
-  :show-inheritance:
 
 finn.transformation.streamline.reorder
 ---------------------------------------------
diff --git a/docs/finn/source_code/finn.util.rst b/docs/finn/source_code/finn.util.rst
index 82e4bf3261582c9be622cbe3f15af38ba5e3fa41..62b72c2ac84567b20fee73a16e82b5857d698c9d 100644
--- a/docs/finn/source_code/finn.util.rst
+++ b/docs/finn/source_code/finn.util.rst
@@ -72,6 +72,15 @@ finn.util.onnx
    :undoc-members:
    :show-inheritance:
 
+finn.util.platforms
+--------------------
+
+.. automodule:: finn.util.platforms
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.util.pytorch
 ------------------
 
diff --git a/run-docker.sh b/run-docker.sh
index a1147fcee55d345850da4c533dd9e88270d727a6..2abd67f0679b32a09e51d03efe548bdc095c11a0 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -92,11 +92,11 @@ SCRIPTPATH=$(dirname "$SCRIPT")
 : ${FINN_DOCKER_PREBUILT="0"}
 : ${FINN_DOCKER_RUN_AS_ROOT="0"}
 : ${FINN_DOCKER_GPU="$(docker info | grep nvidia | wc -m)"}
+: ${FINN_DOCKER_EXTRA=""}
 : ${NVIDIA_VISIBLE_DEVICES=""}
 : ${DOCKER_BUILDKIT="1"}
 
 DOCKER_INTERACTIVE=""
-DOCKER_EXTRA=""
 
 if [ "$1" = "test" ]; then
   gecho "Running test suite (all tests)"
@@ -112,20 +112,20 @@ elif [ "$1" = "notebook" ]; then
     JUPYTER_PASSWD_ARG="--NotebookApp.password='$JUPYTER_PASSWD_HASH'"
   fi
   DOCKER_CMD="jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks"
-  DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
-  DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
-  DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
-  DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT "
+  FINN_DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
+  FINN_DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
+  FINN_DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
+  FINN_DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT "
 elif [ "$1" = "build_dataflow" ]; then
   BUILD_DATAFLOW_DIR=$(readlink -f "$2")
-  DOCKER_EXTRA="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR "
+  FINN_DOCKER_EXTRA="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR "
   DOCKER_INTERACTIVE="-it"
   #FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build
   gecho "Running build_dataflow for folder $BUILD_DATAFLOW_DIR"
   DOCKER_CMD="build_dataflow $BUILD_DATAFLOW_DIR"
 elif [ "$1" = "build_custom" ]; then
   BUILD_CUSTOM_DIR=$(readlink -f "$2")
-  DOCKER_EXTRA="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR "
+  FINN_DOCKER_EXTRA="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR "
   DOCKER_INTERACTIVE="-it"
   #FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build
   gecho "Running build_custom: $BUILD_CUSTOM_DIR/build.py"
@@ -139,9 +139,9 @@ fi
 if [ "$FINN_DOCKER_GPU" != 0 ];then
   gecho "nvidia-docker detected, enabling GPUs"
   if [ ! -z "$NVIDIA_VISIBLE_DEVICES" ];then
-    DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES "
+    FINN_DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES "
   else
-    DOCKER_EXTRA+="--gpus all "
+    FINN_DOCKER_EXTRA+="--gpus all "
   fi
 fi
 
@@ -222,7 +222,7 @@ if [ ! -z "$FINN_XILINX_PATH" ];then
     DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR "
   fi
 fi
-DOCKER_EXEC+="$DOCKER_EXTRA "
+DOCKER_EXEC+="$FINN_DOCKER_EXTRA "
 DOCKER_EXEC+="$FINN_DOCKER_TAG $DOCKER_CMD"
 
 $DOCKER_EXEC
diff --git a/setup.cfg b/setup.cfg
index 9a6ca312aff459fb29f6e33a866b911e1a038229..96618e0ffcb8dcb217185c67948a71a132a7b45a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -74,7 +74,17 @@ exclude =
 # PDF = ReportLab; RXP
 # finn-base is needed to build the full set of docs
 docs =
-    finn-base
+    finn-base==0.0.3
+    docutils==0.17.1
+    dataclasses-json==0.5.2
+    gspread==3.6.0
+    pytest
+    netron
+    vcdvcd
+    torchvision
+    torch
+    qonnx@git+https://github.com/fastmachinelearning/qonnx@main#egg=qonnx
+
 # Add here test requirements (semicolon/line-separated)
 testing =
     pytest
diff --git a/src/finn/analysis/verify_custom_nodes.py b/src/finn/analysis/verify_custom_nodes.py
index 9af1e9a4fe83de24f64a7e9df535bcf78f5fc234..62dac2827f11d290c5a50137e12684eb93326297 100644
--- a/src/finn/analysis/verify_custom_nodes.py
+++ b/src/finn/analysis/verify_custom_nodes.py
@@ -32,7 +32,8 @@ from finn.util.basic import is_finn_op
 
 def verify_nodes(model):
     """Checks if custom ops in graph are correctly built, with all attributes
-    and inputs.
+    and inputs. Please note that many FINN CustomOps don't yet implement the
+    verify_node function required for this analysis pass to work correctly.
 
     Returns {node op_type : info_messages}
 
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 8577128a55938d905bf4230624182b2699e091f1..807fd706860d7e4667107ddd2ed46ea2b123c3ec 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -89,6 +89,8 @@ class LargeFIFOMemStyle(str, Enum):
 class VerificationStepType(str, Enum):
     "Steps at which FINN ONNX execution can be launched for verification."
 
+    #: verify after step_qonnx_to_finn, using Python execution
+    QONNX_TO_FINN_PYTHON = "finn_onnx_python"
     #: verify after step_tidy_up, using Python execution
     TIDY_UP_PYTHON = "initial_python"
     #: verify after step_streamline , using Python execution
@@ -103,6 +105,7 @@ class VerificationStepType(str, Enum):
 #: specified order. Use the `steps` as part of build config to restrict which
 #: steps will be run.
 default_build_dataflow_steps = [
+    "step_qonnx_to_finn",
     "step_tidy_up",
     "step_streamline",
     "step_convert_to_hls",
@@ -123,6 +126,7 @@ default_build_dataflow_steps = [
 
 #: List of steps to run for an estimate-only (no synthesis) dataflow build
 estimate_only_dataflow_steps = [
+    "step_qonnx_to_finn",
     "step_tidy_up",
     "step_streamline",
     "step_convert_to_hls",
@@ -291,6 +295,14 @@ class DataflowBuildConfig:
     #: If given, stop at this step.
     stop_step: Optional[str] = None
 
+    #: The optional argument `max_multithreshold_bit_width` affects which Quant nodes
+    #: of the QONNX format get converted to the MultiThreshold nodes of FINN. This
+    #: only affects Quant nodes in the activation path. Quant nodes that define a
+    #: bit width larger than `max_multithreshold_bit_width` are not converted to
+    #: MultiThreshold nodes; a warning is raised instead.
+    #: If not given, `max_multithreshold_bit_width` defaults to 8.
+    max_multithreshold_bit_width: Optional[int] = 8
+
     #: Override the number of inputs for rtlsim performance measurement.
     rtlsim_batch_size: Optional[int] = 1
 
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 2e1643bf805ca48efa7131e23fe1325ccc6f56a1..c977f15e7090f5cae633a013f5eb9e6b3dd34dd2 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -31,6 +31,7 @@ import numpy as np
 import os
 from copy import deepcopy
 from distutils.dir_util import copy_tree
+from qonnx.util.cleanup import cleanup_model
 from shutil import copy
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
@@ -94,6 +95,10 @@ from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.transformation.qonnx.quant_act_to_multithreshold import (
+    default_filter_function_generator,
+)
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.util.basic import get_rtlsim_trace_depth
@@ -188,6 +193,37 @@ def prepare_for_stitched_ip_rtlsim(verify_model, cfg):
     return verify_model
 
 
+def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
+    """
+    This step will only execute if QONNX nodes are found.
+    These include the following op_types: "Quant", "Trunc" and "BinaryQuant".
+    If such nodes are found, the step will run the tidy-up step from QONNX
+    and then convert the QONNX model to the FINN-ONNX dialect.
+    """
+    # Check if any QONNX nodes exist, i.e. BinaryQuant, Quant or Trunc
+    q_count = 0
+    for op_type in ["BinaryQuant", "Quant", "Trunc"]:
+        q_count += len(model.get_nodes_by_op_type(op_type))
+    if q_count == 0:
+        return model
+
+    # QONNX cleanup
+    model = cleanup_model(model)
+    # QONNX to FINN-ONNX
+    model = model.transform(
+        ConvertQONNXtoFINN(
+            filter_function=default_filter_function_generator(
+                max_multithreshold_bit_width=cfg.max_multithreshold_bit_width
+            )
+        )
+    )
+
+    if VerificationStepType.QONNX_TO_FINN_PYTHON in cfg._resolve_verification_steps():
+        verify_step(model, cfg, "qonnx_to_finn_python", need_parent=False)
+
+    return model
+
+
 def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Run the tidy-up step on given model. This includes shape and datatype
     inference, constant folding, and giving nodes and tensors better names.
@@ -642,6 +678,7 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig):
 
 #: map step name strings to step functions
 build_dataflow_step_lookup = {
+    "step_qonnx_to_finn": step_qonnx_to_finn,
     "step_tidy_up": step_tidy_up,
     "step_streamline": step_streamline,
     "step_convert_to_hls": step_convert_to_hls,
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
index 333f1bddcc83d16af63441727e16850bd6ecb8f0..26864c50906d7dddeda33fb00c10f74eaf51e485 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
@@ -127,8 +127,12 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
         ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
-        wf = int((k_h * k_w * ifm_ch) // simd)
-        folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
+        if self.use_parallel_window_output():
+            wf = int((ifm_ch) // simd)
+            folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, k_h * k_w * simd)
+        else:
+            wf = int((k_h * k_w * ifm_ch) // simd)
+            folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
         return folded_oshape
 
     def make_shape_compatible_op(self, model):
@@ -156,8 +160,6 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         return DataType[self.get_nodeattr("outputDataType")]
 
     def get_instream_width(self):
-        """Returns stream width, input and output stream width are equal for
-        the sliding window function"""
         ibits = self.get_input_datatype().bitwidth()
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
@@ -166,10 +168,13 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         return in_width
 
     def get_outstream_width(self):
-        """Returns stream width, input and output stream width are equal for
-        the sliding window function, so the function to determine the input
-        stream width can be reused."""
-        return self.get_instream_width()
+        if self.use_parallel_window_output():
+            # feed all window pixels in parallel
+            k_h, k_w = self.get_nodeattr("ConvKernelDim")
+            return self.get_instream_width() * k_h * k_w
+        else:
+            # if parallel variant not in use: same width for output and input stream
+            return self.get_instream_width()
 
     def get_number_output_values(self):
         folded_oshape = self.get_folded_output_shape()
@@ -205,6 +210,22 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
 
         return (ifm_ch, ifm_dim, ofm_dim, k, stride, dilation)
 
+    def use_parallel_window_output(self):
+        # Check if the simple "ConvolutionInputGenerator_1D_parallel" variant can be used to
+        # feed the window in parallel to the following layer, enabling full SIMD unfolding.
+        stride = self.get_nodeattr("Stride")
+        dilation = self.get_nodeattr("Dilation")
+        stride_h, stride_w = stride
+        dilation_h, dilation_w = dilation
+
+        if self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels"):
+            if self.get_nodeattr("depthwise") == 0:
+                if stride_h == 1 and stride_w == 1:
+                    if dilation_h == 1 and dilation_w == 1:
+                        return True
+
+        return False
+
     def get_exp_cycles(self):
         simd = self.get_nodeattr("SIMD")
         (
@@ -224,12 +245,15 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         # since mmv != 1 is not supported yet, we set mmv for now to 1
         mmv = 1
         # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
-        cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
-        cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
-        max_cycles = max(cycles_write_block, cycles_read_block)
-        exp_cycles = (
-            ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
-        )
+        if self.use_parallel_window_output():
+            exp_cycles = k_w + ofm_dim_w
+        else:
+            cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
+            cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
+            max_cycles = max(cycles_write_block, cycles_read_block)
+            exp_cycles = (
+                ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
+            )
 
         return int(exp_cycles)
 
@@ -526,39 +550,49 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             "ultra": "ap_resource_uram()",
         }
         hls_ram_style = map_to_hls_ram_style[ram_style]
-        hls_call = "ConvolutionInputGenerator"
-        # check which ConvolutionInputGenerator is needed
-        dilation_h, dilation_w = self.get_nodeattr("Dilation")
 
-        hls_call += "_NonSquare"
-        if dilation_h > 1 or dilation_w > 1:
-            hls_call += "_Dilated"
-            if self.get_nodeattr("depthwise") == 1:
-                hls_call += "_dws"
-            self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
-                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y,
-                Dilation1_x, Dilation1_y> (in0, out, numReps, {});""".format(
-                    hls_call, hls_ram_style
-                )
-            ]
-        elif self.get_nodeattr("depthwise") == 1:
-            hls_call += "_dws"
+        # check which ConvolutionInputGenerator is needed
+        if self.use_parallel_window_output():
+            hls_call = "ConvolutionInputGenerator_1D_parallel"
             self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
-                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y>
+                """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1,
+                IFMDim1_x, OFMDim1_x, SIMD1, Stride1_x>
                 (in0, out, numReps, {});""".format(
                     hls_call, hls_ram_style
                 )
             ]
         else:
-            self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
-                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y>
-                (in0, out, numReps, {});""".format(
-                    hls_call, hls_ram_style
-                )
-            ]
+            hls_call = "ConvolutionInputGenerator_NonSquare"
+            dilation_h, dilation_w = self.get_nodeattr("Dilation")
+            if dilation_h > 1 or dilation_w > 1:
+                hls_call += "_Dilated"
+                if self.get_nodeattr("depthwise") == 1:
+                    hls_call += "_dws"
+                self.code_gen_dict["$DOCOMPUTE$"] = [
+                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
+                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
+                    SIMD1, Stride1_x, Stride1_y, Dilation1_x, Dilation1_y>
+                    (in0, out, numReps, {});""".format(
+                        hls_call, hls_ram_style
+                    )
+                ]
+            elif self.get_nodeattr("depthwise") == 1:
+                hls_call += "_dws"
+                self.code_gen_dict["$DOCOMPUTE$"] = [
+                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
+                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
+                    SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format(
+                        hls_call, hls_ram_style
+                    )
+                ]
+            else:
+                self.code_gen_dict["$DOCOMPUTE$"] = [
+                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
+                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
+                    SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format(
+                        hls_call, hls_ram_style
+                    )
+                ]
 
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -574,9 +608,16 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         npy_out = "%s/output.npy" % code_gen_dir
         oshape = self.get_folded_output_shape()
         oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+        if self.use_parallel_window_output():
+            # pass the number of pixels in the folded output to apintstream2npy, needed
+            # to unpack the output correctly and reverse only the inner SIMD dimension
+            k_h, k_w = self.get_nodeattr("ConvKernelDim")
+            multi_pixel_out = k_h * k_w
+        else:
+            multi_pixel_out = 1
 
         self.code_gen_dict["$DATAOUTSTREAM$"] = [
-            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", true, 1, %d);'
             % (
                 packed_hls_type,
                 elem_hls_type,
@@ -584,6 +625,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
                 npy_type,
                 oshape_cpp_str,
                 npy_out,
+                multi_pixel_out,
             )
         ]
 
@@ -591,12 +633,21 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         self.code_gen_dict["$SAVEASCNPY$"] = []
 
     def blackboxfunction(self):
-        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
-            """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
-                hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format(
-                self.onnx_node.name
-            )
-        ]
+        if self.use_parallel_window_output():
+            self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+                """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
+                    hls::stream<ap_uint<ConvKernelDim1_x*SIMD1*Input_precision1>>
+                    &out)""".format(
+                    self.onnx_node.name
+                )
+            ]
+        else:
+            self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+                """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
+                    hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format(
+                    self.onnx_node.name
+                )
+            ]
 
     def pragmas(self):
         self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 90abb66e66bc54bc9d1f4c7a08c58ca58e6d1741..68cd1ff9ea680e157f59353d0c9d05afc3d9d6d7 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -1013,10 +1013,10 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         if var == "ipgen":
             SIMD = self.get_nodeattr("SIMD")
             MW = self.get_nodeattr("MW")
-            condition = SIMD > (MW / 1024)
+            condition = SIMD >= (MW / 1024)
             msg = (
                 f"HLS synthesis of StreamingFCLayer_Batch requires: "
-                f"SIMD > MW / 1024. This is not fulfilled with: SIMD={SIMD} "
+                f"SIMD >= MW / 1024. This is not fulfilled with: SIMD={SIMD} "
                 f"and MW={MW} for node: {self.onnx_node.name}."
             )
             assert condition, msg
diff --git a/src/finn/qnn-data/cpp/npy2apintstream.hpp b/src/finn/qnn-data/cpp/npy2apintstream.hpp
index bb17f11c4fecd4d26c1199c904f3dbec8b48c65f..6aade3a2bbe2ba9914728802a8a6a448ef2d9fb2 100644
--- a/src/finn/qnn-data/cpp/npy2apintstream.hpp
+++ b/src/finn/qnn-data/cpp/npy2apintstream.hpp
@@ -45,29 +45,34 @@ void npy2apintstream(const char * npy_path, hls::stream<PackedT> & out_stream, b
 }
 
 template <typename PackedT, typename ElemT, int ElemBits, typename NpyT>
-void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1) {
+void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1, size_t multi_pixel_out = 1) {
   for(size_t rep = 0; rep < numReps; rep++) {
     std::vector<NpyT> data_to_save;
     size_t outer_dim_elems = 1;
     for(size_t dim = 0; dim < shape.size()-1; dim++) {
       outer_dim_elems *= shape[dim];
     }
-    size_t inner_dim_elems = shape[shape.size()-1];
-    DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems)
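+    // the last shape dimension packs multi_pixel_out pixels, each consisting
+    // of inner_dim_elems elements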
+    size_t inner_dim_elems = shape[shape.size()-1] / multi_pixel_out;
+    DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems << " n_multi_pixel_out " << multi_pixel_out)
     for(size_t outer_elem = 0; outer_elem < outer_dim_elems; outer_elem++) {
       PackedT packed_elem;
       in_stream >> packed_elem;
       DEBUG_APINTSTREAM2NPY("packed hls elem " << std::hex << packed_elem << std::dec)
-      for(size_t ii = 0; ii < inner_dim_elems; ii++) {
-        size_t i = reverse_inner ? inner_dim_elems-ii-1 : ii;
-        ap_uint<ElemBits> tmp_elem = packed_elem((i+1)*ElemBits-1, i*ElemBits);
-        // important: don't init elem = reinterpret_cast.. directly here
-        // this causes weird behavior for conversion to NpyT afterwards
-        ElemT elem;
-        elem = reinterpret_cast<ElemT&>(tmp_elem);
-        NpyT npyt = (NpyT) elem;
-        DEBUG_APINTSTREAM2NPY("elem " << elem << " NpyT " << npyt)
-        data_to_save.push_back(npyt);
+      for(size_t ii_multi_pixel_out = 0; ii_multi_pixel_out < multi_pixel_out; ii_multi_pixel_out++) {
+        // loop over multi_pixel_out blocks of inner_dim_elems separately,
+        // so that reverse_inner is not applied across multiple pixels
+        for(size_t ii = 0; ii < inner_dim_elems; ii++) {
+          size_t i = ii_multi_pixel_out*inner_dim_elems;
+          i += reverse_inner ? inner_dim_elems-ii-1 : ii;
+          ap_uint<ElemBits> tmp_elem = packed_elem((i+1)*ElemBits-1, i*ElemBits);
+          // important: don't init elem = reinterpret_cast.. directly here
+          // this causes weird behavior for conversion to NpyT afterwards
+          ElemT elem;
+          elem = reinterpret_cast<ElemT&>(tmp_elem);
+          NpyT npyt = (NpyT) elem;
+          DEBUG_APINTSTREAM2NPY("elem " << elem << " NpyT " << npyt)
+          data_to_save.push_back(npyt);
+        }
       }
     }
     cnpy::npy_save(npy_path, &data_to_save[0], shape, "w");
diff --git a/src/finn/transformation/fpgadataflow/template_driver.py b/src/finn/transformation/fpgadataflow/template_driver.py
index 62b9bfb1969114ad858f50cb7eb0137aa12135ed..31dd22573e35894794dc522c0cf6ab47ce6c6cfc 100644
--- a/src/finn/transformation/fpgadataflow/template_driver.py
+++ b/src/finn/transformation/fpgadataflow/template_driver.py
@@ -119,6 +119,8 @@ if __name__ == "__main__":
         for ifn in inputfile:
             ibuf_normal.append(np.load(ifn))
         obuf_normal = accel.execute(ibuf_normal)
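+        # execute() may return a single array instead of a list for
+        # single-output models; wrap it so the save loop handles both cases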
+        if not isinstance(obuf_normal, list):
+            obuf_normal = [obuf_normal]
         for o, obuf in enumerate(obuf_normal):
             np.save(outputfile[o], obuf)
     elif exec_mode == "throughput_test":
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index b881bf78880e819b48a80011be1ed6b7812188ac..a12f359c7d3f1c29a17694ef4987a1a349286234 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -119,6 +119,7 @@ if {$BOARD == "ZCU104"} {
     set ZYNQ_TYPE "zynq_us+"
 } elseif {$BOARD == "Pynq-Z2"} {
     set ZYNQ_TYPE "zynq_7000"
+    set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project]
 } elseif {$BOARD == "Pynq-Z1"} {
     set ZYNQ_TYPE "zynq_7000"
     set_property board_part www.digilentinc.com:pynq-z1:part0:1.0 [current_project]
diff --git a/src/finn/transformation/qonnx/convert_qonnx_to_finn.py b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
new file mode 100644
index 0000000000000000000000000000000000000000..70656e4d0987924ba43d0e657414d0d172feb5ce
--- /dev/null
+++ b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
+
+from finn.transformation.base import Transformation
+from finn.transformation.extract_conv_bias import ExtractBiasFromConv
+from finn.transformation.gemm_to_matmul import GemmToMatMul
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.qonnx.fold_quant_weights import FoldQuantWeights
+from finn.transformation.qonnx.infer_quant_avg_pool_2d import (
+    AvgPoolAndTruncToQuantAvgPool,
+)
+from finn.transformation.qonnx.quant_act_to_multithreshold import (
+    ConvertQuantActToMultiThreshold,
+    default_filter_function_generator,
+)
+from finn.transformation.remove import RemoveIdentityOps
+
+
+class ConvertQONNXtoFINN(Transformation):
+    """Converts QONNX dialect to FINN ONNX dialect.
+    First the weights are converted using the FoldQuantWeights transformation,
+    then the ConvertQuantActToMultiThreshold transformation is used to convert
+    the activations.
+    If incompatibilities are found, a ValueError or RuntimeError is raised.
+
+    The optional keyword argument `filter_function`
+    presents a way to control which Quant and BipolarQuant nodes in the activation path
+    are converted to MultiThreshold nodes. A warning will be emitted when a Quant node
+    is not converted to a MultiThreshold node.
+
+    :param filter_function: Each candidate Quant and BipolarQuant node is first
+    evaluated by this function. If the function returns False,
+    then the node is not converted to a MultiThreshold node.
+    The function is given the model and candidate node as parameters.
+    By default a filter function is used which disables the conversion of
+    Quant nodes with a bit width larger than 8.
+    Defaults to: default_filter_function_generator(max_multithreshold_bit_width=8)
+    """
+
+    def __init__(
+        self,
+        filter_function=default_filter_function_generator(
+            max_multithreshold_bit_width=8
+        ),
+    ):
+        super().__init__()
+        self._filter_function = filter_function
+
+    def apply(self, model):
+        # Extract the bias from Conv node
+        model = model.transform(ExtractBiasFromConv())
+        # Gemm operations are not supported by FINN, so we convert them to MatMul
+        model = model.transform(GemmToMatMul())
+        model = model.transform(FoldTransposeIntoQuantInit())
+        # Make sure the datatypes exist, these are required for folding the weights
+        model = model.transform(InferDataTypes())
+        # Fold weights
+        model = model.transform(FoldQuantWeights())
+        # Convert activations
+        model = model.transform(
+            ConvertQuantActToMultiThreshold(
+                filter_function=self._filter_function,
+            )
+        )
+        # Recompute datatypes
+        model = model.transform(InferDataTypes())
+        # Convert AvgPool -> Mul -> Trunc structure to QuantAvgPool2d
+        model = model.transform(AvgPoolAndTruncToQuantAvgPool())
+        # Remove empty padding if it exists
+        model = model.transform(RemoveIdentityOps())
+
+        return model, False
diff --git a/src/finn/transformation/qonnx/fold_quant_weights.py b/src/finn/transformation/qonnx/fold_quant_weights.py
new file mode 100644
index 0000000000000000000000000000000000000000..12c854d3bab2b762abc3649e15beff29ff8de3ac
--- /dev/null
+++ b/src/finn/transformation/qonnx/fold_quant_weights.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from onnx import TensorProto, helper
+from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
+
+import finn.core.onnx_exec as oxe
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.remove import remove_node_and_rewire
+
+
+class FoldQuantWeights(Transformation):
+    """Merges Quant nodes, which are used as weights into the initializer
+    """Merges Quant nodes which are used as weights into the initializer
+    """
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        execution_context = model.make_empty_exec_context()
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "Quant" or n.op_type == "BipolarQuant":
+                node_inp_inits = list(map(lambda x: model.get_initializer(x), n.input))
+                node_inp_dyn = list(filter(lambda x: x is None, node_inp_inits))
+                node_out = n.output[0]
+                is_all_constant_inputs = len(node_inp_dyn) == 0
+                ishape = model.get_tensor_shape(n.input[0])
+                is_const_shape = (n.op_type == "Shape") and (ishape is not None)
+                if is_all_constant_inputs or is_const_shape:
+                    # Check node validity
+                    if (
+                        n.op_type == "Quant"
+                        and not model.get_initializer(n.input[2]) == 0
+                    ):
+                        raise ValueError(
+                            "Only Quant nodes with zero-point == 0 "
+                            "are currently supported."
+                        )
+                    if model.is_fork_node(n):
+                        raise ValueError(
+                            "Weights quantized with the Quant node are not "
+                            "allowed to be fork nodes."
+                        )
+                    target_node = model.find_direct_successors(n)
+                    if target_node is None:
+                        raise RuntimeError(
+                            "Weights quantized with the Quant node must have "
+                            "a successor node."
+                        )
+                    else:
+                        target_node = target_node[0]
+                    # If there is a DebugMarker in the weight path, it needs to
+                    # be removed before any further action is taken, because it
+                    # interferes with how the constant folding determines how
+                    # to apply scale factors, and in any case the DebugMarker
+                    # would not return useful information after folding.
+                    if target_node.op_type == "DebugMarker":
+                        remove_node_and_rewire(model, target_node)
+                        model = model.transform(FoldTransposeIntoQuantInit())
+                        return model, True
+
+                    # Continue with constant folding the quant node
+                    scale = model.get_initializer(n.input[1])
+                    unity_scale = (scale.flatten() == 1.0).all()
+                    # this node has no dynamic inputs, only constant ones -- so we can
+                    # do constant folding.
+                    oxe.execute_node(n, execution_context, graph)
+                    q_node_output = execution_context[node_out]
+                    # Check we can directly constant fold
+                    if unity_scale:
+                        # use the execution result as an initializer
+                        model.set_initializer(node_out, q_node_output)
+                    else:
+                        # Check next operator type
+                        mul_like_nodes = ["Mul", "Div", "Conv", "MatMul"]
+                        add_like_nodes = ["Add", "Sub"]
+                        all_supported_ops = mul_like_nodes.copy()
+                        all_supported_ops.extend(add_like_nodes)
+
+                        if target_node.op_type not in all_supported_ops:
+                            raise ValueError(
+                                f"Can't constant fold Quant weight node "
+                                f"into node type {target_node.op_type} "
+                                f"at node: {target_node}."
+                            )
+
+                        # For both Mul and Add:
+                        # Move the scale factor behind the next operator
+                        scale = model.get_initializer(n.input[1])
+                        new_initializer = q_node_output / scale
+                        # Round, to correct for floating point errors
+                        new_initializer = np.round(new_initializer)
+                        model.set_initializer(node_out, new_initializer)
+                        q_inst = getCustomOp(n)
+                        new_dtype = q_inst.get_integer_datatype(model)
+                        model.set_tensor_datatype(node_out, new_dtype)
+
+                        # Reshape scale for Conv if required
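+                        # (the per-channel scale maps to the output channel
+                        # axis of the Conv result, i.e. axis 1 in NCHW)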
+                        if target_node.op_type == "Conv" and len(scale.shape) > 0:
+                            bias_shape = [1] * len(scale.shape)
+                            bias_shape[1] = -1
+                            scale = scale.reshape(bias_shape)
+
+                        if scale.shape == (1,):
+                            scale = scale[0]
+                            mul_shape = tuple()
+                        else:
+                            mul_shape = scale.shape
+                        mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            mul_shape,
+                        )
+                        graph.value_info.append(mul_tensor)
+                        model.set_initializer(mul_tensor.name, scale)
+
+                        successor = model.find_consumers(node_out)
+                        if successor is None:
+                            raise RuntimeError(
+                                "Can only constant fold scaled Quant weights "
+                                "if a successor exists."
+                            )
+                        successor = successor[0]
+                        succ_output_name = successor.output[0]
+
+                        output_shape = model.get_tensor_shape(successor.output[0])
+                        act_mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            output_shape,
+                        )
+                        graph.value_info.append(act_mul_tensor)
+                        successor.output[0] = act_mul_tensor.name
+
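+                        # re-wire the graph: the successor now writes to the
+                        # intermediate tensor and the inserted Mul re-applies
+                        # the scale to produce the original output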
+                        mul_node = helper.make_node(
+                            "Mul",
+                            [act_mul_tensor.name, mul_tensor.name],
+                            [succ_output_name],
+                        )
+                        graph.node.insert(node_ind, mul_node)
+
+                        if target_node.op_type in add_like_nodes:
+                            # For Add and Sub the scale factor also needs to
+                            # be moved in front of the next operator
+                            div_tensor = helper.make_tensor_value_info(
+                                model.make_new_valueinfo_name(),
+                                TensorProto.FLOAT,
+                                mul_shape,
+                            )
+                            graph.value_info.append(div_tensor)
+                            model.set_initializer(div_tensor.name, scale)
+
+                            succ_input_name = successor.input[0]
+                            act_mul_tensor = helper.make_tensor_value_info(
+                                model.make_new_valueinfo_name(),
+                                TensorProto.FLOAT,
+                                output_shape,
+                            )
+                            graph.value_info.append(act_mul_tensor)
+                            successor.input[0] = act_mul_tensor.name
+
+                            div_node = helper.make_node(
+                                "Div",
+                                [succ_input_name, div_tensor.name],
+                                [act_mul_tensor.name],
+                            )
+                            graph.node.insert(node_ind, div_node)
+
+                    # remove old node
+                    graph.node.remove(n)
+                    graph_modified = True
+                    model = model.transform(InferShapes())
+                    return (model, graph_modified)
+        return (model, graph_modified)
diff --git a/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..faad31fa06e76b245f25b6f0aa583fec5c0da29a
--- /dev/null
+++ b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import math
+from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
+
+
+def _get_signed_from_upstream(model, trunc_node):
+    """
+    Find out what the sign of the input to the Trunc node is
+    by looking at the upstream nodes.
+    """
+    node = trunc_node
+    # Check if the input of this node already has a FINN datatype
+    signed = None
+    inp_dt = model.get_tensor_datatype(node.input[0])
+    if inp_dt is not None and inp_dt is not DataType["FLOAT32"]:
+        signed = inp_dt.signed()
+    # Go further up the graph; since datatype inference works top down,
+    # these nodes should either be sign-preserving ops or already have a
+    # datatype defined for the output tensor.
+    curr_node = node
+    if signed is None:
+        while curr_node is not None:
+            if model.is_join_node(curr_node):
+                raise RuntimeError(
+                    "Datatype Inference for the Trunc node only supports "
+                    "linear nodes in the upstream path."
+                )
+            next_node = model.find_direct_predecessors(curr_node)
+            if next_node is None:
+                raise RuntimeError(
+                    "Could not infer the datatype for the Trunc node due to "
+                    "missing upstream nodes."
+                )
+            next_node = next_node[0]
+            out_dt = model.get_tensor_datatype(next_node.output[0])
+            if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+                signed = out_dt.signed()
+                break
+            # Special cases where the node has an internal or intrinsic datatype.
+            if next_node.op_type == "MultiThreshold":
+                mt_inst = getCustomOp(next_node)
+                out_dt = DataType[mt_inst.get_nodeattr("out_dtype")]
+                if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+                    signed = out_dt.signed()
+                    break
+            if next_node.op_type == "BipolarQuant":
+                signed = True
+                break
+            if next_node.op_type == "Quant":
+                q_inst = getCustomOp(next_node)
+                out_dt = q_inst.get_integer_datatype(model)
+                if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+                    signed = out_dt.signed()
+                    break
+
+            # Check if we are allowed to move on to the next op
+            sign_preserving_ops = ["Add", "Mul", "AveragePool", "Pad"]
+            if next_node.op_type not in sign_preserving_ops:
+                raise RuntimeError(
+                    f"Could not infer the datatype for the Trunc node, "
+                    f"because the sign of the input datatype could not be inferred "
+                    f"from upstream nodes and traversal further up the graph was "
+                    f"disallowed, since the next node type {next_node.op_type} "
+                    f"is not in the list of "
+                    f"sign preserving ops {sign_preserving_ops}."
+                )
+            curr_node = next_node
+
+    if signed is None:
+        raise RuntimeError(
+            "Could not infer the datatype for the Trunc node, "
+            "because the sign of the input datatype could not be inferred "
+            "from upstream nodes."
+        )
+
+    return signed
+
+
+class AvgPoolAndTruncToQuantAvgPool(Transformation):
+    """
+    Convert a section of nodes of the pattern:
+    AveragePool -> Mul (scalar) -> Trunc
+    to the FINN op: QuantAvgPool2d
+    """
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "AveragePool":
+                mul_node = model.find_direct_successors(n)
+                if (
+                    mul_node is not None
+                    and len(mul_node) == 1
+                    and mul_node[0].op_type == "Mul"
+                ):
+                    mul_node = mul_node[0]
+                    t_node = model.find_direct_successors(mul_node)
+                    if (
+                        t_node is not None
+                        and len(t_node) == 1
+                        and t_node[0].op_type == "Trunc"
+                    ):
+                        t_node = t_node[0]
+                        running_node_index = node_ind
+                        # Check node for compatibility
+                        # Avg pooling node
+                        k_s = get_by_name(n.attribute, "kernel_shape")
+                        if k_s is None or len(k_s.ints) != 2 or len(set(k_s.ints)) != 1:
+                            raise ValueError(
+                                "FINN only supports average pooling with "
+                                "2D square kernels."
+                            )
+                        k_s = k_s.ints[0]
+
+                        pads = get_by_name(n.attribute, "pads")
+                        if (
+                            pads is None
+                            or len(set(pads.ints)) != 1
+                            or pads.ints[0] != 0
+                        ):
+                            raise ValueError(
+                                "FINN doesn't support padding for average pooling."
+                            )
+
+                        stride = get_by_name(n.attribute, "strides")
+                        if (
+                            stride is None
+                            or len(stride.ints) != 2
+                            or len(set(stride.ints)) != 1
+                        ):
+                            raise ValueError(
+                                "FINN only supports 2D strides with equal values in "
+                                "each direction."
+                            )
+                        stride = stride.ints[0]
+
+                        # Mul node
+                        mul_val = model.get_initializer(mul_node.input[1])
+                        if (
+                            mul_val is None
+                            or len(mul_val.shape) != 0
+                            or mul_val != k_s * k_s
+                        ):
+                            raise ValueError(
+                                f"The Mul node after the AveragePool node must have "
+                                f"a static initializer at its second input; "
+                                f"furthermore, the initializer must be zero-dimensional "
+                                f"and its value must be the square of "
+                                f"the kernel size, "
+                                f"in this case {k_s * k_s}."
+                            )
+
+                        # Trunc node
+                        rounding_mode = get_by_name(t_node.attribute, "rounding_mode")
+                        if rounding_mode is None or rounding_mode.s != b"FLOOR":
+                            raise ValueError(
+                                "The Trunc node must have the rounding_mode "
+                                "set to 'FLOOR'."
+                            )
+                        for inp in t_node.input[1:]:
+                            if model.get_initializer(inp) is None:
+                                raise ValueError(
+                                    f"All inputs of the Trunc node, "
+                                    f"except the first, must be statically "
+                                    f"initialized. However, {inp} is not."
+                                )
+                        zero_pt = model.get_initializer(t_node.input[2])
+                        if len(zero_pt.shape) != 0 or zero_pt != 0:
+                            raise ValueError(
+                                f"FINN only supports 0 as the zero point for "
+                                f"the Trunc node; it currently is {zero_pt}."
+                            )
+                        trunc_in_bits = model.get_initializer(t_node.input[3]).flatten()
+                        trunc_out_bits = model.get_initializer(
+                            t_node.input[4]
+                        ).flatten()
+                        if (
+                            len(trunc_in_bits.shape) != 1
+                            or len(trunc_out_bits.shape) != 1
+                        ):
+                            raise ValueError(
+                                f"FINN only supports scalar bit widths "
+                                f"for the Trunc node. The input bit width "
+                                f"currently is: {trunc_in_bits}, "
+                                f"while the output bit width is: {trunc_out_bits}."
+                            )
+                        trunc_in_bits = int(trunc_in_bits[0])
+                        trunc_out_bits = int(trunc_out_bits[0])
+
+                        # Calculate parameters for the QuantAvgPool2d node.
+                        # Calculate the input bit width; basically this, backwards:
+                        # https://github.com/Xilinx/finn-base/blob/
+                        # 7c2603a95e90e4de2575020e575c24eab6a15889/src/finn/custom_op/
+                        # general/quantavgpool2d.py#L94
+                        ibits = math.floor(
+                            math.log(2 ** trunc_in_bits / (k_s * k_s), 2)
+                        )
+                        # Get sign
+                        signed = _get_signed_from_upstream(model, t_node)
+                        # ToDo: Change this to NHWC,
+                        #  when the channels last layout comes around.
+                        data_layout = "NCHW"
+
+                        # Insert scale nodes, QuantAvgPool2d node and required tensors
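+                        # the input is divided by the Trunc scale so that
+                        # QuantAvgPool2d operates on integer values; the Mul
+                        # node at the end re-applies the scale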
+                        scale = model.get_initializer(t_node.input[1])
+                        scale_div_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(scale_div_tensor)
+                        model.set_initializer(scale_div_tensor.name, scale)
+
+                        act_scale_div_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(act_scale_div_tensor)
+
+                        scale_div_node = helper.make_node(
+                            "Div",
+                            [n.input[0], scale_div_tensor.name],
+                            [act_scale_div_tensor.name],
+                        )
+                        graph.node.insert(running_node_index, scale_div_node)
+                        running_node_index += 1
+
+                        act_scale_mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(act_scale_mul_tensor)
+
+                        QuantAvgPool2d_node = helper.make_node(
+                            "QuantAvgPool2d",
+                            [act_scale_div_tensor.name],
+                            [act_scale_mul_tensor.name],
+                            domain="finn.custom_op.general",
+                            stride=stride,
+                            kernel=k_s,
+                            ibits=ibits,
+                            obits=trunc_out_bits,
+                            signed=int(signed),
+                            data_layout=data_layout,
+                        )
+                        graph.node.insert(running_node_index, QuantAvgPool2d_node)
+                        running_node_index += 1
+
+                        scale_mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(scale_mul_tensor)
+                        model.set_initializer(scale_mul_tensor.name, scale)
+
+                        scale_mul_node = helper.make_node(
+                            "Mul",
+                            [act_scale_mul_tensor.name, scale_mul_tensor.name],
+                            [t_node.output[0]],
+                        )
+                        graph.node.insert(running_node_index, scale_mul_node)
+                        running_node_index += 1
+
+                        # Remove old nodes
+                        graph.node.remove(n)
+                        graph.node.remove(mul_node)
+                        graph.node.remove(t_node)
+
+                        # Recompute shapes and datatypes
+                        model = model.transform(InferShapes())
+                        model = model.transform(InferDataTypes())
+
+                        return model, True
+
+        return model, False
diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
new file mode 100644
index 0000000000000000000000000000000000000000..3336b1eee7fa9d54092cd56b9ba0edaf9d0884b1
--- /dev/null
+++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -0,0 +1,524 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from abc import ABC, abstractmethod
+from onnx import TensorProto, helper
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+
+
+class QuantActBaseHandler(ABC):
+    """Base class for converting quantized activation expressed in the QONNX dialect
+    to the FINN ONNX dialect.
+    :param model: The model on which this handler should operate.
+    :type model: class: `finn.core.modelwrapper.ModelWrapper`
+    :param quant_node: The Quant node which a given handler should replace.
+    :param quant_node_index: The index of the Quant node in the given model.
+    :type quant_node_index: `int`
+    """
+
+    def __init__(self, model: ModelWrapper, quant_node, quant_node_index: int):
+        """Base class constructor"""
+        super().__init__()
+        self._model = model
+        self._q_node = quant_node
+        self._q_index = quant_node_index
+
+    @property
+    @classmethod
+    @abstractmethod
+    def valid_predecessor_op_types(self):
+        """Defines which op types the preceding node is allowed to have for
+        this type of activation.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _check_compatibility(self):
+        """Check for compatibility with FINN.
+        There are many more possible combinations of QONNX settings,
+        than what is supported by FINN.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _calculate_act_bias(self):
+        """Calculate the activation bias,
+        which is introduced as an Add node behind the MultiThreshold node.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _calculate_thresholds(self):
+        """Calculate the threshold array for the MultiThreshold node."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _calculate_act_scale(self):
+        """Calculate the activation scale,
+        which is introduced as a Mul node behind the Add node
+        for the activation bias.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _remove_activation_node(self, multi_threshold_node):
+        """Remove the activation node in front of the Quant node."""
+        raise NotImplementedError()
+
+    def _extract_output_datatype(self):
+        """Get the output datatype for the MultiThreshold node."""
+        q_inst = getCustomOp(self._q_node)
+        dtype = q_inst.get_integer_datatype(self._model)
+        dtype = dtype.name
+        return dtype
+
+    def calculate_node_parameters(self):
+        """Calculate all parameters required for replacing the QONNX style activation
+        with a FINN style one.
+        """
+        return {
+            "out_dtype": self._extract_output_datatype(),
+            "thresholds": self._calculate_thresholds(),
+            "adder_bias": self._calculate_act_bias(),
+            "mul_scale": self._calculate_act_scale(),
+        }
+
+    def replace_quant_node(self):
+        """Replace the given QONNX style activation with a FINN style one."""
+
+        # Check that we actually support what the user is trying to do
+        self._check_compatibility()
+
+        # Shorten instance variables
+        model = self._model
+        graph = model.graph
+        n = self._q_node
+        running_node_index = self._q_index
+
+        # Calculate insertion parameters
+        parameter_dict = self.calculate_node_parameters()
+        thresholds = parameter_dict["thresholds"]
+        out_dtype = parameter_dict["out_dtype"]
+        adder_bias = parameter_dict["adder_bias"]
+        mul_scale = parameter_dict["mul_scale"]
+
+        # Modify graph
+        # Insert threshold tensor
+        thresh_tensor = helper.make_tensor_value_info(
+            model.make_new_valueinfo_name(),
+            TensorProto.FLOAT,
+            thresholds.shape,
+        )
+        graph.value_info.append(thresh_tensor)
+        model.set_initializer(thresh_tensor.name, thresholds)
+
+        # Insert MultiThreshold node
+        outp_trans_node = helper.make_node(
+            "MultiThreshold",
+            [n.input[0], thresh_tensor.name],
+            [n.output[0]],
+            out_dtype="FLOAT32",
+            domain="finn.custom_op.general",
+        )
+        graph.node.insert(running_node_index, outp_trans_node)
+        running_node_index += 1
+
+        # Get the MultiThreshold node instance to work with
+        mt_node = graph.node[running_node_index - 1]
+        mt_inst = getCustomOp(mt_node)
+
+        # Set scale and bias
+        # If these values are scalar then they can be set as attributes
+        # of the MultiThreshold node; if not, they get inserted as Add and Mul
+        # nodes behind the MultiThreshold node.
+        bias_scalar = adder_bias.shape == (1,) or len(adder_bias.shape) == 0
+        scale_scalar = mul_scale.shape == (1,) or len(mul_scale.shape) == 0
+        if scale_scalar and bias_scalar and self._q_node.op_type == "BipolarQuant":
+            # Get Quant parameters
+            mul_scale = np.atleast_1d(mul_scale)
+            # ONNX only accepts 64bit floats as attributes
+            mul_scale = mul_scale.astype(dtype=np.float64)
+            adder_bias = np.atleast_1d(adder_bias)
+            adder_bias = adder_bias.astype(dtype=np.float64)
+
+            # Set Bias and scale
+            mt_inst.set_nodeattr("out_scale", mul_scale[0])
+            # FINN applies scale first then bias,
+            # which is the other way around in Brevitas,
+            # we thus need to adjust the bias in the MultiThreshold node
+            finn_bias = adder_bias[0] * mul_scale[0]
+            mt_inst.set_nodeattr("out_bias", finn_bias)
+
+            # Set the output data type
+            mt_inst.set_nodeattr("out_dtype", out_dtype)
+        else:
+            # Set datatype
+            mt_inst.set_nodeattr("out_dtype", out_dtype)
+
+            # Insertion parameters
+            up_stream_node = mt_node
+
+            # Set bias
+            zero_bias = False
+            if bias_scalar:
+                adder_bias = np.atleast_1d(adder_bias)
+                # ONNX only accepts 64bit floats as attributes
+                adder_bias = adder_bias.astype(dtype=np.float64)[0]
+                add_shape = tuple()
+                if adder_bias == 0.0:
+                    zero_bias = True
+            else:
+                add_shape = adder_bias.shape
+
+            if not zero_bias:
+                # Insert Add node
+                add_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    add_shape,
+                )
+                graph.value_info.append(add_tensor)
+                model.set_initializer(add_tensor.name, adder_bias)
+
+                output_shape = model.get_tensor_shape(n.output[0])
+                act_add_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    output_shape,
+                )
+                graph.value_info.append(act_add_tensor)
+
+                add_node = helper.make_node(
+                    "Add",
+                    [act_add_tensor.name, add_tensor.name],
+                    [n.output[0]],
+                )
+                graph.node.insert(running_node_index, add_node)
+                running_node_index += 1
+                add_node = graph.node[running_node_index - 1]
+
+                # Re-point the upstream node
+                up_stream_node.output[0] = act_add_tensor.name
+                up_stream_node = add_node
+
+            # Set scale
+            # Insert Mul node
+            unity_scale = False
+            if scale_scalar:
+                mul_scale = np.atleast_1d(mul_scale)
+                mul_scale = mul_scale.astype(dtype=np.float64)[0]
+                mul_shape = tuple()
+                if mul_scale == 1.0:
+                    unity_scale = True
+            else:
+                mul_shape = mul_scale.shape
+
+            if not unity_scale:
+                mul_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    mul_shape,
+                )
+                graph.value_info.append(mul_tensor)
+                model.set_initializer(mul_tensor.name, mul_scale)
+
+                output_shape = model.get_tensor_shape(n.output[0])
+                act_mul_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    output_shape,
+                )
+                graph.value_info.append(act_mul_tensor)
+
+                mul_node = helper.make_node(
+                    "Mul",
+                    [act_mul_tensor.name, mul_tensor.name],
+                    [n.output[0]],
+                )
+                graph.node.insert(running_node_index, mul_node)
+                running_node_index += 1
+                mul_node = graph.node[running_node_index - 1]
+
+                # Re-point the upstream node
+                up_stream_node.output[0] = act_mul_tensor.name
+                up_stream_node = mul_node
+
+        # Remove activation node
+        self._remove_activation_node(mt_node)
+
+        # Remove the Quant node
+        graph.node.remove(n)
+
+        # return the internal model representation
+        return self._model
+
+
+class QuantReluHandler(QuantActBaseHandler):
+    """Class for converting a quantized relu operation expressed in the QONNX
+    dialect to the FINN ONNX dialect."""
+
+    valid_predecessor_op_types = [
+        "Relu",
+    ]
+
+    def _check_compatibility(self):
+        if self._q_node.op_type == "Quant":
+            q_inst = getCustomOp(self._q_node)
+            narrow = q_inst.get_nodeattr("narrow")
+            signed = q_inst.get_nodeattr("signed")
+            if signed or narrow:
+                raise ValueError(
+                    "FINN only supports unsigned and non-narrow Quant nodes "
+                    "for Relu activations."
+                )
+            if not self._model.get_initializer(self._q_node.input[2]) == 0:
+                raise ValueError(
+                    "Only Quant nodes with zero-point == 0 "
+                    "are currently supported for Relu activations."
+                )
+        elif self._q_node.op_type == "BipolarQuant":
+            return
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+
+    def _calculate_act_bias(self):
+        # No bias allowed for Relu activations, see: https://github.com/Xilinx/
+        # brevitas/blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+        # export/onnx/finn/handler/act.py#L48
+        bias = np.array([0.0])
+        return bias
+
+    def _calculate_thresholds(self):
+        # Gather parameters
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        quant_scale = self._model.get_initializer(self._q_node.input[1]).astype(
+            np.float32
+        )
+        # q_inst = getCustomOp(self._q_node)
+        # narrow = q_inst.get_nodeattr("narrow")
+
+        # Calculate thresholds, see: https://github.com/Xilinx/brevitas/blob/
+        # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+        # onnx/finn/handler/act.py#L21
+        num_distinct_values = 2 ** bit_width
+        num_thresholds = int(num_distinct_values - 1)
+        flat_scale = quant_scale.flatten().astype(np.float32)
+        num_scale_channels = flat_scale.shape[0]
+        step = np.abs(flat_scale).astype(np.float32)
+        min_threshold = step / 2
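+        # each threshold sits halfway between two successive quantization
+        # levels: threshold t = scale / 2 + scale * t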
+        thresholds = np.empty((num_scale_channels, num_thresholds)).astype(np.float32)
+        for c in range(num_scale_channels):
+            for t in range(num_thresholds):
+                thresholds[c][t] = min_threshold[c] + step[c] * t
+
+        # ToDo: The index 1 needs to be changed to -1 for the channels last format
+        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1]
+        final_shape = (num_output_channels, num_thresholds)
+        if thresholds.shape != final_shape:
+            thresholds = np.broadcast_to(thresholds, final_shape)
+
+        return thresholds
+
+    def _calculate_act_scale(self):
+        # Gather parameters
+        quant_scale = self._model.get_initializer(self._q_node.input[1])
+        # Calculate scale, see: https://github.com/Xilinx/brevitas/blob/
+        # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+        # onnx/finn/handler/act.py#L40
+        scale = quant_scale
+        return scale
+
+    def _remove_activation_node(self, multi_threshold_node):
+        # Find the activation node
+        act_node = self._model.find_direct_predecessors(self._q_node)
+        if act_node is None:
+            raise RuntimeError(
+                "For handling of Relu activations a predecessor to "
+                "the Quant node must exist."
+            )
+        act_node = act_node[0]
+        if not act_node.op_type == "Relu":
+            raise RuntimeError(
+                "The predecessor of the Quant node must be Relu for handling "
+                "of Relu activations."
+            )
+
+        # Reroute upstream tensor
+        multi_threshold_node.input[0] = act_node.input[0]
+
+        # Remove the activation node
+        self._model.graph.node.remove(act_node)
+
+
+class QuantIdentityHandler(QuantActBaseHandler):
+    """Class for converting a quantized identity operation expressed in the QONNX
+    dialect to the FINN ONNX dialect.
+    This handler also takes care of quantized HardTanh activations, because
+    these are equivalent to quantized identity activations.
+    """
+
+    valid_predecessor_op_types = [
+        "BatchNormalization",
+        "Sub",
+        "Add",
+        "Mul",
+        "Div",
+        "DebugMarker",
+        None,
+    ]
+
+    def _check_compatibility(self):
+        # Gather parameters to check
+        if self._q_node.op_type == "Quant":
+            q_inst = getCustomOp(self._q_node)
+            signed = q_inst.get_nodeattr("signed")
+            if not signed:
+                raise ValueError(
+                    "FINN only supports signed Quant nodes for identity activations."
+                )
+            if not self._model.get_initializer(self._q_node.input[2]) == 0:
+                raise ValueError(
+                    "Only Quant nodes with zero-point == 0 "
+                    "are currently supported for identity activations."
+                )
+        elif self._q_node.op_type == "BipolarQuant":
+            quant_scale = self._model.get_initializer(self._q_node.input[1])
+            if (quant_scale.flatten().shape[0] != 1) or quant_scale.flatten()[0] != 1.0:
+                raise ValueError(
+                    "FINN only supports Bipolar identity activations "
+                    "without per-channel scaling and the scaling must be 1."
+                )
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+
+    def _calculate_act_bias(self):
+        # Gather parameters
+        q_inst = getCustomOp(self._q_node)
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+            narrow = q_inst.get_nodeattr("narrow")
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        # Calculate bias, see: https://github.com/Xilinx/brevitas/blob/
+        # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+        # onnx/finn/handler/act.py#L64
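+        # the bias shifts the MultiThreshold output (which starts at 0)
+        # down into the signed output range of the quantizer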
+        if bit_width == 1.0:
+            bias = np.array([-0.5])
+        else:
+            if narrow:
+                min_non_scaled_val = -(2 ** (bit_width - 1) - 1)
+            else:
+                min_non_scaled_val = -(2 ** (bit_width - 1))
+            bias = np.array([min_non_scaled_val])
+        return bias
+
+    def _calculate_thresholds(self):
+        # Gather parameters
+        quant_scale = self._model.get_initializer(self._q_node.input[1])
+        q_inst = getCustomOp(self._q_node)
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+            narrow = q_inst.get_nodeattr("narrow")
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+
+        # Calculate thresholds, see: https://github.com/Xilinx/brevitas/
+        # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+        # export/onnx/finn/handler/act.py#L76
+        if bit_width == 1.0:
+            thresholds = np.empty([1, 1])
+            thresholds[0] = 0
+            return thresholds
+        else:
+            if narrow:
+                num_distinct_values = 2 ** bit_width - 1
+            else:
+                num_distinct_values = 2 ** bit_width
+
+            num_thresholds = int(num_distinct_values - 1)
+            flat_scale = quant_scale.flatten()
+            num_scale_channels = flat_scale.shape[0]
+            step = np.abs(flat_scale)
+            half_step = step / 2.0
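+            # Thresholds lie halfway between adjacent quantization levels, one
+            # scale step apart, with one row of thresholds per scale channel.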
+            thresholds = np.empty((num_scale_channels, num_thresholds))
+            # Compute the value of the smallest threshold; all other thresholds
+            # are generated as offsets from it in steps of the scale
+            min_threshold = -half_step - step * ((num_thresholds // 2) - 1)
+            if not narrow:
+                min_threshold -= step
+            for c in range(num_scale_channels):
+                for t in range(num_thresholds):
+                    thresholds[c][t] = min_threshold[c] + step[c] * t
+
+            # ToDo: The index 1 needs to be changed to -1 for the channels last format
+            num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[
+                1
+            ]
+            final_shape = (num_output_channels, num_thresholds)
+            if thresholds.shape != final_shape:
+                thresholds = np.broadcast_to(thresholds, final_shape)
+
+            return thresholds
+
+    def _calculate_act_scale(self):
+        # Gather parameters
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        quant_scale = self._model.get_initializer(self._q_node.input[1])
+        # Calculate scale, see: https://github.com/Xilinx/brevitas/
+        # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+        # export/onnx/finn/handler/act.py#L111
+        if bit_width != 1:
+            scale = quant_scale
+        else:
+            assert (
+                quant_scale.flatten().shape[0] == 1
+            ), "Unsupported BIPOLAR per channel scale"
+            assert quant_scale.flatten()[0] == 1.0, "Unsupported BIPOLAR scale != 1"
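+            # Bipolar outputs are {-1, +1}, i.e. two levels spaced 2 apart,
+            # hence the scale is doubled.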
+            scale = quant_scale * 2
+        return scale
+
+    def _remove_activation_node(self, multi_threshold_node):
+        # By definition, the Quant identity activation has no explicit
+        # activation node that needs to be removed
+        return
diff --git a/src/finn/transformation/qonnx/quant_act_to_multithreshold.py b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
new file mode 100644
index 0000000000000000000000000000000000000000..29ba93dfcfe6d18e0ff8927b6d646cb310d0262a
--- /dev/null
+++ b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import warnings
+
+from finn.transformation.base import Transformation
+from finn.transformation.qonnx.qonnx_activation_handlers import QuantActBaseHandler
+
+
+def default_filter_function_generator(max_multithreshold_bit_width=8):
+    """
+    This function generates the default filter function for the
+    ConvertQuantActToMultiThreshold transformation. Per default the returned
+    function disables the conversion of Quant nodes which have a bit width above 8 bit.
+
+    This function generator can be used as a template to write custom
+    filter functions.
+    """
+
+    def filter_function(model, q_node):
+        if q_node.op_type == "Quant":
+            bit_width = model.get_initializer(q_node.input[3])
+        elif q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        if bit_width is None:
+            raise ValueError("Quant nodes must have a static bit width.")
+        if bit_width > max_multithreshold_bit_width:
+            warnings.warn(
+                f'The Quant node with name: "{q_node.name}" was not converted to a '
+                f"MultiThreshold node, because its bit width of {bit_width} is "
+                f"higher than the configured maximum bit width of "
+                f"{max_multithreshold_bit_width}."
+            )
+            return False
+        return True
+
+    return filter_function
+
+
+class ConvertQuantActToMultiThreshold(Transformation):
+    """
+    Converts Quant nodes in the activation path to MultiThreshold nodes.
+
+    The optional keyword argument `filter_function`
+    presents a way to control which Quant and BipolarQuant nodes in the activation path
+    are converted to MultiThreshold nodes. A warning will be emitted when a Quant node
+    is not converted to a MultiThreshold node.
+
+    :param filter_function: Each candidate Quant and BinaryQant node is first evaluated
+    by this function. If the function returns False,
+    then the node is not converted to a MultiTrheshold node.
+    The function is given the model and candidate node as parameters.
+    Per default a filter function is inserted, which disables the conversion of
+    Quant nodes, which have a bit width of larger than 8.
+    Defaults to: default_filter_function_generator(max_multithreshold_bit_width=8)
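+
+    A minimal usage sketch (assuming `model` is a ModelWrapper holding a
+    QONNX-dialect graph; the bit width limit of 4 is arbitrary)::
+
+        custom_filter = default_filter_function_generator(
+            max_multithreshold_bit_width=4
+        )
+        model = model.transform(
+            ConvertQuantActToMultiThreshold(filter_function=custom_filter)
+        )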
+    """
+
+    def __init__(
+        self,
+        filter_function=default_filter_function_generator(
+            max_multithreshold_bit_width=8
+        ),
+    ):
+        super().__init__()
+        self._filter_function = filter_function
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "Quant" or n.op_type == "BipolarQuant":
+                # Check that the node is in the activation path
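+                # (i.e. neither the input nor the output tensor has a static
+                # initializer, so the node quantizes activations, not weights)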
+                inp = model.get_initializer(n.input[0])
+                out = model.get_initializer(n.output[0])
+                if not (inp is None and out is None):
+                    continue
+                predecessor = model.find_direct_predecessors(n)
+                if predecessor is not None:
+                    predecessor_op_type = predecessor[0].op_type
+                else:
+                    predecessor_op_type = predecessor
+                if model.is_fork_node(n):
+                    raise ValueError(
+                        "Forking Quant/BipolarQuant nodes are currently "
+                        "not supported by FINN."
+                    )
+                if n.op_type == "Quant" and not model.get_initializer(n.input[2]) == 0:
+                    raise ValueError(
+                        "Only Quant nodes with zero-point == 0 are currently supported."
+                    )
+
+                # Check that this node passes the user filter
+                if not self._filter_function(model, n):
+                    warnings.warn(
+                        f'The Quant node with name: "{n.name}" was not converted to a '
+                        f"MultiThreshold node, because the filtering function "
+                        f"returned False for this node."
+                    )
+                    continue
+
+                # Check for possible ambiguity in handler selection
+                valid_predecessors = []
+                for cls in QuantActBaseHandler.__subclasses__():
+                    valid_predecessors.extend(cls.valid_predecessor_op_types)
+                if len(valid_predecessors) != len(set(valid_predecessors)):
+                    raise RuntimeError(
+                        "Two or more activation handlers declare the same "
+                        "type of valid predecessor node. "
+                        "This leads to ambiguity in the handler selection "
+                        "and must thus be avoided."
+                    )
+
+                # Try to find a fitting handler for this Quant activation node
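+                # (the for/else construct raises below only if no handler class
+                # matched the predecessor op type, i.e. the loop did not break)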
+                for handler_cls in QuantActBaseHandler.__subclasses__():
+                    if predecessor_op_type in handler_cls.valid_predecessor_op_types:
+                        handler = handler_cls(model, n, node_ind)
+                        break
+                else:
+                    raise ValueError(
+                        f"Quant nodes in the activation path and with predecessor "
+                        f"nodes of type {predecessor_op_type} are currently not "
+                        f"supported by FINN and can not be converted to "
+                        f"MultiThreshold nodes."
+                    )
+                model = handler.replace_quant_node()
+                graph_modified = True
+                return (model, graph_modified)
+
+        return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py
index ea547571677a9d90a226b55de8582145b8c298f4..d0ec26a4d10c688db7931e40d7cfd840394b55a1 100644
--- a/src/finn/transformation/streamline/__init__.py
+++ b/src/finn/transformation/streamline/__init__.py
@@ -39,6 +39,7 @@ from finn.transformation.general import (
     GiveUniqueNodeNames,
 )
 from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.remove import RemoveIdentityOps
 from finn.transformation.streamline.absorb import (
     Absorb1BitMulIntoConv,
     Absorb1BitMulIntoMatMul,
@@ -51,7 +52,6 @@ from finn.transformation.streamline.collapse_repeated import (
     CollapseRepeatedAdd,
     CollapseRepeatedMul,
 )
-from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.transformation.streamline.reorder import (
     MoveAddPastConv,
     MoveAddPastMul,
diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py
index cba9648187ffadcff048b045f0c85c81c770e44d..97ae3b51a849a4174c9853cb41c0d6d72bdf8dad 100644
--- a/src/finn/transformation/streamline/absorb.py
+++ b/src/finn/transformation/streamline/absorb.py
@@ -315,7 +315,8 @@ class AbsorbTransposeIntoMultiThreshold(Transformation):
         graph = model.graph
         node_ind = 0
         graph_modified = False
-        for n in graph.node:
+        nodes = [n for n in model.graph.node]
+        for n in nodes:
             node_ind += 1
             if n.op_type == "Transpose" and not model.is_fork_node(n):
                 perms = list(get_by_name(n.attribute, "perm").ints)
@@ -326,37 +327,41 @@ class AbsorbTransposeIntoMultiThreshold(Transformation):
                         and mt_cand.op_type == "MultiThreshold"
                         # and not model.is_fork_node(mt_cand)
                     ):
-                        final_t_cands = model.find_consumers(mt_cand.output[0])
+                        mt_cand_orig_output = mt_cand.output[0]
                         mt = getCustomOp(mt_cand)
                         mt.set_nodeattr("data_layout", "NHWC")
-                        # get rid of first tranpose node
+                        # Rewire input of MultiThreshold node
                         mt_cand.input[0] = n.input[0]
+                        # Make new intermediate tensor
+                        intermediate_tensor_name = model.make_new_valueinfo_name()
+                        intermediate_tensor_shape = model.get_tensor_shape(n.input[0])
+                        intermediate_tensor_finn_dtype = model.get_tensor_datatype(
+                            mt_cand.output[0]
+                        )
+                        # Create a new ValueInfoProto and set the shape
+                        model.set_tensor_shape(
+                            intermediate_tensor_name, intermediate_tensor_shape
+                        )
+                        # Set the tensor layout
+                        model.set_tensor_layout(
+                            intermediate_tensor_name, DataLayout.NHWC
+                        )
+                        # Set the tensor FINN datatype
+                        model.set_tensor_datatype(
+                            intermediate_tensor_name, intermediate_tensor_finn_dtype
+                        )
+                        # Rewire output of MT node
+                        mt_cand.output[0] = intermediate_tensor_name
+                        # Get rid of first transpose node
                         graph.node.remove(n)
-                        # fix output shape for MultiThreshold
-                        mt_orig_oshape = model.get_tensor_shape(mt_cand.output[0])
-                        mt_ishape = model.get_tensor_shape(mt_cand.input[0])
-                        model.set_tensor_shape(mt_cand.output[0], mt_ishape)
-                        # re-insert Transpose behind MultiThreshold
-                        transpose_output = model.make_new_valueinfo_name()
+                        # Create new Transpose node
                         new_transpose = oh.make_node(
                             "Transpose",
-                            [mt_cand.output[0]],
-                            [transpose_output],
+                            [intermediate_tensor_name],
+                            [mt_cand_orig_output],
                             perm=[0, 3, 1, 2],
                         )
                         graph.node.insert(node_ind + 1, new_transpose)
-                        if final_t_cands is not None:
-                            # rewire next nodes' inputs
-                            for final_t_cand in final_t_cands:
-                                final_t_cand.input[0] = transpose_output
-                        else:
-                            # replace graph top-level output
-                            get_by_name(
-                                model.graph.output, mt_cand.output[0]
-                            ).name = transpose_output
-                            model.set_tensor_shape(mt_cand.output[0], mt_ishape)
-                        # set value_info shape for transpose output
-                        model.set_tensor_shape(transpose_output, mt_orig_oshape)
                         graph_modified = True
         if graph_modified:
             model = model.transform(InferDataTypes())
diff --git a/src/finn/transformation/streamline/remove.py b/src/finn/transformation/streamline/remove.py
deleted file mode 100644
index 27e420a7936c2d9203150d2d682bf45e1aff0638..0000000000000000000000000000000000000000
--- a/src/finn/transformation/streamline/remove.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright (c) 2020, Xilinx
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of FINN nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-import numpy as np
-
-from finn.transformation.base import Transformation
-from finn.transformation.infer_shapes import InferShapes
-
-
-def _remove_node_and_rewire(model, node):
-    producer = model.find_producer(node.input[0])
-    if producer is not None:
-        # wire output tensor to
-        # output of producer node
-        producer.output[0] = node.output[0]
-    else:
-        # node is first in graph
-        consumer = model.find_consumer(node.output[0])
-        assert consumer is not None, "Whole graph is identity"
-        assert consumer.input[0] == node.output[0]
-        # rewire consumer's input directly to graph input
-        consumer.input[0] = node.input[0]
-    # remove node
-    model.graph.node.remove(node)
-
-
-class RemoveIdentityOps(Transformation):
-    """Remove identity ops like Add/Sub with zero or Mul/Div with one. A tolerance
-    value (defaults to 1e-05) can be specified during init for the comparison
-    to zero/one."""
-
-    def __init__(self, atol=1e-05):
-        super().__init__()
-        self.atol = atol
-
-    def apply(self, model):
-        graph = model.graph
-        node_ind = 0
-        graph_modified = False
-        for n in graph.node:
-            node_ind += 1
-            if (
-                n.op_type in ["Add", "Sub"]
-                and not model.is_fork_node(n)
-                and not model.is_join_node(n)
-            ):
-                A = model.get_initializer(n.input[1])
-                if (
-                    A is not None
-                    and np.isclose(A, np.zeros_like(A), atol=self.atol).all()
-                ):
-                    _remove_node_and_rewire(model, n)
-
-            elif (
-                n.op_type in ["Mul", "Div"]
-                and not model.is_fork_node(n)
-                and not model.is_join_node(n)
-            ):
-                A = model.get_initializer(n.input[1])
-                if (
-                    A is not None
-                    and np.isclose(A, np.ones_like(A), atol=self.atol).all()
-                ):
-                    _remove_node_and_rewire(model, n)
-        model = model.transform(InferShapes())
-        return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index 416755ae21002716e1535094d58aa8593ce3221a..0cdd6651d982426b1d81d7313346dcd899294bf7 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -623,6 +623,10 @@ class MoveScalarLinearPastInvariants(Transformation):
                     # if initializer is not scalar, skip
                     if np.prod(init0.shape) != 1:
                         continue
+                    # Flatten input if required
+                    if len(init0.shape) > 0:
+                        init0 = init0.flatten()[0]
+                        model.set_initializer(prod0.input[1], init0)
                     # move prod0 from input to output,
                     old_prod0_in = prod0.input[0]
                     old_prod0_out = prod0.output[0]
diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py
index 68e563da6351dad6e61d5a2d1ffcbfed9859d0f5..1b38914a83e7c5d68bb004df7545b518d6a93ddd 100644
--- a/tests/brevitas/test_brevitas_avg_pool_export.py
+++ b/tests/brevitas/test_brevitas_avg_pool_export.py
@@ -31,19 +31,23 @@ import numpy as np
 import os
 import torch
 from brevitas.export import FINNManager
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantAvgPool2d
 from brevitas.quant_tensor import QuantTensor
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import gen_finn_dt_tensor
 
-export_onnx_path = "test_brevitas_avg_pool_export.onnx"
+base_export_onnx_path = "test_brevitas_avg_pool_export.onnx"
 
 
+@pytest.mark.parametrize("QONNX_export", [False, True])
 @pytest.mark.parametrize("kernel_size", [2, 3])
 @pytest.mark.parametrize("stride", [1, 2])
 @pytest.mark.parametrize("signed", [True, False])
@@ -52,11 +56,23 @@ export_onnx_path = "test_brevitas_avg_pool_export.onnx"
 @pytest.mark.parametrize("channels", [2, 4])
 @pytest.mark.parametrize("idim", [7, 8])
 def test_brevitas_avg_pool_export(
-    kernel_size, stride, signed, bit_width, input_bit_width, channels, idim
+    kernel_size,
+    stride,
+    signed,
+    bit_width,
+    input_bit_width,
+    channels,
+    idim,
+    QONNX_export,
 ):
-
+    export_onnx_path = base_export_onnx_path.replace(
+        ".onnx", f"test_QONNX-{QONNX_export}.onnx"
+    )
     quant_avgpool = QuantAvgPool2d(
-        kernel_size=kernel_size, stride=stride, bit_width=bit_width
+        kernel_size=kernel_size,
+        stride=stride,
+        bit_width=bit_width,
+        return_quant_tensor=False,
     )
     quant_avgpool.eval()
 
@@ -69,31 +85,57 @@ def test_brevitas_avg_pool_export(
     # Brevitas QuantAvgPool layers need QuantTensors to export correctly
     # which requires setting up a QuantTensor instance with the scale
     # factor, zero point, bitwidth and signedness
-    scale_array = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype(
-        np.float32
-    )
+    scale_array = np.ones((1, channels, 1, 1)).astype(np.float32)
+    scale_array *= 0.5
     input_tensor = torch.from_numpy(input_array * scale_array).float()
     scale_tensor = torch.from_numpy(scale_array).float()
     zp = torch.tensor(0.0)
     input_quant_tensor = QuantTensor(
-        input_tensor, scale_tensor, zp, input_bit_width, signed
+        input_tensor, scale_tensor, zp, input_bit_width, signed, training=False
     )
 
     # export
-    FINNManager.export(
-        quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
-    )
+    if QONNX_export:
+        BrevitasONNXManager.export(
+            quant_avgpool,
+            export_path=export_onnx_path,
+            input_t=input_quant_tensor,
+        )
+        model = ModelWrapper(export_onnx_path)
+
+        # Statically set the additional inputs generated by the BrevitasONNXManager
+        model.graph.input.remove(model.graph.input[3])
+        model.graph.input.remove(model.graph.input[2])
+        model.graph.input.remove(model.graph.input[1])
+        model.set_initializer("1", scale_array)
+        model.set_initializer("2", np.array(0.0).astype(np.float32))
+        model.set_initializer("3", np.array(input_bit_width).astype(np.float32))
+        model.save(export_onnx_path)
+
+        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+        model = ModelWrapper(export_onnx_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(export_onnx_path)
+    else:
+        FINNManager.export(
+            quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
+        )
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     model = model.transform(InferDataTypes())
 
     # reference brevitas output
-    ref_output_array = quant_avgpool(input_quant_tensor).tensor.detach().numpy()
+    ref_output_array = quant_avgpool(input_quant_tensor).detach().numpy()
     # finn output
-    idict = {model.graph.input[0].name: input_array}
+    if QONNX_export:
+        # Manually apply the Quant tensor scaling for QONNX
+        idict = {model.graph.input[0].name: input_array * scale_array}
+    else:
+        idict = {model.graph.input[0].name: input_array}
     odict = oxe.execute_onnx(model, idict, True)
     finn_output = odict[model.graph.output[0].name]
     # compare outputs
     assert np.isclose(ref_output_array, finn_output).all()
     # cleanup
     os.remove(export_onnx_path)
diff --git a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py
index 8a1783ae9468244ad7e0999b59c3c7b696682dae..78ca361366902b37f826b575904126c783adbece 100644
--- a/tests/brevitas/test_brevitas_cnv.py
+++ b/tests/brevitas/test_brevitas_cnv.py
@@ -34,12 +34,15 @@ import brevitas.onnx as bo
 import numpy as np
 import os
 import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.test import get_test_model_trained
 
 export_onnx_path = "test_brevitas_cnv.onnx"
@@ -47,11 +50,20 @@ export_onnx_path = "test_brevitas_cnv.onnx"
 
 @pytest.mark.parametrize("abits", [1, 2])
 @pytest.mark.parametrize("wbits", [1, 2])
-def test_brevitas_cnv_export_exec(wbits, abits):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_cnv_export_exec(wbits, abits, QONNX_export):
     if wbits > abits:
         pytest.skip("No wbits > abits cases at the moment")
     cnv = get_test_model_trained("CNV", wbits, abits)
-    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
+    ishape = (1, 3, 32, 32)
+    if QONNX_export:
+        BrevitasONNXManager.export(cnv, ishape, export_onnx_path)
+        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+        model = ModelWrapper(export_onnx_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(export_onnx_path)
+    else:
+        bo.export_finn_onnx(cnv, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(InferShapes())
diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py
index 4418368350b627644c76a7483c5c5dfaf031cda0..e42b93babefd9ca6a7a86def18a5cbb21d795c4c 100644
--- a/tests/brevitas/test_brevitas_debug.py
+++ b/tests/brevitas/test_brevitas_debug.py
@@ -26,38 +26,71 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import pytest
+
 import brevitas.onnx as bo
 import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 import os
 import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from pkgutil import get_data
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import RemoveStaticGraphInputs
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.test import get_test_model_trained
 
 
-def test_brevitas_debug():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+@pytest.mark.parametrize("QONNX_FINN_conversion", [False, True])
+def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
+    if (not QONNX_export) and QONNX_FINN_conversion:
+        pytest.skip("This test configuration is not valid and is thus skipped.")
     finn_onnx = "test_brevitas_debug.onnx"
     fc = get_test_model_trained("TFC", 2, 2)
-    dbg_hook = bo.enable_debug(fc)
-    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
+    ishape = (1, 1, 28, 28)
+    if QONNX_export:
+        dbg_hook = bo.enable_debug(fc, proxy_level=True)
+        BrevitasONNXManager.export(fc, ishape, finn_onnx)
+        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
+        model = ModelWrapper(finn_onnx)
+        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
+        for dbg_node in dbg_nodes:
+            dbg_node.domain = "finn.custom_op.general"
+        model.save(finn_onnx)
+        qonnx_cleanup(finn_onnx, out_file=finn_onnx)
+        if QONNX_FINN_conversion:
+            model = ModelWrapper(finn_onnx)
+            model = model.transform(ConvertQONNXtoFINN())
+            model.save(finn_onnx)
+    else:
+        dbg_hook = bo.enable_debug(fc)
+        bo.export_finn_onnx(fc, ishape, finn_onnx)
+        model = ModelWrapper(finn_onnx)
+        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
+        # ToDo: We should probably have a transformation pass that does this
+        #  domain conversion for us.
+        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
+        for dbg_node in dbg_nodes:
+            dbg_node.domain = "finn.custom_op.general"
+        model = model.transform(InferShapes())
+        model = model.transform(FoldConstants())
+        model = model.transform(RemoveStaticGraphInputs())
+        model.save(finn_onnx)
     model = ModelWrapper(finn_onnx)
-    model = model.transform(InferShapes())
-    model = model.transform(FoldConstants())
-    model = model.transform(RemoveStaticGraphInputs())
     assert len(model.graph.input) == 1
     assert len(model.graph.output) == 1
     # load one of the test vectors
     raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
-    input_dict = {"0": nph.to_array(input_tensor)}
+    input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
     output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
     produced = output_dict[model.graph.output[0].name]
     # run using PyTorch/Brevitas
@@ -70,9 +103,19 @@ def test_brevitas_debug():
     names_brevitas = set(dbg_hook.values.keys())
     names_finn = set(output_dict.keys())
     names_common = names_brevitas.intersection(names_finn)
-    assert len(names_common) == 16
+    # The different exports return debug markers in different numbers and places
+    if QONNX_export and not QONNX_FINN_conversion:
+        assert len(names_common) == 12
+    elif QONNX_export and QONNX_FINN_conversion:
+        assert len(names_common) == 8
+    else:
+        assert len(names_common) == 16
     for dbg_name in names_common:
-        tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
+        if QONNX_export:
+            tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy()
+        else:
+            tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
         tensor_finn = output_dict[dbg_name]
         assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all()
     os.remove(finn_onnx)
diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py
index b280ab9e116f8b4735f31d16e08d8f1055470155..8e1e3de8d06b24ce946fb0a6726d875d0e75736e 100644
--- a/tests/brevitas/test_brevitas_fc.py
+++ b/tests/brevitas/test_brevitas_fc.py
@@ -33,13 +33,16 @@ import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from pkgutil import get_data
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import RemoveStaticGraphInputs
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import make_build_dir
 from finn.util.test import get_test_model_trained
 
@@ -52,15 +55,25 @@ export_onnx_path = make_build_dir("test_brevitas_fc_")
 @pytest.mark.parametrize("wbits", [1, 2])
 # network topology / size
 @pytest.mark.parametrize("size", ["TFC", "SFC", "LFC"])
-def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
+# QONNX export
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, QONNX_export):
     if size == "LFC" and wbits == 2 and abits == 2:
         pytest.skip("No LFC-w2a2 present at the moment")
     if wbits > abits:
         pytest.skip("No wbits > abits cases at the moment")
-    nname = "%s_%dW%dA" % (size, wbits, abits)
+    nname = "%s_%dW%dA_QONNX-%d" % (size, wbits, abits, QONNX_export)
     finn_onnx = export_onnx_path + "/%s.onnx" % nname
     fc = get_test_model_trained(size, wbits, abits)
-    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
+    ishape = (1, 1, 28, 28)
+    if QONNX_export:
+        BrevitasONNXManager.export(fc, ishape, finn_onnx)
+        qonnx_cleanup(finn_onnx, out_file=finn_onnx)
+        model = ModelWrapper(finn_onnx)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(finn_onnx)
+    else:
+        bo.export_finn_onnx(fc, ishape, finn_onnx)
     model = ModelWrapper(finn_onnx)
     model = model.transform(InferShapes())
     model = model.transform(FoldConstants())
@@ -71,7 +84,7 @@ def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
     raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
-    input_dict = {"0": nph.to_array(input_tensor)}
+    input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
     output_dict = oxe.execute_onnx(model, input_dict)
     produced = output_dict[list(output_dict.keys())[0]]
     # run using PyTorch/Brevitas
diff --git a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
similarity index 82%
rename from tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
rename to tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
index 6ddf71a5cba14916e3bcb13e65b1da2f4fddc63f..b530b4bd84c548319549a8b16e0c3a79584e075d 100644
--- a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
@@ -36,11 +36,14 @@ import torch
 from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantHardTanh
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx"
 
@@ -48,7 +51,10 @@ export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx"
 @pytest.mark.parametrize("abits", [1, 2, 4, 8])
 @pytest.mark.parametrize("narrow_range", [False, True])
 @pytest.mark.parametrize("max_val", [1.0, 1 - 2 ** (-7)])
-def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_qhardtanh_nonscaled(
+    abits, narrow_range, max_val, QONNX_export
+):
     def get_quant_type(bit_width):
         if bit_width is None:
             return QuantType.FP
@@ -69,7 +75,15 @@ def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
         scaling_impl_type=ScalingImplType.CONST,
         narrow_range=narrow_range,
     )
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
diff --git a/tests/brevitas/test_brevitas_QConv2d.py b/tests/brevitas/test_brevitas_qconv2d.py
similarity index 85%
rename from tests/brevitas/test_brevitas_QConv2d.py
rename to tests/brevitas/test_brevitas_qconv2d.py
index 9de2efbcee627384cb76d1e05d0495cf6b40b169..beaea4e51ecdd4cff9f0d4d0c16735cdecad207c 100644
--- a/tests/brevitas/test_brevitas_QConv2d.py
+++ b/tests/brevitas/test_brevitas_qconv2d.py
@@ -36,12 +36,15 @@ from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
 from brevitas.core.stats import StatsOp
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantConv2d
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import gen_finn_dt_tensor
 
 export_onnx_path = "test_brevitas_conv.onnx"
@@ -50,7 +53,8 @@ export_onnx_path = "test_brevitas_conv.onnx"
 @pytest.mark.parametrize("dw", [False, True])
 @pytest.mark.parametrize("bias", [True, False])
 @pytest.mark.parametrize("in_channels", [32])
-def test_brevitas_QConv2d(dw, bias, in_channels):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
     ishape = (1, 32, 111, 111)
     if dw is True:
         groups = in_channels
@@ -89,7 +93,15 @@ def test_brevitas_QConv2d(dw, bias, in_channels):
     weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
     b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
     b_conv.eval()
-    bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_conv, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=-1.0, high=1.0, size=ishape).astype(np.float32)
diff --git a/tests/brevitas/test_brevitas_qlinear.py b/tests/brevitas/test_brevitas_qlinear.py
index 67e4d04d066a2a1a6baf78429d91e724a9d80e7f..1099d3ec83336e5cd07707b35baea112b7a2aee6 100644
--- a/tests/brevitas/test_brevitas_qlinear.py
+++ b/tests/brevitas/test_brevitas_qlinear.py
@@ -33,12 +33,15 @@ import numpy as np
 import os
 import torch
 from brevitas.core.quant import QuantType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantLinear
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import gen_finn_dt_tensor
 
 export_onnx_path = "test_brevitas_qlinear.onnx"
@@ -49,7 +52,10 @@ export_onnx_path = "test_brevitas_qlinear.onnx"
 @pytest.mark.parametrize("in_features", [3])
 @pytest.mark.parametrize("w_bits", [4])
 @pytest.mark.parametrize("i_dtype", [DataType["UINT4"]])
-def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_qlinear(
+    bias, out_features, in_features, w_bits, i_dtype, QONNX_export
+):
     i_shape = (1, in_features)
     w_shape = (out_features, in_features)
     b_linear = QuantLinear(
@@ -66,7 +72,15 @@ def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
     )
     b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
     b_linear.eval()
-    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_linear, i_shape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py
index bb59a8414feffbb8362de629f8b30ac200a5227f..57ead3b6c047220e90d4276620cc14b8f795fe08 100644
--- a/tests/brevitas/test_brevitas_relu_act_export.py
+++ b/tests/brevitas/test_brevitas_relu_act_export.py
@@ -36,11 +36,14 @@ import torch
 from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantReLU
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_relu_act_export.onnx"
 
@@ -50,7 +53,8 @@ export_onnx_path = "test_brevitas_relu_act_export.onnx"
 @pytest.mark.parametrize(
     "scaling_impl_type", [ScalingImplType.CONST, ScalingImplType.PARAMETER]
 )
-def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type, QONNX_export):
     min_val = -1.0
     ishape = (1, 15)
 
@@ -71,8 +75,15 @@ scaling_impl.learned_value": torch.tensor(
             )
         }
         b_act.load_state_dict(checkpoint)
-
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
@@ -103,7 +114,10 @@ scaling_impl.learned_value": torch.tensor(
 @pytest.mark.parametrize("abits", [2, 4, 8])
 @pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)])
 @pytest.mark.parametrize("scaling_per_channel", [True, False])
-def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_relu_imagenet(
+    abits, max_val, scaling_per_channel, QONNX_export
+):
     out_channels = 32
     ishape = (1, out_channels, 1, 1)
     min_val = -1.0
@@ -115,7 +129,7 @@ def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
         restrict_scaling_type=RestrictValueType.LOG_FP,
         scaling_min_val=2e-16,
         max_val=6.0,
-        return_quant_tensor=True,
+        return_quant_tensor=False,
         per_channel_broadcastable_shape=(1, out_channels, 1, 1),
     )
     if scaling_per_channel is True:
@@ -129,7 +143,15 @@ scaling_impl.learned_value": rand_tensor.type(
         )
     }
     b_act.load_state_dict(checkpoint)
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
@@ -140,7 +162,7 @@ scaling_impl.learned_value": rand_tensor.type(
     produced = odict[model.graph.output[0].name]
     inp_tensor = torch.from_numpy(inp_tensor).float()
     b_act.eval()
-    expected = b_act.forward(inp_tensor).tensor.detach().numpy()
+    expected = b_act.forward(inp_tensor).detach().numpy()
     if not np.isclose(produced, expected, atol=1e-3).all():
         print(abits, max_val)
         print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
diff --git a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
similarity index 87%
rename from tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
rename to tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
index 345fae872119c75aa8e85cb5812c94dfc15bad7f..c6da2e2e971ee97cb73243284920cc87e8b4d7bb 100644
--- a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
@@ -36,11 +36,14 @@ import torch
 from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantHardTanh
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx"
 
@@ -52,8 +55,9 @@ export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx"
 @pytest.mark.parametrize(
     "scaling_impl_type", [ScalingImplType.CONST, ScalingImplType.PARAMETER]
 )
+@pytest.mark.parametrize("QONNX_export", [False, True])
 def test_brevitas_act_export_qhardtanh_scaled(
-    abits, narrow_range, min_val, max_val, scaling_impl_type
+    abits, narrow_range, min_val, max_val, scaling_impl_type, QONNX_export
 ):
     def get_quant_type(bit_width):
         if bit_width is None:
@@ -84,8 +88,15 @@ tensor_quant.scaling_impl.learned_value": torch.tensor(
             )
         }
         b_act.load_state_dict(checkpoint)
-
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index 14e10da86ec5cec14dfabdd0b4f6c2b92dd42519..1fddc7c1c26a0ba04d5849809ccf59b0a926a509 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -38,9 +38,11 @@ import os
 import subprocess
 import torch
 import warnings
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from collections import OrderedDict
 from dataset_loading import cifar, mnist
 from datetime import datetime
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 from scipy.stats import linregress
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
@@ -82,6 +84,7 @@ from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.merge_onnx_models import MergeONNXModels
 from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import (
     MakeMaxPoolNHWC,
@@ -104,8 +107,14 @@ mem_mode = "decoupled"
 rtlsim_trace = False
 
 
-def get_checkpoint_name(topology, wbits, abits, step):
-    return build_dir + "/end2end_%s_w%da%d_%s.onnx" % (topology, wbits, abits, step)
+def get_checkpoint_name(topology, wbits, abits, QONNX_export, step):
+    return build_dir + "/end2end_%s_w%da%d_QONNX-%d_%s.onnx" % (
+        topology,
+        wbits,
+        abits,
+        QONNX_export,
+        step,
+    )
 
 
 def get_dashboard_data(topology, wbits, abits):
@@ -303,15 +312,23 @@ def topology2dataset(topology):
 @pytest.mark.parametrize("wbits", [1, 2])
 @pytest.mark.parametrize("abits", [1, 2])
 @pytest.mark.parametrize("topology", ["lfc", "tfc", "cnv"])
+@pytest.mark.parametrize("QONNX_export", [False, True])
 class TestEnd2End:
-    def test_export(self, topology, wbits, abits):
+    def test_export(self, topology, wbits, abits, QONNX_export):
         if wbits > abits:
             pytest.skip("No wbits > abits end2end network configs for now")
         if topology == "lfc" and not (wbits == 1 and abits == 1):
             pytest.skip("Skipping certain lfc configs")
         (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
-        chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
-        bo.export_finn_onnx(model, ishape, chkpt_name)
+        chkpt_name = get_checkpoint_name(topology, wbits, abits, QONNX_export, "export")
+        if QONNX_export:
+            BrevitasONNXManager.export(model, ishape, chkpt_name)
+            qonnx_cleanup(chkpt_name, out_file=chkpt_name)
+            model = ModelWrapper(chkpt_name)
+            model = model.transform(ConvertQONNXtoFINN())
+            model.save(chkpt_name)
+        else:
+            bo.export_finn_onnx(model, ishape, chkpt_name)
         nname = "%s_w%da%d" % (topology, wbits, abits)
         update_dashboard_data(topology, wbits, abits, "network", nname)
         dtstr = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -323,8 +340,10 @@ class TestEnd2End:
         update_dashboard_data(topology, wbits, abits, "finn-commit", finn_commit)
         assert os.path.isfile(chkpt_name)
 
-    def test_import_and_tidy(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
+    def test_import_and_tidy(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "export"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         model = model.transform(InferShapes())
         model = model.transform(FoldConstants())
@@ -332,17 +351,23 @@ class TestEnd2End:
         model = model.transform(GiveReadableTensorNames())
         model = model.transform(InferDataTypes())
         model = model.transform(RemoveStaticGraphInputs())
-        chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
+        chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "import_and_tidy"
+        )
         model.save(chkpt)
 
-    def test_add_pre_and_postproc(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
+    def test_add_pre_and_postproc(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "import_and_tidy"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         global_inp_name = model.graph.input[0].name
         ishape = model.get_tensor_shape(global_inp_name)
         # preprocessing: torchvision's ToTensor divides uint8 inputs by 255
         totensor_pyt = ToTensor()
-        chkpt_preproc_name = get_checkpoint_name(topology, wbits, abits, "preproc")
+        chkpt_preproc_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "preproc"
+        )
         bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)
         assert os.path.isfile(chkpt_preproc_name)
         # join preprocessing and core model
@@ -355,7 +380,9 @@ class TestEnd2End:
         model.set_tensor_datatype(global_inp_name, DataType["UINT8"])
         # postprocessing: insert Top-1 node at the end
         model = model.transform(InsertTopK(k=1))
-        chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
+        chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "pre_post"
+        )
         # tidy-up again
         model = model.transform(InferShapes())
         model = model.transform(FoldConstants())
@@ -366,8 +393,10 @@ class TestEnd2End:
         model.save(chkpt_name)
         assert os.path.isfile(chkpt_name)
 
-    def test_streamline(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
+    def test_streamline(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "pre_post"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
         # move past any reshapes to be able to streamline input scaling
@@ -383,10 +412,14 @@ class TestEnd2End:
         model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
         model = model.transform(InferDataLayouts())
         model = model.transform(RemoveUnusedTensors())
-        model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "streamline")
+        )
 
-    def test_convert_to_hls_layers(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "streamline")
+    def test_convert_to_hls_layers(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "streamline"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         if topology == "tfc" and wbits == 1 and abits == 1:
             # use standalone thresholds for tfc-w1a1 to also exercise that option
@@ -408,16 +441,55 @@ class TestEnd2End:
         model = model.transform(absorb.AbsorbConsecutiveTransposes())
         model = model.transform(GiveUniqueNodeNames())
         model = model.transform(InferDataLayouts())
-        model.save(get_checkpoint_name(topology, wbits, abits, "convert_to_hls_layers"))
+        model.save(
+            get_checkpoint_name(
+                topology, wbits, abits, QONNX_export, "convert_to_hls_layers"
+            )
+        )
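+        # expected op types and counts after conversion to HLS layers, per topology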
+        exp_layer_counts = {
+            "tfc": [
+                ("Reshape", 1),
+                ("Thresholding_Batch", 1),
+                ("StreamingFCLayer_Batch", 4),
+                ("LabelSelect_Batch", 1),
+            ],
+            "tfc-1-1": [
+                ("Reshape", 1),
+                ("Thresholding_Batch", 4),
+                ("StreamingFCLayer_Batch", 4),
+                ("LabelSelect_Batch", 1),
+            ],
+            "lfc": [
+                ("Reshape", 1),
+                ("Thresholding_Batch", 1),
+                ("StreamingFCLayer_Batch", 4),
+                ("LabelSelect_Batch", 1),
+            ],
+            "cnv": [
+                ("Transpose", 1),
+                ("Thresholding_Batch", 1),
+                ("ConvolutionInputGenerator", 6),
+                ("StreamingFCLayer_Batch", 9),
+                ("StreamingMaxPool_Batch", 2),
+                ("LabelSelect_Batch", 1),
+            ],
+        }
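+        # tfc-w1a1 uses standalone thresholds, so it has its own expected counts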
+        if topology == "tfc" and wbits == 1 and abits == 1:
+            exp_key = "tfc-1-1"
+        else:
+            exp_key = topology
+        exp_layer_counts = exp_layer_counts[exp_key]
+        for (op_type, exp_count) in exp_layer_counts:
+            assert len(model.get_nodes_by_op_type(op_type)) == exp_count
 
-    def test_create_dataflow_partition(self, topology, wbits, abits):
+    def test_create_dataflow_partition(self, topology, wbits, abits, QONNX_export):
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "convert_to_hls_layers"
+            topology, wbits, abits, QONNX_export, "convert_to_hls_layers"
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         parent_model = model.transform(CreateDataflowPartition())
         parent_model_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "dataflow_parent"
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
         )
         parent_model.save(parent_model_chkpt)
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
@@ -425,28 +497,36 @@ class TestEnd2End:
         dataflow_model_filename = sdp_node.get_nodeattr("model")
         dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
         dataflow_model_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "dataflow_model"
+            topology, wbits, abits, QONNX_export, "dataflow_model"
         )
         dataflow_model.save(dataflow_model_chkpt)
 
-    def test_fold(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "dataflow_model")
+    def test_fold(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_model"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         folding_fxn = get_folding_function(topology, wbits, abits)
         model = folding_fxn(model)
-        model.save(get_checkpoint_name(topology, wbits, abits, "fold"))
+        model.save(get_checkpoint_name(topology, wbits, abits, QONNX_export, "fold"))
 
     @pytest.mark.slow
     @pytest.mark.vivado
-    def test_cppsim(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
+    def test_cppsim(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "fold"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         model = model.transform(PrepareCppSim())
         model = model.transform(CompileCppSim())
         model = model.transform(SetExecMode("cppsim"))
-        cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
+        cppsim_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "cppsim"
+        )
         model.save(cppsim_chkpt)
-        parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+        parent_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
+        )
         (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
             topology, wbits, abits, return_topk=1
         )
@@ -456,22 +536,28 @@ class TestEnd2End:
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_ipgen(self, topology, wbits, abits, kind):
+    def test_ipgen(self, topology, wbits, abits, QONNX_export, kind):
         if kind == "alveo" and ("VITIS_PATH" not in os.environ):
             pytest.skip("VITIS_PATH not set")
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "fold"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
         model = model.transform(GiveUniqueNodeNames())
         model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
         model = model.transform(HLSSynthIP())
-        model.save(get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "ipgen_" + kind)
+        )
 
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_set_fifo_depths(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind)
+    def test_set_fifo_depths(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "ipgen_" + kind
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
         model = model.transform(InsertAndSetFIFODepths(test_fpga_part, target_clk_ns))
@@ -483,14 +569,18 @@ class TestEnd2End:
                 op_inst = getCustomOp(node)
                 assert op_inst.get_nodeattr("inFIFODepth") == 0
                 assert op_inst.get_nodeattr("outFIFODepth") == 0
-        model.save(get_checkpoint_name(topology, wbits, abits, "fifodepth_" + kind))
+        model.save(
+            get_checkpoint_name(
+                topology, wbits, abits, QONNX_export, "fifodepth_" + kind
+            )
+        )
 
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq"])
-    def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
+    def test_ipstitch_rtlsim(self, topology, wbits, abits, QONNX_export, kind):
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "fifodepth_" + kind
+            topology, wbits, abits, QONNX_export, "fifodepth_" + kind
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
@@ -514,10 +604,12 @@ class TestEnd2End:
             )
             os.environ["RTLSIM_TRACE_DEPTH"] = "3"
         rtlsim_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "ipstitch_rtlsim_" + kind
+            topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
         )
         model.save(rtlsim_chkpt)
-        parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+        parent_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
+        )
         (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
             topology, wbits, abits, return_topk=1
         )
@@ -527,9 +619,9 @@ class TestEnd2End:
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq"])
-    def test_throughput_rtlsim(self, topology, wbits, abits, kind):
+    def test_throughput_rtlsim(self, topology, wbits, abits, QONNX_export, kind):
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "ipstitch_rtlsim_" + kind
+            topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         n_nodes = len(model.graph.node)
@@ -550,15 +642,23 @@ class TestEnd2End:
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq"])
-    def test_validate_top1(self, topology, wbits, abits, kind):
+    def test_validate_top1(self, topology, wbits, abits, QONNX_export, kind):
         if "TEST_END2END_VALIDATE_TOP1" not in os.environ:
             pytest.skip("TEST_END2END_VALIDATE_TOP1 not set")
-        prepostproc_chkpt = get_checkpoint_name(topology, wbits, abits, "pre_post")
-        streamline_chkpt = get_checkpoint_name(topology, wbits, abits, "streamline")
-        parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
-        cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
+        prepostproc_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "pre_post"
+        )
+        streamline_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "streamline"
+        )
+        parent_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
+        )
+        cppsim_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "cppsim"
+        )
         rtlsim_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "ipstitch_rtlsim_" + kind
+            topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
         )
         dataset = topology2dataset(topology)
         assert measure_top1_accuracy(prepostproc_chkpt, dataset) > 80
@@ -570,11 +670,11 @@ class TestEnd2End:
     @pytest.mark.vivado
     @pytest.mark.vitis
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_build(self, topology, wbits, abits, kind):
+    def test_build(self, topology, wbits, abits, QONNX_export, kind):
         if kind == "alveo" and ("VITIS_PATH" not in os.environ):
             pytest.skip("VITIS_PATH not set")
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "fifodepth_" + kind
+            topology, wbits, abits, QONNX_export, "fifodepth_" + kind
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         cfg = get_build_env(kind, target_clk_ns)
@@ -584,24 +684,32 @@ class TestEnd2End:
         for (k, v) in synth_dct.items():
             update_dashboard_data(topology, wbits, abits, k, v)
         update_dashboard_data(topology, wbits, abits, "board", cfg["board"])
-        model.save(get_checkpoint_name(topology, wbits, abits, "build_" + kind))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "build_" + kind)
+        )
 
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.vitis
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_make_pynq_driver(self, topology, wbits, abits, kind):
+    def test_make_pynq_driver(self, topology, wbits, abits, QONNX_export, kind):
         if kind == "alveo" and ("VITIS_PATH" not in os.environ):
             pytest.skip("VITIS_PATH not set")
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "build_" + kind)
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "build_" + kind
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         kind_to_driver_platform = {"zynq": "zynq-iodma", "alveo": "alveo"}
         model = model.transform(MakePYNQDriver(kind_to_driver_platform[kind]))
-        model.save(get_checkpoint_name(topology, wbits, abits, "driver_" + kind))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "driver_" + kind)
+        )
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_deploy(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "driver_" + kind)
+    def test_deploy(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "driver_" + kind
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         cfg = get_build_env(kind, target_clk_ns)
         if cfg["ip"] == "":
@@ -616,11 +724,15 @@ class TestEnd2End:
             )
         )
         # save the model to be able to link it to the parent
-        model.save(get_checkpoint_name(topology, wbits, abits, "deploy_" + kind))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "deploy_" + kind)
+        )
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_run_on_hw(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "deploy_" + kind)
+    def test_run_on_hw(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "deploy_" + kind
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)  # NOQA
         cfg = get_build_env(kind, target_clk_ns)
         if cfg["ip"] == "":
@@ -629,7 +741,7 @@ class TestEnd2End:
             topology, wbits, abits, return_topk=1
         )
         parent_model = load_test_checkpoint_or_skip(
-            get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "dataflow_parent")
         )
         iname = parent_model.graph.input[0].name
         oname = parent_model.graph.output[0].name
@@ -641,8 +753,10 @@ class TestEnd2End:
         assert np.isclose(y, output_tensor_npy).all()
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_throughput_hw(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "deploy_" + kind)
+    def test_throughput_hw(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "deploy_" + kind
+        )
         end2end_example = "%s_w%da%d_%s" % (topology, wbits, abits, kind)
         model = load_test_checkpoint_or_skip(prev_chkpt_name)  # NOQA
         cfg = get_build_env(kind, target_clk_ns)
@@ -698,9 +812,13 @@ class TestEnd2End:
             ret[largest_bsize]["throughput[images/s]"],
         )
 
-    def test_upload_results_to_dashboard(self, topology, wbits, abits):
-        dashboard_data = get_dashboard_data(topology, wbits, abits)
-        if len(dashboard_data.keys()) > 0:
-            upload_to_end2end_dashboard(dashboard_data)
+    def test_upload_results_to_dashboard(self, topology, wbits, abits, QONNX_export):
+        # TODO: extend the dashboard to also upload QONNX-exported models?
+        if QONNX_export:
+            pytest.skip("Dashboard data upload is disabled for QONNX exported models.")
         else:
-            pytest.skip("No data to upload to dashboard")
+            dashboard_data = get_dashboard_data(topology, wbits, abits)
+            if len(dashboard_data.keys()) > 0:
+                upload_to_end2end_dashboard(dashboard_data)
+            else:
+                pytest.skip("No data to upload to dashboard")
diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py
index 23a5d23f1a91798b834797b3a8ccc35d07b2e61a..e24d87ca6a505de7d0ed50b01157092eb0a26525 100644
--- a/tests/end2end/test_end2end_cybsec_mlp.py
+++ b/tests/end2end/test_end2end_cybsec_mlp.py
@@ -40,13 +40,16 @@ import torch
 import torch.nn as nn
 import wget
 from brevitas.core.quant import QuantType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantIdentity, QuantLinear, QuantReLU
 from brevitas.quant_tensor import QuantTensor
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.builder.build_dataflow as build
 import finn.builder.build_dataflow_config as build_cfg
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import make_build_dir
 from finn.util.test import get_build_env, load_test_checkpoint_or_skip
 
@@ -55,13 +58,13 @@ build_kind = "zynq"
 build_dir = os.environ["FINN_BUILD_DIR"]
 
 
-def get_checkpoint_name(step):
+def get_checkpoint_name(step, QONNX_export):
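+    # checkpoint names are tagged with the export flow (QONNX_export as 0/1)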
     if step == "build":
         # checkpoint for build step is an entire dir
-        return build_dir + "/end2end_cybsecmlp_build"
+        return build_dir + "/end2end_cybsecmlp_build_QONNX-%d" % (QONNX_export)
     else:
         # other checkpoints are onnx files
-        return build_dir + "/end2end_cybsecmlp_%s.onnx" % (step)
+        return build_dir + "/end2end_cybsecmlp_QONNX-%d_%s.onnx" % (QONNX_export, step)
 
 
 class CybSecMLPForExport(nn.Module):
@@ -82,7 +85,8 @@ class CybSecMLPForExport(nn.Module):
         return out_final
 
 
-def test_end2end_cybsec_mlp_export():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_export(QONNX_export):
     assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
     # load up trained net in Brevitas
     input_size = 593
@@ -116,7 +120,7 @@ def test_end2end_cybsec_mlp_export():
     W_new = np.pad(W_orig, [(0, 0), (0, 7)])
     model[0].weight.data = torch.from_numpy(W_new)
     model_for_export = CybSecMLPForExport(model)
-    export_onnx_path = get_checkpoint_name("export")
+    export_onnx_path = get_checkpoint_name("export", QONNX_export)
     input_shape = (1, 600)
     # create a QuantTensor instance to mark the input as bipolar during export
     input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
@@ -127,34 +131,61 @@ def test_end2end_cybsec_mlp_export():
         input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True
     )
 
-    bo.export_finn_onnx(
-        model_for_export, export_path=export_onnx_path, input_t=input_qt
-    )
+    if QONNX_export:
+        # With the BrevitasONNXManager we need to manually set
+        # the FINN DataType at the input
+        BrevitasONNXManager.export(
+            model_for_export, input_shape, export_path=export_onnx_path
+        )
+        model = ModelWrapper(export_onnx_path)
+        model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
+        model.save(export_onnx_path)
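+        # run the QONNX cleanup and convert the model into the FINN dialect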
+        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+        model = ModelWrapper(export_onnx_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(export_onnx_path)
+    else:
+        bo.export_finn_onnx(
+            model_for_export, export_path=export_onnx_path, input_t=input_qt
+        )
     assert os.path.isfile(export_onnx_path)
     # fix input datatype
     finn_model = ModelWrapper(export_onnx_path)
     finnonnx_in_tensor_name = finn_model.graph.input[0].name
     assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
     # verify a few exported ops
-    assert finn_model.graph.node[1].op_type == "Add"
-    assert finn_model.graph.node[2].op_type == "Div"
-    assert finn_model.graph.node[3].op_type == "MatMul"
-    assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+    if QONNX_export:
+        # The first "Mul" node doesn't exist in the QONNX export,
+        # because the QuantTensor scale is not exported.
+        # However, this node would have been unity scale anyways and
+        # the models are still equivalent.
+        assert finn_model.graph.node[0].op_type == "Add"
+        assert finn_model.graph.node[1].op_type == "Div"
+        assert finn_model.graph.node[2].op_type == "MatMul"
+        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+    else:
+        assert finn_model.graph.node[0].op_type == "Mul"
+        assert finn_model.get_initializer(finn_model.graph.node[0].input[1]) == 1.0
+        assert finn_model.graph.node[1].op_type == "Add"
+        assert finn_model.graph.node[2].op_type == "Div"
+        assert finn_model.graph.node[3].op_type == "MatMul"
+        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
     # verify datatypes on some tensors
     assert (
         finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType["BIPOLAR"]
     )
-    first_matmul_w_name = finn_model.graph.node[3].input[1]
+    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
     assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType["INT2"]
 
 
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_end2end_cybsec_mlp_build():
-    model_file = get_checkpoint_name("export")
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_build(QONNX_export):
+    model_file = get_checkpoint_name("export", QONNX_export)
     load_test_checkpoint_or_skip(model_file)
     build_env = get_build_env(build_kind, target_clk_ns)
-    output_dir = make_build_dir("test_end2end_cybsec_mlp_build")
+    output_dir = make_build_dir(f"test_end2end_cybsec_mlp_build_QONNX-{QONNX_export}")
 
     cfg = build.DataflowBuildConfig(
         output_dir=output_dir,
@@ -192,13 +223,14 @@ def test_end2end_cybsec_mlp_build():
         est_res_dict = json.load(f)
         assert est_res_dict["total"]["LUT"] == 11360.0
         assert est_res_dict["total"]["BRAM_18K"] == 36.0
-    shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build"))
+    shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build", QONNX_export))
 
 
-def test_end2end_cybsec_mlp_run_on_hw():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_run_on_hw(QONNX_export):
     build_env = get_build_env(build_kind, target_clk_ns)
     assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
-    deploy_dir = get_checkpoint_name("build")
+    deploy_dir = get_checkpoint_name("build", QONNX_export)
     if not os.path.isdir(deploy_dir):
         pytest.skip(deploy_dir + " not found from previous test step, skipping")
     driver_dir = deploy_dir + "/driver"
diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py
index 0d639ae084af257db61c85ff0b5c0d5101539b71..e459bfbc3e694d5bbc9698db562765b11f6e8c38 100644
--- a/tests/end2end/test_end2end_mobilenet_v1.py
+++ b/tests/end2end/test_end2end_mobilenet_v1.py
@@ -62,9 +62,9 @@ from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.transformation.remove import RemoveIdentityOps
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul
-from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.util.basic import alveo_default_platform, alveo_part_map
 from finn.util.pytorch import NormalizePreProc
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index 933da667ecb54d471d9e6d48ac4462421addad7e..a4e75f5254b3bfd96871dbf32b8400edc2d55379 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -44,7 +44,6 @@ from finn.transformation.fpgadataflow.floorplan import Floorplan
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
-from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
 from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
@@ -363,11 +362,6 @@ def test_fpgadataflow_ipstitch_zynqbuild(board):
         assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
         assert os.path.isfile(sdp_node.get_nodeattr("model"))
         model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
-    # generate inputs for remote exec
-    iname = "inp"
-    idt = model.get_tensor_datatype(iname)
-    ishape = model.get_tensor_shape(iname)
-    x = gen_finn_dt_tensor(idt, ishape)
     # bitfile using ZynqBuild
     model = model.transform(ZynqBuild(board, 10))
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_customzynq.onnx")
@@ -375,22 +369,3 @@ def test_fpgadataflow_ipstitch_zynqbuild(board):
     bitfile_name = model.get_metadata_prop("bitfile")
     assert bitfile_name is not None
     assert os.path.isfile(bitfile_name)
-    # deployment
-    try:
-        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
-        if ip == "":
-            pytest.skip("PYNQ board IP address not specified")
-        username = os.getenv("PYNQ_USERNAME", "xilinx")
-        password = os.getenv("PYNQ_PASSWORD", "xilinx")
-        port = os.getenv("PYNQ_PORT", 22)
-        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
-        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
-        deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
-        assert deployment_dir is not None
-        assert os.path.isdir(deployment_dir)
-        # remote exec
-        input_dict = {"global_in": x}
-        outp = execute_onnx(model, input_dict)
-        assert np.isclose(outp["global_out"], x).all()
-    except KeyError:
-        pytest.skip("PYNQ board IP address not specified")
diff --git a/tests/transformation/streamline/test_remove_identity_ops.py b/tests/transformation/streamline/test_remove_identity_ops.py
deleted file mode 100644
index ee4e42fc8417017184594b6e754f3e8270a46ee1..0000000000000000000000000000000000000000
--- a/tests/transformation/streamline/test_remove_identity_ops.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import pytest
-
-import numpy as np
-from onnx import TensorProto, helper
-
-import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline.remove import RemoveIdentityOps
-from finn.util.basic import gen_finn_dt_tensor
-
-
-def insert_identity_op(model, op, as_first_node, approx):
-    if approx:
-        zero_val = 0.000001
-        one_val = 0.999999
-    else:
-        zero_val = 0.0
-        one_val = 1.0
-    if op in ["Add", "Sub"]:
-        val = np.asarray([zero_val], dtype=np.float32)
-    elif op in ["Mul", "Div"]:
-        val = np.asarray([one_val], dtype=np.float32)
-    else:
-        return
-
-    graph = model.graph
-    if as_first_node:
-        identity_node = helper.make_node(op, ["inp", "value"], ["ident_out"])
-        graph.node.insert(0, identity_node)
-        graph.node[1].input[0] = "ident_out"
-    else:
-        identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"])
-        graph.node.insert(3, identity_node)
-        graph.node[-1].input[0] = "ident_out"
-    model.set_initializer("value", val)
-
-    return model
-
-
-# identity operations to be inserted
-@pytest.mark.parametrize("op", ["Add", "Sub", "Mul", "Div"])
-@pytest.mark.parametrize("approx", [False, True])
-@pytest.mark.parametrize("as_first_node", [False, True])
-def test_remove_identity_ops(op, as_first_node, approx):
-
-    # set up onnx model
-    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
-    mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [])
-    shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2])
-    div = helper.make_tensor_value_info("div", TensorProto.FLOAT, [])
-    matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2])
-    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])
-
-    mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
-    reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"])
-    div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"])
-    matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"])
-
-    graph = helper.make_graph(
-        nodes=[mul_node, reshape_node, div_node, matmul_node],
-        name="identity-graph",
-        inputs=[inp],
-        outputs=[outp],
-        value_info=[mul, shape, div, matmul],
-    )
-
-    model = helper.make_model(graph, producer_name="mulpastconv-model")
-    model = ModelWrapper(model)
-    inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, 4, 1, 1])
-    mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
-    shape_values = np.asarray([1, -1], dtype=np.int64)
-    div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
-    matmul_values = gen_finn_dt_tensor(DataType["INT2"], [4, 2])
-    model.set_initializer("mul", mul_values)
-    model.set_initializer("shape", shape_values)
-    model.set_initializer("div", div_values)
-    model.set_initializer("matmul", matmul_values)
-    insert_identity_op(model, op, as_first_node, approx)
-    model = model.transform(InferShapes())
-    model = model.transform(InferDataTypes())
-    idict = {"inp": inp_values}
-    odict = oxe.execute_onnx(model, idict)
-    out_before = odict["outp"]
-    num_of_nodes_before = len(model.graph.node)
-
-    model = model.transform(RemoveIdentityOps())
-    num_of_nodes_after = len(model.graph.node)
-    assert num_of_nodes_before - 1 == num_of_nodes_after
-
-    odict = oxe.execute_onnx(model, idict)
-    out_after = odict["outp"]
-    assert np.isclose(out_before, out_after, atol=1e-3).all()
diff --git a/tests/transformation/test_qonnx_to_finn.py b/tests/transformation/test_qonnx_to_finn.py
new file mode 100644
index 0000000000000000000000000000000000000000..df7d63e3d2e139077f0fa20b10714c0a43a24e47
--- /dev/null
+++ b/tests/transformation/test_qonnx_to_finn.py
@@ -0,0 +1,175 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import pkg_resources as pk
+
+import pytest
+
+import brevitas.export.onnx.generic as b_onnx
+import brevitas.onnx as bo
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
+import torch
+from pkgutil import get_data
+from qonnx.util.cleanup import cleanup
+from tempfile import TemporaryDirectory
+
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.util.test import get_test_model_trained
+
+
+def get_brev_model_and_sample_inputs(model_name, wbits, abits):
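+    # return a trained Brevitas model, its input shape and a sample input tensor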
+    if "FC" in model_name:
+        in_shape = (1, 1, 28, 28)
+        raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+        input_tensor = onnx.load_tensor_from_string(raw_i)
+        input_tensor = nph.to_array(input_tensor)
+        brev_model = get_test_model_trained(model_name, wbits, abits)
+    elif model_name == "CNV":
+        in_shape = (1, 3, 32, 32)
+        fn = pk.resource_filename(
+            "finn.qnn-data", "cifar10/cifar10-test-data-class3.npz"
+        )
+        input_tensor = np.load(fn)["arr_0"].astype(np.float32)
+        input_tensor = input_tensor / 255
+        brev_model = get_test_model_trained(model_name, wbits, abits)
+    elif model_name == "mobilenet":
+        in_shape = (1, 3, 224, 224)
+        np.random.seed(42)
+        input_tensor = np.random.normal(size=in_shape).astype(dtype=np.float32)
+        brev_model = get_test_model_trained(model_name, 4, 4)
+    else:
+        raise RuntimeError(f"The model with the name {model_name} is not supported.")
+
+    return brev_model, in_shape, input_tensor
+
+
+def analysis_testing_for_no_quant_nodes(model):
+    # Test that all Quant nodes have been converted to MultiThreshold nodes
+    # or folded into tensor initializers.
+
+    for op_type in ["BinaryQuant", "Quant", "Trunc"]:
+        q_count = len(model.get_nodes_by_op_type(op_type))
+        if q_count > 0:
+            raise ValueError(f"There should be no {op_type} nodes left in the graph.")
+
+    return dict()
+
+
+# This test currently takes about 4 minutes and 20 seconds to run
+@pytest.mark.parametrize("abits", [1, 2])
+@pytest.mark.parametrize("wbits", [1, 2])
+@pytest.mark.parametrize("model_name", ["TFC", "SFC", "LFC", "CNV", "mobilenet"])
+def test_QONNX_to_FINN(model_name, wbits, abits):
+    if wbits > abits:
+        pytest.skip("No wbits > abits cases at the moment")
+    if model_name == "LFC" and wbits == 2 and abits == 2:
+        pytest.skip("No LFC-w2a2 present at the moment")
+    if model_name == "mobilenet" and (wbits != 2 or abits != 2):
+        pytest.skip("Mobilenet only runs at W2A2, though it's technically W4A4.")
+
+    # Get test config and model
+    ATOL = 1e-7
+    brev_model, in_shape, input_tensor = get_brev_model_and_sample_inputs(
+        model_name, wbits, abits
+    )
+    temp_dir = TemporaryDirectory()
+    qonnx_base_path = temp_dir.name + "/qonnx_{}.onnx"
+    finn_base_path = temp_dir.name + "/finn_{}.onnx"
+
+    # Get Brevitas output
+    torch_input_tensor = torch.from_numpy(input_tensor).float()
+    brev_output = brev_model.forward(torch_input_tensor).detach().numpy()
+
+    # Get "clean" FINN model and it's output
+    _ = bo.export_finn_onnx(brev_model, in_shape, finn_base_path.format("raw"))
+    model = ModelWrapper(finn_base_path.format("raw"))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(RemoveStaticGraphInputs())
+    model.save(finn_base_path.format("clean"))
+
+    model = ModelWrapper(finn_base_path.format("clean"))
+    input_dict = {model.graph.input[0].name: input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict, False)
+    finn_export_output = output_dict[model.graph.output[0].name]
+    # This comparison always fails for MobileNet; the cause is not yet known
+    if model_name != "mobilenet":
+        assert np.isclose(
+            brev_output, finn_export_output, atol=ATOL
+        ).all(), "The output of the Brevitas model and the FINN model should match."
+
+    # Get the equivalent QONNX model
+    b_onnx.function.DOMAIN_STRING = "finn.custom_op.general"
+    _ = b_onnx.manager.BrevitasONNXManager.export(
+        brev_model, in_shape, qonnx_base_path.format("raw")
+    )
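+    # tidy up the raw QONNX export before executing and converting it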
+    cleanup(qonnx_base_path.format("raw"), out_file=qonnx_base_path.format("clean"))
+
+    # Compare output
+    model = ModelWrapper(qonnx_base_path.format("clean"))
+    input_dict = {model.graph.input[0].name: input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict, False)
+    qonnx_export_output = output_dict[model.graph.output[0].name]
+    assert np.isclose(
+        brev_output, qonnx_export_output, atol=ATOL
+    ).all(), "The output of the Brevitas model and the QONNX model should match."
+    # This comparison always fails for MobileNet; the cause is not yet known
+    if model_name != "mobilenet":
+        assert np.isclose(
+            qonnx_export_output, finn_export_output, atol=ATOL
+        ).all(), "The output of the FINN model and the QONNX model should match."
+
+    # Run QONNX to FINN conversion
+    model = ModelWrapper(qonnx_base_path.format("clean"))
+    model = model.transform(ConvertQONNXtoFINN())
+    model.save(qonnx_base_path.format("whole_trafo"))
+
+    # Compare output
+    model = ModelWrapper(qonnx_base_path.format("whole_trafo"))
+    input_dict = {model.graph.input[0].name: input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict, False)
+    test_output = output_dict[model.graph.output[0].name]
+    assert np.isclose(test_output, finn_export_output, atol=ATOL).all(), (
+        "The output of the FINN model "
+        "and the QONNX -> FINN converted model should match."
+    )
+
+    # Run analysis passes on the converted model
+    model = ModelWrapper(qonnx_base_path.format("whole_trafo"))
+    _ = model.analysis(analysis_testing_for_no_quant_nodes)
+
+    temp_dir.cleanup()