diff --git a/AUTHORS.rst b/AUTHORS.rst
index 533ed62e1dbda2799f74805f2100769f9c4fecfc..1d42d35a3b269176fcab79d8239b84ac8442fa43 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -13,3 +13,12 @@ Contributors
 * Suranga Mahesh (@surangamh)
 * Peter Lehnhardt (@pete-lennart)
 * Neil Kim Nielsen (@neilkimn)
+* Jon Ander Lezeta (@jalezeta)
+* John Terry (@jterry-x)
+* Alina Vasilciuc (@alinavalinav)
+* Alessandro Pappalardo (@volcacius)
+* Giuseppe Franco (@Giuseppe5)
+* Syed Asad Alam (@asadalam)
+* Javier Duarte (@jmduarte)
+* Uma Maheshwari (@umav1511)
+* José Rosa (@pinxau1000)
diff --git a/README.md b/README.md
index 10ac25cb8f9e23520830efa4f2f7a58a21370e29..f36eac3a911315c260f1849a0406a9a467f0d53f 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,9 @@ Please see the [Getting Started](https://finn.readthedocs.io/en/latest/getting_s
 
 ## What's New in FINN?
 
+* **2021-11-05:** v0.7 is released, introducing QONNX support, three new example networks and many other improvements. Read more on the [v0.7 release blog post](https://xilinx.github.io/finn//2021/11/05/finn-v07-is-released.html).
 * **2021-06-15:** v0.6 is released, with ResNet-50 on U250 and ZCU104 MobileNet-v1 in finn-examples showcasing new features plus a lot more. Read more on the [v0.6 release blog post](https://xilinx.github.io/finn//2021/06/15/finn-v06-is-released.html).
 * **2020-12-17:** v0.5b (beta) is released, with a new [examples repo](https://github.com/Xilinx/finn-examples) including MobileNet-v1. Read more on the <a href="https://xilinx.github.io/finn/2020/12/17/finn-v05b-beta-is-released.html">release blog post</a>.
-* **2020-09-21:** v0.4b (beta) is released. Read more on the <a href="https://xilinx.github.io/finn/2020/09/21/finn-v04b-beta-is-released.html">release blog post</a>.
 
 ## Documentation
 
diff --git a/custom_hls/lookup.hpp b/custom_hls/lookup.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3001f6613ec6ed9a9e5f47d9be356d4b032f7192
--- /dev/null
+++ b/custom_hls/lookup.hpp
@@ -0,0 +1,60 @@
+/******************************************************************************
+* Copyright (c) 2021, Xilinx
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+*
+* * Redistributions of source code must retain the above copyright notice, this
+*   list of conditions and the following disclaimer.
+*
+* * Redistributions in binary form must reproduce the above copyright notice,
+*   this list of conditions and the following disclaimer in the documentation
+*   and/or other materials provided with the distribution.
+*
+* * Neither the name of FINN nor the names of its
+*   contributors may be used to endorse or promote products derived from
+*   this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ *******************************************************************************/
+
+#include <ap_int.h>
+#include <hls_stream.h>
+
+#ifndef LOOKUP_HPP
+#define LOOKUP_HPP
+
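+// Streaming embedding lookup: reads NumInputs indices from the input stream and,
+// for each index, writes the corresponding packed embedding vector to the output
+// stream. Example instantiation (all values illustrative):
+//   StreamingLookup<1024, 64, N, ap_uint<10>, ap_int<8>>(in0, out0, embeddings);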
+template <
+    unsigned NumEmbeddings,
+    unsigned EmbeddingDim,
+    unsigned NumInputs,
+    typename InputType,
+    typename EmbeddingType,
+    typename InputPackedType = ap_uint<InputType::width>,
+    typename OutputPackedType = ap_uint<EmbeddingDim*EmbeddingType::width>>
+void StreamingLookup(
+    hls::stream<InputPackedType> &in,
+    hls::stream<OutputPackedType> &out,
+    OutputPackedType const embeddings[NumEmbeddings]
+) {
+    for(unsigned i = 0; i < NumInputs; i++) {
+#pragma HLS PIPELINE II=1
+        InputPackedType inPackedElem = in.read();
+        InputType inElem = *(reinterpret_cast<InputType*>(&inPackedElem));
+        OutputPackedType outElem = embeddings[inElem];
+        out.write(outElem);
+    }
+}
+
+#endif
diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn
index 7a34312c4836189938a839d6eb147f3b2a1d6887..b7cf883898b450c0eb6f46bfc6d4e05a30978d4d 100644
--- a/docker/Dockerfile.finn
+++ b/docker/Dockerfile.finn
@@ -89,18 +89,24 @@ RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg
 
 # git-based Python repo dependencies
 # these are installed in editable mode for easier co-development
-ARG FINN_BASE_COMMIT="ac0b86a63eb937b869bfa453a996a8a8b8506546"
-ARG FINN_EXP_COMMIT="f82c0d9868bb88ea045dfadb28508d327d287221"
-ARG BREVITAS_COMMIT="462f86cdc60f9915baf13afd1676fb21da44c2ee"
-ARG PYVERILATOR_COMMIT="e2ff74030de3992dcac54bf1b6aad2915946e8cb"
+ARG FINN_BASE_COMMIT="e8facdd719b55839cca46da2cc4f4a4a372afb41"
+ARG QONNX_COMMIT="9f9eff95227cc57aadc6eafcbd44b7acda89f067"
+ARG FINN_EXP_COMMIT="af6102769226b82b639f243dc36f065340991513"
+ARG BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03"
+ARG PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e"
 ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
 ARG HLSLIB_COMMIT="6e06b2969dcfebbf963596b51b4fb70c5045acf6"
 ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e"
 ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
+
 # finn-base
 RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
 RUN git -C /workspace/finn-base checkout $FINN_BASE_COMMIT
 RUN pip install -e /workspace/finn-base
+# Install qonnx without dependencies; currently its only dependency is finn-base
+RUN git clone https://github.com/fastmachinelearning/qonnx.git /workspace/qonnx
+RUN git -C /workspace/qonnx checkout $QONNX_COMMIT
+RUN pip install --no-dependencies -e /workspace/qonnx
 # finn-experimental
 RUN git clone https://github.com/Xilinx/finn-experimental.git /workspace/finn-experimental
 RUN git -C /workspace/finn-experimental checkout $FINN_EXP_COMMIT
diff --git a/docs/finn/brevitas_export.rst b/docs/finn/brevitas_export.rst
index 65f6ab6b3053d9f11239b3c048143b3d2f346808..408b14fd2b6c99ce3ec128a0361a25b3f2c193a5 100644
--- a/docs/finn/brevitas_export.rst
+++ b/docs/finn/brevitas_export.rst
@@ -8,7 +8,13 @@ Brevitas Export
    :scale: 70%
    :align: center
 
-FINN expects an ONNX model as input. This can be a model trained with `Brevitas <https://github.com/Xilinx/brevitas>`_. Brevitas is a PyTorch library for quantization-aware training and the FINN Docker image comes with several `example Brevitas networks <https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq>`_. Brevitas provides an export of a quantized network in ONNX representation. The resulting model consists only of `ONNX standard nodes <https://github.com/onnx/onnx/blob/master/docs/Operators.md>`_, but also contains additional attributes for the ONNX nodes to represent low precision datatypes. To work with the model it is wrapped into :ref:`modelwrapper` provided by FINN.
+FINN expects an ONNX model as input. This can be a model trained with `Brevitas <https://github.com/Xilinx/brevitas>`_. Brevitas is a PyTorch library for quantization-aware training and the FINN Docker image comes with several `example Brevitas networks <https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq>`_. Brevitas can export a quantized network to ONNX in several flavors.
+Two of the Brevitas-exported ONNX variants can be ingested by FINN:
+
+   * FINN-ONNX: Quantized weights are exported as tensors with additional attributes that mark their low-precision datatypes, while quantized activations are exported as MultiThreshold nodes.
+   * QONNX: All quantization is represented using Quant, BinaryQuant or Trunc nodes. QONNX must first be converted into FINN-ONNX by :py:mod:`finn.transformation.qonnx.convert_qonnx_to_finn`.
+
+To work with either type of ONNX model, it is loaded into a :ref:`modelwrapper` provided by FINN.
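+
+As a rough sketch (assuming the exported QONNX model was saved as ``model.onnx``; only the modules referenced above are used), converting and loading such a model could look as follows::
+
+  from finn.core.modelwrapper import ModelWrapper
+  from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+
+  # load the exported QONNX model and convert it to the FINN-ONNX dialect
+  model = ModelWrapper("model.onnx")
+  model = model.transform(ConvertQONNXtoFINN())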
 
 At this stage we can already use the functional verification flow to simulate the model using Python, this is marked in the graphic with the dotted arrow. For more details please have look at :ref:`verification`.
 
diff --git a/docs/finn/developers.rst b/docs/finn/developers.rst
index 6e7fa0d920a943e468fd70464b050ab74cf8ec7d..508cd86a31b6284e072499987ae45864d3942e16 100644
--- a/docs/finn/developers.rst
+++ b/docs/finn/developers.rst
@@ -7,7 +7,7 @@ Developer documentation
 This page is intended to serve as a starting point for new FINN developers.
 Power users may also find this information useful.
 
-Getting started
+Prerequisites
 ================
 
 Before starting to do development on FINN it's a good idea to start
diff --git a/docs/finn/faq.rst b/docs/finn/faq.rst
index 87e36e0722e4db6b2efd5de5df343b7bdf68a719..e426bdb4e28dd02c83b47d59b59c318840815f78 100644
--- a/docs/finn/faq.rst
+++ b/docs/finn/faq.rst
@@ -4,68 +4,109 @@
 Frequently Asked Questions
 ***********************
 
-.. note:: **This page is under construction.**
+Can't find the answer to your question here? Check `FINN GitHub Discussions <https://github.com/Xilinx/finn/discussions>`_.
 
-Can I install FINN out of the Docker container?
-===============================================
 
-We do not support out of the Docker implementations at the moment. This is due
-to the high complexity of the FINN project dependencies.
+Can I install FINN outside of the Docker container?
+    We do not support installations outside of the Docker container at the moment, due
+    to the high complexity of the FINN project dependencies.
 
 Since FINN uses ONNX, can I compile any model from the ONNX Model Zoo to an FPGA accelerator?
-=============================================================================================
+    The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
+    types and quantization annotations. Networks must be first quantized using Brevitas and exported
+    to FINN-ONNX to be converted to FPGA accelerators.
 
-The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
-types and quantization annotations. Networks must be first quantized using Brevitas and exported
-to FINN-ONNX to be converted to FPGA accelerators.
 
 
-Can I deploy custom NNs with arbitrary precisions and layers using FINN?
-=========================================================================
 
-Yes, though the effort required and quality of results will vary.
-Although we do support arbitrary
-precision, the way we create the hardware isn't typically practical for more than
-4 bits, or very large networks for a single FPGA.
-In terms of layers, only a subset of quantized layers covered by the various FINN examples
-are currently supported.
-It is possible to add support for new layers, though we don't have tutorials for this in place
-just yet.
 
-Does FINN only work with the example networks?
-==============================================
+Can I deploy custom NNs with arbitrary precisions and layers using FINN?
+    Yes, though the effort required and quality of results will vary.
+    Although we do support arbitrary
+    precision, the way we create the hardware isn't typically practical for more than
+    4 bits, or for very large networks, on a single FPGA.
+    In terms of layers, only a subset of quantized layers covered by the various FINN examples
+    are currently supported.
+    It is possible to add support for new layers, though we don't have tutorials for this in place
+    just yet.
 
-FINN isn't restricted to the example networks;
-rather, it's restricted to certain patterns (e.g. certain layer types and their combinations).
-The current best practice for custom networks is to take a working network and gradually modify it.
+Does FINN only work with the example networks?
+    FINN isn't restricted to the example networks;
+    rather, it's restricted to certain patterns (e.g. certain layer types and their combinations).
+    The current best practice for custom networks is to take a working network and gradually modify it.
 
 What is the expected background for using FINN?
-===============================================
-
-Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
-is expected.
-Our goal is to make the tool in a shape and form so that no hardware/FPGA background
-should be necessary, although having some knowledge would give better results.
+    Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
+    is expected.
+    Our goal is to shape the tool so that no hardware/FPGA background
+    is necessary, although having some knowledge will give better results.
 
 What operating systems are supported by FINN?
-=============================================
-
-FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long
-as you install Docker (``docker-ce``) on your machine .
+    FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long
+    as you install Docker (``docker-ce``) on your machine.
 
 
 I am getting DocNav and Model_Composer errors when launching the Docker image.
-==============================================================================
-
-We do not mount those particular directories into the Docker container because they are not
-used. The errors are Vivado related but you can safely ignore them.
+    We do not mount those particular directories into the Docker container because they are not
+    used. The errors are Vivado related but you can safely ignore them.
 
 What board do you recommend to start working with FINN?
-=======================================================
-
-Our preferred target platforms are those supported by  `PYNQ <http://www.pynq.io/board.html>`_.
-For those boards we can offer end-to-end (DNN-to-bitstream) deployment,
-see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples.
-However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO)
-in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board,
-though you will have to do the system integration manually.
+    Our preferred target platforms are those supported by  `PYNQ <http://www.pynq.io/board.html>`_.
+    For those boards we can offer end-to-end (DNN-to-bitstream) deployment,
+    see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples.
+    However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO)
+    in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board,
+    though you will have to do the system integration manually.
+
+FINN-generated builds break after I restart my computer, because ``/tmp`` gets wiped.
+    See https://github.com/Xilinx/finn/discussions/404
+
+How can I target an arbitrary Xilinx FPGA without PYNQ support?
+    See https://github.com/Xilinx/finn/discussions/387
+
+Why do FINN-generated architectures need FIFOs between layers?
+    See https://github.com/Xilinx/finn/discussions/383
+
+How do I tell FINN to utilize DSPs instead of LUTs for MAC operations in particular layers?
+    This is done with the ``resType="dsp"`` attribute on ``StreamingFCLayer`` and ``Vector_Vector_Activate`` instances.
+    When using the ``build_dataflow`` system, this can be specified on a per-layer basis as part of one or more layers’
+    folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`), as in the sketch below.
+    This is a good idea for layers with many weight/input activation bits and high PE*SIMD.
+    See the `MobileNet-v1 build config for ZCU104 in finn-examples <https://github.com/Xilinx/finn-examples/blob/main/build/mobilenet-v1/folding_config/ZCU104_folding_config.json#L15>`_ for reference.
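+
+    An illustrative entry in such a folding config JSON file could look as follows (the layer name and values are made up for illustration)::
+
+      {
+        "StreamingFCLayer_Batch_0": {
+          "PE": 16,
+          "SIMD": 16,
+          "resType": "dsp"
+        }
+      }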
+
+
+How do I tell FINN to utilize a particular type of memory resource in particular layers?
+    This is done with the ``ram_style`` attribute. Check the particular ``HLSCustomOp`` attribute definition to see
+    which modes are supported (`example for StreamingFCLayer <https://github.com/Xilinx/finn/blob/dev/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py#L95>`_).
+    When using the ``build_dataflow`` system, this can be specified on a per-layer basis as part of one or more layers’
+    folding config (:py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig.folding_config_file`), as in the sketch below.
+    See the `MobileNet-v1 build config for ZCU104 in finn-examples <https://github.com/Xilinx/finn-examples/blob/main/build/mobilenet-v1/folding_config/ZCU104_folding_config.json#L15>`_ for reference.
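+
+    An illustrative folding config entry that sets the memory resource type could look as follows (the layer name and values are made up for illustration)::
+
+      {
+        "StreamingFCLayer_Batch_1": {
+          "PE": 8,
+          "SIMD": 8,
+          "ram_style": "block"
+        }
+      }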
+
+Which data layout do FINN-generated accelerators use? Big-endian? Little-endian?
+    The data layout used by FINN does not correspond to system-level big or little endian due to difficulties in defining what
+    the “word size” is and bit packing for smaller datatypes. FINN’s “word size” is dependent on the parallelization of the
+    first/last layers. For instance, if the first HLS layer is using SIMD=3 this means the “innermost dimension” in the
+    data packing functions will be of size 3.
+    When you use the verification infrastructure or the generated PYNQ Python drivers that FINN provides, the tool normally
+    takes care of any required data layout conversion on standard numpy arrays before presenting the data to the accelerator,
+    and vice versa on the output side. Doing this data packing and layout conversion manually can be messy at the moment.
+    If you need to do this manually, first examine how the `FINN PYNQ Python drivers <https://github.com/Xilinx/finn-examples/blob/main/finn_examples/driver.py#L379>`_ do this – notice how the input data is
+    first reshaped to create the “folded input shape” that reflects the word size of the first layer based on how much it
+    was parallelized, then data packing is applied to obtain a raw byte array (with some reversals going on) that can be
+    fed directly to the hardware. Another example of this is the `npy_to_rtlsim_input <https://github.com/Xilinx/finn-base/blob/dev/src/finn/util/data_packing.py#L289>`_ function, which converts npy arrays to lists of Python arbitrary-precision integers that we feed into pyverilator for rtl simulation.
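+
+    As a rough illustration of this manual flow (the shapes, datatype and bit width below are made up and depend on how much your first layer is parallelized)::
+
+      import numpy as np
+      from finn.core.datatype import DataType
+      from finn.util.data_packing import npy_to_rtlsim_input
+
+      # e.g. 96 INT8 input channels, first layer folded with SIMD=3
+      ibuf = np.random.randint(-128, 127, size=(1, 96)).astype(np.float32)
+      ibuf_folded = ibuf.reshape(1, 32, 3)  # folded input shape of the first layer
+      # pack each innermost vector into a 3*8=24-bit word for rtlsim
+      rtlsim_inp = npy_to_rtlsim_input(ibuf_folded, DataType["INT8"], 24)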
+
+Why does FIFO sizing take so long for my network? Is something wrong?
+    The automatic FIFO sizing in FINN can take quite a long time. It unfortunately doesn’t parallelize across multiple cores, since
+    it’s based on running an rtl simulation with lots of inputs and very large FIFOs, then observing the maximum occupancy/count
+    in each FIFO.
+
+What's a good starting point for the folding configuration if I want to make manual changes?
+    First, enable automatic folding options in ``build_dataflow`` such as ``target_fps``. This should find a decent set of
+    folding factors and save them to ``output_folder/auto_folding_config.json``, which you can use as a basis for creating the desired config.
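+
+    An illustrative sketch of such a run (the model file, board and values are made up; see :py:mod:`finn.builder.build_dataflow_config.DataflowBuildConfig` for the full set of options)::
+
+      from finn.builder.build_dataflow import build_dataflow_cfg
+      import finn.builder.build_dataflow_config as build_cfg
+
+      cfg = build_cfg.DataflowBuildConfig(
+          output_dir="output_folder",
+          target_fps=100000,
+          synth_clk_period_ns=10.0,
+          board="Pynq-Z1",
+          shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
+          generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
+      )
+      build_dataflow_cfg("model.onnx", cfg)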
diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst
index 14a1ec44a00fc9448b067bae6480091897f47472..af7a05751b0b2f9c991849e2c808e089aaac68d9 100644
--- a/docs/finn/getting_started.rst
+++ b/docs/finn/getting_started.rst
@@ -12,7 +12,8 @@ Quickstart
 3. Clone the FINN compiler from the repo: ``git clone https://github.com/Xilinx/finn/`` and go into the directory where it is cloned
 4. Execute ``./run-docker.sh quicktest`` to verify your installation.
 5. Optionally, follow the instructions on :ref:`PYNQ board first-time setup` or :ref:`Alveo first-time setup` for board setup.
-6. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler.
+6. Optionally, set up a `Vivado/Vitis license`_.
+7. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler.
 
 
 How do I use FINN?
@@ -28,7 +29,7 @@ In general, the approach for using the FINN framework is as follows:
 
 1. Train your own quantized neural network (QNN) in `Brevitas <https://github.com/Xilinx/brevitas>`_. We have some `guidelines <https://bit.ly/finn-hls4ml-qat-guidelines>`_ on quantization-aware training (QAT).
 2. Export to FINN-ONNX by following `this tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/basics/1_brevitas_network_import.ipynb>`_ .
-3. Use FINN's ``build_dataflow`` system on the exported model by following `this tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb>`_
+3. Use FINN's ``build_dataflow`` system on the exported model by following this `tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb>`_
 4. Adjust your QNN topology, quantization settings and ``build_dataflow`` configuration to get the desired results.
 
 Please note that the framework is still under development, and how well this works will depend on how similar your custom network is to the examples we provide.
@@ -111,6 +112,7 @@ These are summarized below:
 * (optional) ``FINN_DOCKER_TAG`` (autogenerated) specifies the Docker image tag to use.
 * (optional) ``FINN_DOCKER_RUN_AS_ROOT`` (default 0) if set to 1 then run Docker container as root, default is the current user.
 * (optional) ``FINN_DOCKER_GPU`` (autodetected) if not 0 then expose all Nvidia GPUs or those selected by ``NVIDIA_VISIBLE_DEVICES`` to Docker container for accelerated DNN training. Requires `Nvidia Container Toolkit <https://github.com/NVIDIA/nvidia-docker>`_
+* (optional) ``FINN_DOCKER_EXTRA`` (default "") pass extra arguments to the ``docker run`` command when executing ``./run-docker.sh``
 * (optional) ``NVIDIA_VISIBLE_DEVICES`` (default "") specifies specific Nvidia GPUs to use in Docker container. Possible values are a comma-separated list of GPU UUID(s) or index(es) e.g. ``0,1,2``, ``all``, ``none``, or void/empty/unset.
 * (optional) ``DOCKER_BUILDKIT`` (default "1") enables `Docker BuildKit <https://docs.docker.com/develop/develop-images/build_enhancements/>`_ for faster Docker image rebuilding (recommended).
 
@@ -181,15 +183,26 @@ On the host side:
 5. `Set up public key authentication <https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server>`_. Copy your private key to the ``finn/ssh_keys`` folder on the host to get password-less deployment and remote execution.
 6. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time.
 
+Vivado/Vitis license
+*********************
+If you are targeting Xilinx FPGA parts that need specific licenses (non-WebPack), you can make these available to the
+FINN Docker container by passing extra arguments. To do this, use the ``FINN_DOCKER_EXTRA`` environment variable as follows:
 
+::
+
+  export FINN_DOCKER_EXTRA=" -v /path/to/licenses:/path/to/licenses -e XILINXD_LICENSE_FILE=/path/to/licenses "
+
+The above example mounts ``/path/to/licenses`` from the host into the same path on the Docker container, and sets the
+value of the ``XILINXD_LICENSE_FILE`` environment variable.
 
 System Requirements
 ====================
 
 * Ubuntu 18.04 with ``bash`` installed
 * Docker `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_
-* A working Vivado 2019.1 or 2020.1 installation
-* A ``VIVADO_PATH`` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located)
+* A working Vivado 2020.1 installation
+* ``FINN_XILINX_PATH`` and ``FINN_XILINX_VERSION`` environment variables correctly set, see `Quickstart`_
+* *(optional)* `Vivado/Vitis license`_ if targeting non-WebPack FPGA parts.
 * *(optional)* A PYNQ board with a network connection, see `PYNQ board first-time setup`_
 * *(optional)* An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_ )
 
diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst
index 0fbc3cf72795005591994ddca0fa0d58b72622a8..9305f7840216f6d076a11337ddb3cfa588f1a062 100644
--- a/docs/finn/internals.rst
+++ b/docs/finn/internals.rst
@@ -4,12 +4,12 @@
 Internals
 *********
 
-Intermediate Representation: FINN-ONNX
-======================================
+Intermediate Representation: QONNX and FINN-ONNX
+================================================
 
 FINN uses `ONNX <https://github.com/onnx/onnx>`_ as an intermediate representation (IR) for neural networks. As such, almost every component inside FINN uses ONNX and its `Python API <https://github.com/onnx/onnx/blob/master/docs/PythonAPIOverview.md>`_, so you may want to familiarize yourself with how ONNX represents DNNs. Specifically, the `ONNX protobuf description <https://github.com/onnx/onnx/blob/master/onnx/onnx.proto>`_ (or its `human-readable documentation <https://github.com/onnx/onnx/blob/master/docs/IR.md>`_ and the `operator schemas <https://github.com/onnx/onnx/blob/master/docs/Operators.md>`_ are useful as reference documents. We also provide a Jupyter notebook that can help to get familiar with ONNX by showing how to work with a simple ONNX model in FINN, see chapter :ref:`tutorials` for details.
 
-.. note:: FINN uses ONNX is a specific way that we refer to as FINN-ONNX, and not all ONNX graphs are supported by FINN (and vice versa).
+.. note:: FINN supports two specialized variants of ONNX called QONNX and FINN-ONNX, and not all ONNX graphs are supported by FINN (and vice versa).
 
 Custom Quantization Annotations
 ===============================
diff --git a/docs/finn/source_code/finn.analysis.rst b/docs/finn/source_code/finn.analysis.rst
index 7312150657c86976638e73fdf2c0450160989a6a..1de42ac32bc62ce71e039f63168302b22711f454 100644
--- a/docs/finn/source_code/finn.analysis.rst
+++ b/docs/finn/source_code/finn.analysis.rst
@@ -23,6 +23,13 @@ finn.analysis.base
    :undoc-members:
    :show-inheritance:
 
+finn.analysis.inference\_cost
+-----------------------------
+
+.. automodule:: finn.analysis.inference_cost
+   :members:
+   :undoc-members:
+   :show-inheritance:
 
 finn.analysis.topology
 -----------------------------
diff --git a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
index 7b4e7bfa05f895cd03aed2859576e07db28bd9f9..34a6285f227690c87c568855e7ca70ddb9b2764c 100644
--- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
@@ -13,6 +13,23 @@ Base Class
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.addstreams\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.addstreams_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.channelwise\_op\_batch
+----------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.channelwise_op_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.custom\_op.fpgadataflow.convolutioninputgenerator
 -------------------------------------------------------------
 
@@ -21,6 +38,87 @@ finn.custom\_op.fpgadataflow.convolutioninputgenerator
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.convolutioninputgenerator1d
+-------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.convolutioninputgenerator1d
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.downsampler
+-----------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.downsampler
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.duplicatestreams\_batch
+----------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.duplicatestreams_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.fmpadding\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.fmpadding_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.globalaccpool\_batch
+----------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.globalaccpool_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.iodma
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.iodma
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.labelselect\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.labelselect_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.lookup
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.lookup
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.pool\_batch
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.pool_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.streamingdataflowpartition
+--------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.streamingdataflowpartition
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.custom\_op.fpgadataflow.streamingdatawidthconverter\_batch
 ----------------------------------------------------------------------
 
@@ -61,6 +159,15 @@ finn.custom\_op.fpgadataflow.templates
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.fpgadataflow.thresholding\_batch
+----------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.thresholding_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.custom\_op.fpgadataflow.tlastmarker
 -----------------------------------------------
 
@@ -68,3 +175,19 @@ finn.custom\_op.fpgadataflow.tlastmarker
    :members:
    :undoc-members:
    :show-inheritance:
+
+finn.custom\_op.fpgadataflow.upsampler
+-----------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.upsampler
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.fpgadataflow.vector\_vector\_activate\_batch
+----------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.vector_vector_activate_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/finn/source_code/finn.custom_op.general.rst b/docs/finn/source_code/finn.custom_op.general.rst
index e86774a48e22b5af9e4d2995a4287a740b1c08e5..87749fd69e541e628436aa904c180338418addc1 100644
--- a/docs/finn/source_code/finn.custom_op.general.rst
+++ b/docs/finn/source_code/finn.custom_op.general.rst
@@ -5,6 +5,14 @@ Custom Op - General
 General Custom Ops
 ===================
 
+finn.custom\_op.general.bipolar_quant
+--------------------------------------
+
+.. automodule:: finn.custom_op.general.bipolar_quant
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.custom\_op.general.debugmarker
 -----------------------------------
 
@@ -13,6 +21,14 @@ finn.custom\_op.general.debugmarker
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.general.genericpartition
+-----------------------------------------
+
+.. automodule:: finn.custom_op.general.genericpartition
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.custom\_op.general.im2col
 ------------------------------
 
@@ -37,6 +53,14 @@ finn.custom\_op.general.multithreshold
    :undoc-members:
    :show-inheritance:
 
+finn.custom\_op.general.quant
+------------------------------
+
+.. automodule:: finn.custom_op.general.quant
+  :members:
+  :undoc-members:
+  :show-inheritance:
+
 finn.custom\_op.general.quantavgpool2d
 --------------------------------------
 
@@ -45,13 +69,13 @@ finn.custom\_op.general.quantavgpool2d
   :undoc-members:
   :show-inheritance:
 
-finn.custom\_op.general.streamingdataflowpartition
----------------------------------------------------
+finn.custom\_op.general.trunc
+------------------------------
 
-.. automodule:: finn.custom_op.general.streamingdataflowpartition
-   :members:
-   :undoc-members:
-   :show-inheritance:
+.. automodule:: finn.custom_op.general.trunc
+  :members:
+  :undoc-members:
+  :show-inheritance:
 
 finn.custom\_op.general.xnorpopcount
 -------------------------------------
diff --git a/docs/finn/source_code/finn.transformation.fpgadataflow.rst b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
index 42bc7fb5315756b924e0d1cce58ca4e110bda824..b1e7075bdcfb675a894f3e66b61d59117e4f078d 100644
--- a/docs/finn/source_code/finn.transformation.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
@@ -62,6 +62,14 @@ finn.transformation.fpgadataflow.create\_stitched\_ip
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.fpgadataflow.externalize\_params
+------------------------------------------------------------
+
+.. automodule:: finn.transformation.fpgadataflow.externalize_params
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.transformation.fpgadataflow.floorplan
 ----------------------------------------------------
 
diff --git a/docs/finn/source_code/finn.transformation.qonnx.rst b/docs/finn/source_code/finn.transformation.qonnx.rst
new file mode 100644
index 0000000000000000000000000000000000000000..8320e19efb81dd5a52f750e22e280f41070bf48c
--- /dev/null
+++ b/docs/finn/source_code/finn.transformation.qonnx.rst
@@ -0,0 +1,51 @@
+************************
+Transformation - QONNX
+************************
+
+Transformation (QONNX)
+===========================
+
+.. automodule:: finn.transformation.qonnx
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.convert\_qonnx\_to\_finn
+---------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.convert_qonnx_to_finn
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.fold\_quant\_weights
+-----------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.fold_quant_weights
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.infer\_quant\_avg\_pool\_2d
+------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.infer_quant_avg_pool_2d
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.qonnx\_activation\_handlers
+-------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.qonnx_activation_handlers
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.qonnx.quant\_act\_to\_multithreshold
+---------------------------------------------------------
+
+.. automodule:: finn.transformation.qonnx.quant_act_to_multithreshold
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/finn/source_code/finn.transformation.rst b/docs/finn/source_code/finn.transformation.rst
index aeb0d7614222740315633f7658cab9cc7e75490b..cffb0fd0f9e963c02a6986e47e1654951ad3bab0 100644
--- a/docs/finn/source_code/finn.transformation.rst
+++ b/docs/finn/source_code/finn.transformation.rst
@@ -11,6 +11,7 @@ Submodules
    :maxdepth: 2
 
    finn.transformation.fpgadataflow
+   finn.transformation.qonnx
    finn.transformation.streamline
 
 Transformation Passes
@@ -40,6 +41,14 @@ finn.transformation.bipolar\_to\_xnor
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.change\_3d\_tensors\_to\_4d
+------------------------------------------------
+
+.. automodule:: finn.transformation.change_3d_tensors_to_4d
+  :members:
+  :undoc-members:
+  :show-inheritance:
+
 finn.transformation.change\_datalayout
 --------------------------------------------
 
@@ -48,6 +57,13 @@ finn.transformation.change\_datalayout
   :undoc-members:
   :show-inheritance:
 
+finn.transformation.create\_generic\_partitions
+------------------------------------------------
+
+.. automodule:: finn.transformation.create_generic_partitions
+  :members:
+  :undoc-members:
+  :show-inheritance:
 
 finn.transformation.double\_to\_single\_float
 ----------------------------------------------------
@@ -57,6 +73,23 @@ finn.transformation.double\_to\_single\_float
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.extend\_partition
+------------------------------------------
+
+.. automodule:: finn.transformation.extend_partition
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.transformation.extract\_conv\_bias
+------------------------------------------
+
+.. automodule:: finn.transformation.extract_conv_bias
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.transformation.fold\_constants
 ------------------------------------------
 
@@ -65,6 +98,14 @@ finn.transformation.fold\_constants
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.gemm\_to\_matmul
+------------------------------------------
+
+.. automodule:: finn.transformation.gemm_to_matmul
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.transformation.general
 ----------------------------------
 
@@ -113,6 +154,13 @@ finn.transformation.lower\_convs\_to\_matmul
    :undoc-members:
    :show-inheritance:
 
+finn.transformation.make\_input\_chanlast
+------------------------------------------
+
+.. automodule:: finn.transformation.make_input_chanlast
+  :members:
+  :undoc-members:
+  :show-inheritance:
 
 finn.transformation.merge\_onnx\_models
 ----------------------------------------
@@ -130,3 +178,11 @@ finn.transformation.move\_reshape
    :members:
    :undoc-members:
    :show-inheritance:
+
+finn.transformation.remove
+-------------------------------------
+
+.. automodule:: finn.transformation.remove
+  :members:
+  :undoc-members:
+  :show-inheritance:
diff --git a/docs/finn/source_code/finn.transformation.streamline.rst b/docs/finn/source_code/finn.transformation.streamline.rst
index f43d6d12314d3bad38f189d2831e21447f10cf10..9ed4bbe1d8c6b12c67e1c0c2927e8b7067410a9c 100644
--- a/docs/finn/source_code/finn.transformation.streamline.rst
+++ b/docs/finn/source_code/finn.transformation.streamline.rst
@@ -26,13 +26,6 @@ finn.transformation.streamline.collapse\_repeated
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.streamline.remove
--------------------------------------
-
-.. automodule:: finn.transformation.streamline.remove
-  :members:
-  :undoc-members:
-  :show-inheritance:
 
 finn.transformation.streamline.reorder
 ---------------------------------------------
diff --git a/docs/finn/source_code/finn.util.rst b/docs/finn/source_code/finn.util.rst
index 82e4bf3261582c9be622cbe3f15af38ba5e3fa41..62b72c2ac84567b20fee73a16e82b5857d698c9d 100644
--- a/docs/finn/source_code/finn.util.rst
+++ b/docs/finn/source_code/finn.util.rst
@@ -72,6 +72,15 @@ finn.util.onnx
    :undoc-members:
    :show-inheritance:
 
+finn.util.platforms
+--------------------
+
+.. automodule:: finn.util.platforms
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
 finn.util.pytorch
 ------------------
 
diff --git a/finn-rtllib/memstream/component.xml b/finn-rtllib/memstream/component.xml
index 3d6767abfc11eb114ddb084f1f7275f7a93d0607..1e5b710dc86bde4d442ce9e83b188aeed24388c5 100644
--- a/finn-rtllib/memstream/component.xml
+++ b/finn-rtllib/memstream/component.xml
@@ -1662,9 +1662,27 @@
   <spirit:vendorExtensions>
     <xilinx:coreExtensions>
       <xilinx:supportedFamilies>
-        <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family>
-        <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Beta">aartix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Beta">akintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Beta">artix7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Beta">artix7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Beta">azynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexu</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">kintexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qkintex7l</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qvirtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qzynq</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">qzynqplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">versal</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtex7</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexu</xilinx:family>
         <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexupluse58g</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">zynq</xilinx:family>
         <xilinx:family xilinx:lifeCycle="Production">zynquplus</xilinx:family>
       </xilinx:supportedFamilies>
       <xilinx:taxonomies>
diff --git a/notebooks/advanced/2_custom_op.ipynb b/notebooks/advanced/2_custom_op.ipynb
index 7d7bc5c50b25e5b270fadc641bc2fa40d6dadddd..57f2601c73853ba9acc5f8ff85e0a18efc7e9a17 100644
--- a/notebooks/advanced/2_custom_op.ipynb
+++ b/notebooks/advanced/2_custom_op.ipynb
@@ -58,13 +58,11 @@
        " '__repr__',\n",
        " '__setattr__',\n",
        " '__sizeof__',\n",
+       " '__slots__',\n",
        " '__str__',\n",
        " '__subclasshook__',\n",
        " '__weakref__',\n",
-       " '_abc_cache',\n",
-       " '_abc_negative_cache',\n",
-       " '_abc_negative_cache_version',\n",
-       " '_abc_registry',\n",
+       " '_abc_impl',\n",
        " 'execute_node',\n",
        " 'get_nodeattr',\n",
        " 'get_nodeattr_allowed_values',\n",
@@ -211,7 +209,7 @@
        "{'DebugMarker': finn.custom_op.general.debugmarker.DebugMarker,\n",
        " 'QuantAvgPool2d': finn.custom_op.general.quantavgpool2d.QuantAvgPool2d,\n",
        " 'MaxPoolNHWC': finn.custom_op.general.maxpoolnhwc.MaxPoolNHWC,\n",
-       " 'StreamingDataflowPartition': finn.custom_op.general.streamingdataflowpartition.StreamingDataflowPartition,\n",
+       " 'GenericPartition': finn.custom_op.general.genericpartition.GenericPartition,\n",
        " 'MultiThreshold': finn.custom_op.general.multithreshold.MultiThreshold,\n",
        " 'XnorPopcountMatMul': finn.custom_op.general.xnorpopcount.XnorPopcountMatMul,\n",
        " 'Im2Col': finn.custom_op.general.im2col.Im2Col,\n",
@@ -335,8 +333,8 @@
     {
      "data": {
       "text/plain": [
-       "array([[[-6.,  2., -3., -6.],\n",
-       "        [-6.,  0.,  1., -2.]]], dtype=float32)"
+       "array([[[ 0., -3.,  1., -8.],\n",
+       "        [ 2., -2., -4., -8.]]], dtype=float32)"
       ]
      },
      "execution_count": 7,
@@ -349,7 +347,7 @@
     "from finn.util.basic import gen_finn_dt_tensor\n",
     "\n",
     "# generate a random input of e.g signed 4-bit values\n",
-    "random_input = gen_finn_dt_tensor(DataType.INT4, input_shape)\n",
+    "random_input = gen_finn_dt_tensor(DataType[\"INT4\"], input_shape)\n",
     "random_input\n"
    ]
   },
@@ -368,8 +366,8 @@
     {
      "data": {
       "text/plain": [
-       "{'outp': array([[[36.,  4.,  9., 36.],\n",
-       "         [36.,  0.,  1.,  4.]]], dtype=float32)}"
+       "{'outp': array([[[ 0.,  9.,  1., 64.],\n",
+       "         [ 4.,  4., 16., 64.]]], dtype=float32)}"
       ]
      },
      "execution_count": 8,
@@ -576,7 +574,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Available functions: ['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_cache', '_abc_negative_cache', '_abc_negative_cache_version', '_abc_registry', 'execute_node', 'get_nodeattr', 'get_nodeattr_allowed_values', 'get_nodeattr_def', 'get_nodeattr_types', 'infer_node_datatype', 'make_shape_compatible_op', 'my_custom_cpp_gen', 'onnx_node', 'set_nodeattr', 'verify_node']\n",
+      "Available functions: ['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', 'execute_node', 'get_nodeattr', 'get_nodeattr_allowed_values', 'get_nodeattr_def', 'get_nodeattr_types', 'infer_node_datatype', 'make_shape_compatible_op', 'my_custom_cpp_gen', 'onnx_node', 'set_nodeattr', 'verify_node']\n",
       "codegen_dir: \n",
       "exec_mode: python\n"
      ]
@@ -666,7 +664,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/tmp/finn_dev_jalezeta/my_custom_oppaxpincq\n"
+      "/tmp/finn_dev_maltanar/my_custom_oppswiou3i\n"
      ]
     }
    ],
@@ -820,8 +818,8 @@
     {
      "data": {
       "text/plain": [
-       "array([[[-8.,  4.,  7.,  2.],\n",
-       "        [-5., -1.,  2.,  0.]]], dtype=float32)"
+       "array([[[-6.,  3.,  2., -5.],\n",
+       "        [ 5.,  2.,  0., -2.]]], dtype=float32)"
       ]
      },
      "execution_count": 21,
@@ -831,7 +829,7 @@
    ],
    "source": [
     "# generate a random input of e.g signed 4-bit values\n",
-    "random_input = gen_finn_dt_tensor(DataType.INT4, input_shape)\n",
+    "random_input = gen_finn_dt_tensor(DataType[\"INT4\"], input_shape)\n",
     "random_input"
    ]
   },
@@ -850,8 +848,8 @@
     {
      "data": {
       "text/plain": [
-       "{'outp': array([[[64., 16., 49.,  4.],\n",
-       "         [25.,  1.,  4.,  0.]]], dtype=float32)}"
+       "{'outp': array([[[36.,  9.,  4., 25.],\n",
+       "         [25.,  4.,  0.,  4.]]], dtype=float32)}"
       ]
      },
      "execution_count": 22,
@@ -882,8 +880,8 @@
     {
      "data": {
       "text/plain": [
-       "{'outp': array([[[64., 16., 49.,  4.],\n",
-       "         [25.,  1.,  4.,  0.]]])}"
+       "{'outp': array([[[36.,  9.,  4., 25.],\n",
+       "         [25.,  4.,  0.,  4.]]])}"
       ]
      },
      "execution_count": 23,
@@ -897,6 +895,13 @@
     "ret = execute_onnx(mixedop_graph_new, inp_dict)\n",
     "ret"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -915,7 +920,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/basics/1_brevitas_network_import.ipynb b/notebooks/basics/1_brevitas_network_import.ipynb
index 8ba7d00a171f68577b28c5897f0106ea4207a6ef..b6d6c3bdfd2962987a63c62a5532e7969a33982f 100644
--- a/notebooks/basics/1_brevitas_network_import.ipynb
+++ b/notebooks/basics/1_brevitas_network_import.ipynb
@@ -17,7 +17,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -36,121 +36,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "# MIT License\n",
-      "#\n",
-      "# Copyright (c) 2019 Xilinx\n",
-      "#\n",
-      "# Permission is hereby granted, free of charge, to any person obtaining a copy\n",
-      "# of this software and associated documentation files (the \"Software\"), to deal\n",
-      "# in the Software without restriction, including without limitation the rights\n",
-      "# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n",
-      "# copies of the Software, and to permit persons to whom the Software is\n",
-      "# furnished to do so, subject to the following conditions:\n",
-      "#\n",
-      "# The above copyright notice and this permission notice shall be included in all\n",
-      "# copies or substantial portions of the Software.\n",
-      "#\n",
-      "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n",
-      "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n",
-      "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n",
-      "# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n",
-      "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n",
-      "# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n",
-      "# SOFTWARE.\n",
-      "\n",
-      "import ast\n",
-      "from functools import reduce\n",
-      "from operator import mul\n",
-      "\n",
-      "from torch.nn import Module, ModuleList, BatchNorm1d, Dropout\n",
-      "import torch\n",
-      "\n",
-      "from brevitas.nn import QuantIdentity, QuantLinear\n",
-      "from .common import CommonWeightQuant, CommonActQuant\n",
-      "from .tensor_norm import TensorNorm\n",
-      "\n",
-      "DROPOUT = 0.2\n",
-      "\n",
-      "\n",
-      "class FC(Module):\n",
-      "\n",
-      "    def __init__(\n",
-      "            self,\n",
-      "            num_classes,\n",
-      "            weight_bit_width,\n",
-      "            act_bit_width,\n",
-      "            in_bit_width,\n",
-      "            in_channels,\n",
-      "            out_features,\n",
-      "            in_features=(28, 28)):\n",
-      "        super(FC, self).__init__()\n",
-      "\n",
-      "        self.features = ModuleList()\n",
-      "        self.features.append(QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))\n",
-      "        self.features.append(Dropout(p=DROPOUT))\n",
-      "        in_features = reduce(mul, in_features)\n",
-      "        for out_features in out_features:\n",
-      "            self.features.append(QuantLinear(\n",
-      "                in_features=in_features,\n",
-      "                out_features=out_features,\n",
-      "                bias=False,\n",
-      "                weight_bit_width=weight_bit_width,\n",
-      "                weight_quant=CommonWeightQuant))\n",
-      "            in_features = out_features\n",
-      "            self.features.append(BatchNorm1d(num_features=in_features))\n",
-      "            self.features.append(QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))\n",
-      "            self.features.append(Dropout(p=DROPOUT))\n",
-      "        self.features.append(QuantLinear(\n",
-      "                in_features=in_features,\n",
-      "                out_features=num_classes,\n",
-      "                bias=False,\n",
-      "                weight_bit_width=weight_bit_width,\n",
-      "                weight_quant=CommonWeightQuant))\n",
-      "        self.features.append(TensorNorm())\n",
-      "\n",
-      "        for m in self.modules():\n",
-      "          if isinstance(m, QuantLinear):\n",
-      "            torch.nn.init.uniform_(m.weight.data, -1, 1)\n",
-      "\n",
-      "    def clip_weights(self, min_val, max_val):\n",
-      "        for mod in self.features:\n",
-      "            if isinstance(mod, QuantLinear):\n",
-      "                mod.weight.data.clamp_(min_val, max_val)\n",
-      "    \n",
-      "    def forward(self, x):\n",
-      "        x = x.view(x.shape[0], -1)\n",
-      "        x = 2.0 * x - torch.tensor([1.0], device=x.device)\n",
-      "        for mod in self.features:\n",
-      "            x = mod(x)\n",
-      "        return x\n",
-      "\n",
-      "\n",
-      "def fc(cfg):\n",
-      "    weight_bit_width = cfg.getint('QUANT', 'WEIGHT_BIT_WIDTH')\n",
-      "    act_bit_width = cfg.getint('QUANT', 'ACT_BIT_WIDTH')\n",
-      "    in_bit_width = cfg.getint('QUANT', 'IN_BIT_WIDTH')\n",
-      "    num_classes = cfg.getint('MODEL', 'NUM_CLASSES')\n",
-      "    in_channels = cfg.getint('MODEL', 'IN_CHANNELS')\n",
-      "    out_features = ast.literal_eval(cfg.get('MODEL', 'OUT_FEATURES'))\n",
-      "    net = FC(\n",
-      "        weight_bit_width=weight_bit_width,\n",
-      "        act_bit_width=act_bit_width,\n",
-      "        in_bit_width=in_bit_width,\n",
-      "        in_channels=in_channels,\n",
-      "        out_features=out_features,\n",
-      "        num_classes=num_classes)\n",
-      "    return net\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from brevitas_examples import bnn_pynq\n",
     "showSrc(bnn_pynq.models.FC)"
@@ -165,255 +53,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "FC(\n",
-       "  (features): ModuleList(\n",
-       "    (0): QuantIdentity(\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (act_quant): ActQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
-       "          (activation_impl): Identity()\n",
-       "          (tensor_quant): ClampedBinaryQuant(\n",
-       "            (scaling_impl): ConstScaling(\n",
-       "              (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "                (restrict_value_impl): FloatRestrictValue()\n",
-       "                (clamp_min_ste): Identity()\n",
-       "              )\n",
-       "              (value): StatelessBuffer()\n",
-       "            )\n",
-       "            (bit_width): BitWidthConst(\n",
-       "              (bit_width): StatelessBuffer()\n",
-       "            )\n",
-       "            (delay_wrapper): DelayWrapper(\n",
-       "              (delay_impl): _NoDelay()\n",
-       "            )\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "    )\n",
-       "    (1): Dropout(p=0.2)\n",
-       "    (2): QuantLinear(\n",
-       "      in_features=784, out_features=1024, bias=False\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (output_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (weight_quant): WeightQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (tensor_quant): BinaryQuant(\n",
-       "          (scaling_impl): ConstScaling(\n",
-       "            (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "              (restrict_value_impl): FloatRestrictValue()\n",
-       "              (clamp_min_ste): Identity()\n",
-       "            )\n",
-       "            (value): StatelessBuffer()\n",
-       "          )\n",
-       "          (bit_width): BitWidthConst(\n",
-       "            (bit_width): StatelessBuffer()\n",
-       "          )\n",
-       "          (delay_wrapper): DelayWrapper(\n",
-       "            (delay_impl): _NoDelay()\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "      (bias_quant): BiasQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "    )\n",
-       "    (3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-       "    (4): QuantIdentity(\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (act_quant): ActQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
-       "          (activation_impl): Identity()\n",
-       "          (tensor_quant): ClampedBinaryQuant(\n",
-       "            (scaling_impl): ConstScaling(\n",
-       "              (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "                (restrict_value_impl): FloatRestrictValue()\n",
-       "                (clamp_min_ste): Identity()\n",
-       "              )\n",
-       "              (value): StatelessBuffer()\n",
-       "            )\n",
-       "            (bit_width): BitWidthConst(\n",
-       "              (bit_width): StatelessBuffer()\n",
-       "            )\n",
-       "            (delay_wrapper): DelayWrapper(\n",
-       "              (delay_impl): _NoDelay()\n",
-       "            )\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "    )\n",
-       "    (5): Dropout(p=0.2)\n",
-       "    (6): QuantLinear(\n",
-       "      in_features=1024, out_features=1024, bias=False\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (output_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (weight_quant): WeightQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (tensor_quant): BinaryQuant(\n",
-       "          (scaling_impl): ConstScaling(\n",
-       "            (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "              (restrict_value_impl): FloatRestrictValue()\n",
-       "              (clamp_min_ste): Identity()\n",
-       "            )\n",
-       "            (value): StatelessBuffer()\n",
-       "          )\n",
-       "          (bit_width): BitWidthConst(\n",
-       "            (bit_width): StatelessBuffer()\n",
-       "          )\n",
-       "          (delay_wrapper): DelayWrapper(\n",
-       "            (delay_impl): _NoDelay()\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "      (bias_quant): BiasQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "    )\n",
-       "    (7): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-       "    (8): QuantIdentity(\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (act_quant): ActQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
-       "          (activation_impl): Identity()\n",
-       "          (tensor_quant): ClampedBinaryQuant(\n",
-       "            (scaling_impl): ConstScaling(\n",
-       "              (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "                (restrict_value_impl): FloatRestrictValue()\n",
-       "                (clamp_min_ste): Identity()\n",
-       "              )\n",
-       "              (value): StatelessBuffer()\n",
-       "            )\n",
-       "            (bit_width): BitWidthConst(\n",
-       "              (bit_width): StatelessBuffer()\n",
-       "            )\n",
-       "            (delay_wrapper): DelayWrapper(\n",
-       "              (delay_impl): _NoDelay()\n",
-       "            )\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "    )\n",
-       "    (9): Dropout(p=0.2)\n",
-       "    (10): QuantLinear(\n",
-       "      in_features=1024, out_features=1024, bias=False\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (output_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (weight_quant): WeightQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (tensor_quant): BinaryQuant(\n",
-       "          (scaling_impl): ConstScaling(\n",
-       "            (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "              (restrict_value_impl): FloatRestrictValue()\n",
-       "              (clamp_min_ste): Identity()\n",
-       "            )\n",
-       "            (value): StatelessBuffer()\n",
-       "          )\n",
-       "          (bit_width): BitWidthConst(\n",
-       "            (bit_width): StatelessBuffer()\n",
-       "          )\n",
-       "          (delay_wrapper): DelayWrapper(\n",
-       "            (delay_impl): _NoDelay()\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "      (bias_quant): BiasQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "    )\n",
-       "    (11): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
-       "    (12): QuantIdentity(\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (act_quant): ActQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (fused_activation_quant_proxy): FusedActivationQuantProxy(\n",
-       "          (activation_impl): Identity()\n",
-       "          (tensor_quant): ClampedBinaryQuant(\n",
-       "            (scaling_impl): ConstScaling(\n",
-       "              (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "                (restrict_value_impl): FloatRestrictValue()\n",
-       "                (clamp_min_ste): Identity()\n",
-       "              )\n",
-       "              (value): StatelessBuffer()\n",
-       "            )\n",
-       "            (bit_width): BitWidthConst(\n",
-       "              (bit_width): StatelessBuffer()\n",
-       "            )\n",
-       "            (delay_wrapper): DelayWrapper(\n",
-       "              (delay_impl): _NoDelay()\n",
-       "            )\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "    )\n",
-       "    (13): Dropout(p=0.2)\n",
-       "    (14): QuantLinear(\n",
-       "      in_features=1024, out_features=10, bias=False\n",
-       "      (input_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (output_quant): IdentityQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "      (weight_quant): WeightQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "        (tensor_quant): BinaryQuant(\n",
-       "          (scaling_impl): ConstScaling(\n",
-       "            (restrict_clamp_scaling): _RestrictClampValue(\n",
-       "              (restrict_value_impl): FloatRestrictValue()\n",
-       "              (clamp_min_ste): Identity()\n",
-       "            )\n",
-       "            (value): StatelessBuffer()\n",
-       "          )\n",
-       "          (bit_width): BitWidthConst(\n",
-       "            (bit_width): StatelessBuffer()\n",
-       "          )\n",
-       "          (delay_wrapper): DelayWrapper(\n",
-       "            (delay_impl): _NoDelay()\n",
-       "          )\n",
-       "        )\n",
-       "      )\n",
-       "      (bias_quant): BiasQuantProxyFromInjector(\n",
-       "        (_zero_hw_sentinel): StatelessBuffer()\n",
-       "      )\n",
-       "    )\n",
-       "    (15): TensorNorm()\n",
-       "  )\n",
-       ")"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from finn.util.test import get_test_model\n",
     "lfc = get_test_model(netname = \"LFC\", wbits = 1, abits = 1, pretrained = True)\n",
@@ -429,22 +71,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAARYElEQVR4nO3dfYyVZXrH8d/FoDAw8iYRCaisG/5QqmUbgk1KyOKmxlUMbKJm/aPauAmarMmqTVqz/UOSaqJVa/pH3YStL9CsmiWoq0a7a82mWo1GNFQQW1CULGR4E5H3t+HqH/NgZ3We6549z3nOc9z7+0kmM3Ouec65OTM/zsv13Pdt7i4Af/xGNT0AAJ1B2IFMEHYgE4QdyARhBzIxupM3Zma89Z+ZUaPKH09OnTpV23VXvf6enp6wPjAw0PJ1183dbbjLK4XdzK6U9M+SeiT9q7vfV+X6cmU27O/mS6k/6ip/eKNHx38CqcCk6r29vaW1Q4cOhcem9PX1hfUDBw6U1lIt50mTJoX1zz77LKx3o5afxptZj6R/kfR9SRdLusHMLm7XwAC0V5XX7PMlfeTuW9z9uKSnJS1pz7AAtFuVsM+Q9Lsh328rLvs9ZrbMzNaa2doKtwWgotrfoHP3FZJWSLxBBzSpyiP7dknnDfl+ZnEZgC5UJezvSJptZt8yszMl/VDS8+0ZFoB2a/lpvLufNLPbJP1ag623x9z9g7aNLCPjx48P6wcPHmz5useMGRPWjx07FtZTbcFx48aF9ai9lmoppqSOj9prqT76vn37WhlSV6v0mt3dX5L0UpvGAqBGnC4LZIKwA5kg7EAmCDuQCcIOZIKwA5mwTq4um+vpsqled6qXffTo0bA+duzYlo9Nia676vWfffbZYb3qNNLofp06dWp47O7du8N6amrwyZMnw3qdyuaz88gOZIKwA5kg7EAmCDuQCcIOZIKwA5mg9fYNkGrNVfkd1nnddUtNDa6yem1q6m5qanCTS03TegMyR9iBTBB2IBOEHcgEYQcyQdiBTBB2IBP02TvgrLPOCuvRbqOSNHHixLB+4sSJ0lpqN9LUFNbPP/88rC9YsCCs33rrraW1VC/6jjvuCOtbt24N601OM20SfXYgc4QdyARhBzJB2IFMEHYgE4QdyARhBzJBn/0b4JFHHgnrUS871Wuuuox1b29vWI+ktk2+5JJLwvqmTZvC+vHjx0trZ5xxRnhsdO6ClP53HzlyJKzXqazPXmnLZjP7VNIBSQOSTrr7vCrXB6A+lcJeWOTue9pwPQBqxGt2IBNVw+6SfmNm75rZsuF+wMyWmdlaM1tb8bYAVFD1afwCd99uZudIesXM/sfdXxv6A+6+QtIKiTfogCZVemR39+3F512SnpU0vx2DAtB+LYfdzMab2Vmnv5Z0haQN7RoYgPaq8jR+mqRniz7taElPuvu/t2VUf2RSWzYvWrQorF922WVhPeqVHzx4MDw21W/u6+sL66nzNKI566m11x999NGWr1uS7rzzztLaW2+9FR5b93bSTWg57O6+RdKftnEsAGpE6w3IBGEHMkHYgUwQdiAThB3IBFNcu0Bqqubs2bPD+v79+0trEyZMCI+NpoFK6SmwVbZ8TrX9UlJLcO/du7e0tnTp0vDYdevWhfVUSzLV8qwTS0kDmSPsQCYIO5AJwg5kgrADmSDsQCYIO5CJdiw42TFRT7fOfnBK6thU/ZZbbgnrq1atCuszZ85s+bZTffZ77rknrK9evTqsn3nmmaW1K664Ijz2wQcfDOuprbCj2168eHF47LZt28L6nj3fvDVWeWQHMkHYgUwQdiAThB3IBGEHMkHYgUwQdiATHZ/Pnup3Rzo51naqOvd54cKFYf2iiy4qrY0bNy48dvTo+FSLNWvWhPUtW7aE9SpSyz3PmTMnrKfu90jq75T57AC6FmEHMkHYgUwQdiAThB3IBGEHMkHYgUx0vM8+alT5/y9V54XXqcpc+lOnTlW67eg+S9VPnjwZHjt+/PiwfujQobCe2o46+p2l5tJfffXVYf3pp58O61X67Kk17VP3a5Na7rOb2WNmtsvMNgy5bIqZvWJmm4vPk9s5WADtN5Kn8U9IuvIrl90l6VV3ny3p1eJ7AF0sGXZ3f03SV/fRWSJpZfH1SklL2zssAO3W6hp009y9v/h6h6RpZT9oZsskLWvxdgC0SeUFJ93dow0b3X2FpBUSGzsCTWq19bbTzKZLUvF5V/uGBKAOrYb9eUk3FV/fJOlX7RkOgLok++xm9pSk70qaKmmnpLslPSfpl5LOl7RV0vXuXr4Z9v9fV21P46uuG1+1Hkn1ZFN7qEf7r1fV29sb1o8cORLWU+cAVDnH4MILLwzrH3/8ccvXnRpXak36lMOHD1c6voqyPnvyNbu731BS+l6lEQHoKE6XBTJB2IFMEHYgE4QdyARhBzLBls2FVAtyYGAgrEd6enrCetVlh6M2UarFlJrCmpK6/mjb5KgmSYsWLWppTKdFv9MTJ06Ex6amuFb5e2gKj+xAJgg7kAnCDmSCsAOZIOxAJgg7kAnCDmSiq/rsdW7nXHU55yrqvu0DBw6U1lL94lSvO3V8qk8fLRedWsb6uuuuC+tHjx4N62PHji2tpfrsqd9Zk1syt4pHdiAThB3IBGEHMkHYgUwQdiAThB3IBGEHMtHxPns0t7ube+XRksmp5ZRT6txW+dJLLw2PnTNnTlhPLSX93HPPhfVI1AeXpIULF4b1Klt4p5ahjs5dkKovwd0EHtmBTBB2IBOEHcgEYQcyQdiBTBB2IBOEHchEx/vs0Zz1OvvoqbnyqXndUU949Oj4bly6dGlYTx2/ZMmSsD5mzJjS2ty5c8NjJ02aFNZTvezXX3+95eNnz54dHptamz3V616/fn1p7fLLLw+Pje5TqTv76CnJR3Yze8zMdpnZhiGXLTez7Wa2rvi4qt5hAqhqJE/jn5B05TCXP+zuc4uPl9o7LADtlgy7u78maW8HxgKgRlXeoLvNzN4vnuZPLvshM1tmZmvNbG2F2wJQUath/5mkb0uaK6lf0kNlP+juK9x9nrvPa/G2ALRBS2F3953uPuDupyT9XNL89g4LQLu1FHYzmz7k2x9I2lD2swC6g6X6qGb2lKTvSpoqaaeku4vv50pySZ9KusXd+5M3ZhbeWKrfnJr3HZk1a1ZYv+aaa8L64sWLS2upedepedupudPR/utSvIZ5X19feGxK1Xnd0e/0iy++CI+dOHFiWE/ZvHlzaW3VqlXhsQ89VPrKVFJ399ndfdiTSpIn1bj7DcNc/GjlEQHoKE6XBTJB2IFMEHYgE4QdyARhBzKRbL219cbMPFp2uc4prnfffXdYX758eVjfs2dPaW3q1KmtDOlLqa2H9+6NpyZE9QsuuCA8NtUWTG3ZnHLs2LHSWmoaaervIdWKjaYtp7Zcfvnll8P6zTffHNab3NK5rPXGIzuQCcIOZIKwA5kg7EAmCDuQCcIOZIKwA5noeJ89qlfZmjg11TLV96yy7fKuXbvC+tatW8P6Aw88ENZXr1
4d1ufNK18E6OGHHw6PTW3ZPHly6YpjkqRt27aF9eh3+sQTT4THfvLJJ2H92muvDevR1OOq02tffPHFsJ6aMl0n+uxA5gg7kAnCDmSCsAOZIOxAJgg7kAnCDmSio332UaNGeTQ/+vjx4+Hx55xzTmlt9+7d4bGpPntq7nTUL05tB71p06awPmXKlLCeWrY4Wu75/PPPD49NzWdPLe+9b9++sH7jjTeW1l544YXw2JTUOgLRctGLFi0Kj02tMZC6X1LLf9eJPjuQOcIOZIKwA5kg7EAmCDuQCcIOZIKwA5noqvnsVaT6nitXrgzr119/fcvXf/jw4fDYcePGhfXUtsipef4DAwOltdS672+++WZYf/LJJ8P6unXrwvobb7xRWkudX5Dq4ad+59F5G/Pnzw+Pffvtt8P6448/HtZT68rXqeU+u5mdZ2a/NbONZvaBmf2kuHyKmb1iZpuLz/EqBwAaNZKn8Scl/Y27XyzpzyX92MwulnSXpFfdfbakV4vvAXSpZNjdvd/d3yu+PiDpQ0kzJC2RdPq58UpJS2saI4A2iF/0fIWZzZL0HUlvS5rm7v1FaYekaSXHLJO0rMIYAbTBiN+NN7M+SWsk3e7u+4fWfPBdvmHffHP3Fe4+z93LV0UEULsRhd3MztBg0H/h7s8UF+80s+lFfbqkeIlVAI1Ktt5scP7mSkl73f32IZc/IOkzd7/PzO6SNMXd/zZxXeGNnXvuueFYduzYEdYj0fa9kjRz5sywfu+995bWZsyYER6b2nI5tXVxtF20JN1///2ltY0bN4bHpqa4prZFTklNW46k2oYnTpwI69HU49Tf/YQJE8J61SnTdSprvY3kNftfSPorSevNbF1x2U8l3Sfpl2b2I0lbJcWNagCNSobd3f9LUtl/kd9r73AA1IXTZYFMEHYgE4QdyARhBzJB2IFMdHSKa09Pj0d93dRU0aj3uX///tKaJPX19YX1VN806vlW6fdK6Z5v6hyBqJed6uEfO3YsrFcV/b5TyzWnpgan/l6q/M5Sqo6tTiwlDWSOsAOZIOxAJgg7kAnCDmSCsAOZIOxAJrpqKenUHOKol55aVrjqvOzp06eX1vr7+0trI9Hb2xvWU1s213ndqWWsDx06FNarzClPGTUqfqyqMqe86fMTqqDPDmSOsAOZIOxAJgg7kAnCDmSCsAOZIOxAJrqqzw6gOvrsQOYIO5AJwg5kgrADmSDsQCYIO5AJwg5kIhl2MzvPzH5rZhvN7AMz+0lx+XIz225m64qPq+ofLoBWJU+qMbPpkqa7+3tmdpakdyUt1eB+7Afd/cER3xgn1QC1KzupZiT7s/dL6i++PmBmH0qa0d7hAajbH/Sa3cxmSfqOpLeLi24zs/fN7DEzm1xyzDIzW2tma6sNFUAVIz433sz6JP2npHvd/RkzmyZpjySX9A8afKp/c+I6eBoP1KzsafyIwm5mZ0h6UdKv3f2fhqnPkvSiu/9J4noIO1CzlifC2ODyoI9K+nBo0Is37k77gaQNVQcJoD4jeTd+gaTXJa2XdHpt3p9KukHSXA0+jf9U0i3Fm3nRdfHIDtSs0tP4diHsQP2Yzw5kjrADmSDsQCYIO5AJwg5kgrADmSDsQCYIO5AJwg5kgrADmSDsQCYIO5AJwg5kgrADmUguONlmeyRtHfL91OKybtStY+vWcUmMrVXtHNsFZYWOzmf/2o2brXX3eY0NINCtY+vWcUmMrVWdGhtP44FMEHYgE02HfUXDtx/p1rF167gkxtaqjoyt0dfsADqn6Ud2AB1C2IFMNBJ2M7vSzP7XzD4ys7uaGEMZM/vUzNYX21A3uj9dsYfeLjPbMOSyKWb2ipltLj4Pu8deQ2Prim28g23GG73vmt7+vOOv2c2sR9ImSX8paZukdyTd4O4bOzqQEmb2qaR57t74CRhmtlDSQUmrTm+tZWb/KGmvu99X/Ec52d3/rkvGtlx/4DbeNY2tbJvxv1aD9107tz9vRROP7PMlfeTuW9z9uKSnJS1pYBxdz91fk7T3KxcvkbSy+HqlBv9YOq5kbF3B3fvd/b3i6wOSTm8z3uh9F4yrI5oI+wxJvxvy/TZ1137vLuk3ZvaumS1rejDDmDZkm60dkqY1OZhhJLfx7qSvbDPeNfddK9ufV8UbdF+3wN3/TNL3Jf24eLralXzwNVg39U5/JunbGtwDsF/SQ00OpthmfI2k2919/9Bak/fdMOPqyP3WRNi3SzpvyPczi8u6grtvLz7vkvSsBl92dJOdp3fQLT7vang8X3L3ne4+4O6nJP1cDd53xTbjayT9wt2fKS5u/L4bblydut+aCPs7kmab2bfM7ExJP5T0fAPj+BozG1+8cSIzGy/pCnXfVtTPS7qp+PomSb9qcCy/p1u28S7bZlwN33eNb3/u7h3/kHSVBt+R/1jS3zcxhpJxXSjpv4uPD5oem6SnNPi07oQG39v4kaSzJb0qabOk/5A0pYvG9m8a3Nr7fQ0Ga3pDY1ugwafo70taV3xc1fR9F4yrI/cbp8sCmeANOiAThB3IBGEHMkHYgUwQdiAThB3IBGEHMvF/rSIwqVQD1iIAAAAASUVORK5CYII=\n",
-      "text/plain": [
-       "<Figure size 432x288 with 1 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
     "import matplotlib.pyplot as plt\n",
@@ -460,21 +89,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([0.1020, 0.0113, 0.4806, 0.0571, 0.0482, 0.0079, 0.0450, 0.0076, 0.1851,\n",
-       "        0.0552])"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from torch.nn.functional import softmax\n",
     "# do forward pass in PyTorch/Brevitas\n",
@@ -485,22 +102,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEICAYAAABS0fM3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbi0lEQVR4nO3debxdZXn28d9FIDKFQRIVEiDMNk6IERAVZWpDq2AREV4nrEwtsSi+VlTUSp3qhFWxCgRBKfACgo0WZShKHYGAKIRBwhzGMAmiLxC4+sd6Dm6O++yzMqx1yFnX9/PZn6z5vvc+sO+9nmetZ8k2ERHRXSuNdQIRETG2UggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgoiZJlrR5mf66pI8s5XF+L2nT5ZvdwHiS9E1JD0i6pK24seJIIRiHJN0sadc+y18r6cnyRTT0+l7P+i0lnSHpXkm/k/QbSYdLmrCM+cyWNE/So5JOXMJ995H0c0l/kPTjUbbtfX8PS7pO0juXJfeR2D7E9r+Mtp2kH0s6YNi+a9q+sYm8RvAqYDdgmu1tl/VgkqaXorjysqcWzwQpBN1zR/kiGnq9HkDSZsDFwG3Ai2yvDbwJmAlMWtaYwCeAE5Zi3/uBLwGfqRvL9prAWsAHgOMkzRi+Uce+xDYGbrb9yJLu2LHPqbNSCGLIx4Gf2z7c9p0Atq+z/X9sPzh8Y0k7SbqyZ/58SZf2zP9E0hvKcc6y/V3gvj7HWVfS9yUtKk0X35c0bWi97Qtsn05VTGpz5bvAA8AMSftL+pmkoyXdB/yzpGdJ+rykWyXdXZp7VuvJ7f2S7pR0h6S/G5b3iZI+0TO/p6QrJD0k6QZJsyR9Eng18NVylvLVsm1vE9Pakr5V3v8tko6UtFJZt7+kn5YcH5B0k6Tde2LuL+nGcvZzk6S39Pl83wUcD7yi5PDxsvxASQsk3S9prqQNevaxpEMlXQ9cP9pnXT6Lr0n6QYnxM0nPk/Slkve1kl7as/0R5TN6WNLVkv62Z90ESV8oZ6U3lbPJp84+yuc1p/xdbpf0iWU9Y40UgviTXYEzl2D7XwJbSJosaRXgxcAGkiaVL9OZwE9qHGcl4JtUv1o3Av4IfHWJMu9D0krlC2YdYKhgbQfcCDwX+CTVWcaWwNbA5sBU4KNl/1nA/6VqUtmC6vMZKda2wLeA95d4O1L9Av8w1Wcwu5x9ze6z+1eAtYFNgdcAbwd6m7O2A64DJgOfBeaosgbwZWB325OAHYArhh/c9hzgEOAXJYePSdoZ+DSwD7A+cAtw2rBd31Bi/9nZ1Aj2AY4seT4K/AK4vMyfCXyxZ9sbqArk2lQ/QE6WtH5ZdyCwO9XfZJuSR68TgcVUf6+XAn8JHEAsG9t5jbMXcDOwa5/lrwWeBB7see1T1j0OzFrCOD8B9gK2B84DTgdmATsBv+mz/SeAE0c55tbAA32WHwD8eJR9e9/f/VRfjPuWdfsDt/ZsK+ARYLOeZa8AbirTJwCf6Vm3JWBg8zJ/IvCJMv0N4OgRcvoxcMCwZab6IpsAPAbM6Fl38ND7LDkv6Fm3etn3ecAa5X2+EVhtlM9lf+CnPfNzgM/2zK9Z/v7Te/LbecDxppdtVu75LI7rWf9u4Jqe+RcBDw443hXAnmX6QuDgnnW7DsWiKuCP9r5fYD/gR23/PzbeXmn/6547bE/rs/w+ql+HfUn6OvDWMvsp258CLqL68l1Yph+g+lX7aJkflaTVgaOpCsi6ZfEkSRNsP1HnGMOM9P6g6v8YMoXqi/UySU+lQ/XlDLABcFnP9rcMiLkhcM6Sp8pkYJVhx76F6sxkyF1DE7b/UHJd0/Zdkt5MddYyR9LPgPfZvrZG3A2ofq0PHff3pblsKtWPCHj6Z1XH3T3Tf+wzv+bQjKS3A4dTFRTKusk9ufXG7p3emOrzurPnb7bSUuQaw6RpKIZcQPXrsi9XV8kMdTB/qiweKgQ7lumLqArBa6hZCID3AVsB29leqxwLqi/l5a13qN17qb6gXmB7nfJa21VHM8CdVF/wQzYacNzbgM1qxBzuXqpf4hsPi3P7gH3+dGD7XNu7URXwa4Hj6uxH1d/yVMzSzLTesLiNDEssaWOqPGcD69leB7iKP/297wR6C3nv3+A2qh8Zk3v+ZmvZfkETuXZJCsH4tYqkVXteo539fQzYQdLnJD0PQNLmkk6WtM4I+/yc6kt8W+AS2/OpvmC2A/5naCNJK0talerX9oRh+Uyi+kJ+UNKzSx707Duh7LsysFLZd5X6H0N/tp+k+kI6WtJzSqypkv6qbHI6sL+kGeWs5WMjHAqqppZ3Stql9E1MlfT8su5uqvb/fjk8UeJ8svStbEz1S/nk0fKX9NzSQb0G1Zfj76maxeo4teS7taRnAZ8CLrZ9c839l8UaVEVmEYCqy3tf2LP+dOCw8hmuQ3XlFwCuLmI4D/iCpLXKZ72ZpNe0kPe4lkIwfp1D9QU79PrnQRvbvoGqjXw6MF/S74DvAPOAh0fY5xGqJob5th8ri38B3GL7np5Njyw5HEHVvPTHsgyqS0NXo/p1/Evgh8PCvK1s/+9UHYx/pP4v39F8AFgA/FLSQ1RnRVuV9/aDktuFZZsLRzqI7UuoOniPBn5HdTY09Iv734C9y9UzX+6z+7up+ipuBH4KnEK9y2xXoioad1D1h7wG+Psa+2H7AuAjVH/fO6nOZvats++ysn018AWq/07upuo/+FnPJsdRfdn/BvgV1X/Hi4GhZsK3AxOBq6maIs9kQJNm1KPS4RIR8YxTLpf9uu2NR904llrOCCLiGUPSapL+ujQnTqVqkjt7rPMa73JGEBHPGKU/5iLg+VTNgP8FHGb7oTFNbJxLIYiI6LhGm4ZU3WZ/XbmV/Yg+6/dXdWv9FeWVOwQjIlrW2A1lZfyPY6hu0V8IXCppbrlqoNf/c/9b7/uaPHmyp0+fvvwSjYjogMsuu+xe21P6rWvyzuJtqW6PvxFA0mnAnlSXfS216dOnM2/evOWQXkREd0ga8e74JpuGpvL0W78X8vRb54e8UdW492dK2rDPeiQdpGo8+3mLFi1qIteIiM4a68tHv0c10NWLgfOBk/ptZPtY2zNtz5wype+ZTURELKUmC8HtPH2ckGkMG0PF9n22Hy2zxwMvazCfiIjoo8lCcCnVePWbSJpIdQv73N4NesYgB9gDuKbBfCIioo/GOottL5Y0GziXarCxE2zPl3QUMM/2XOAfJe1BNZbI/VTjpkdERItWuBvKZs6c6Vw1FBGxZCRdZntmv3Vj3VkcERFjLIUgIqLjUggiIjouzyzugKPP/23jMd6725aNx4iIZuSMICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIq
LjUggiIjouhSAiouNSCCIiOq7RQiBplqTrJC2QdMSA7d4oyZJmNplPRET8ucYKgaQJwDHA7sAMYD9JM/psNwk4DLi4qVwiImJkTZ4RbAsssH2j7ceA04A9+2z3L8C/Av+/wVwiImIETRaCqcBtPfMLy7KnSNoG2ND2fw06kKSDJM2TNG/RokXLP9OIiA4bs85iSSsBXwTeN9q2to+1PdP2zClTpjSfXEREhzRZCG4HNuyZn1aWDZkEvBD4saSbge2BuekwjohoV5OF4FJgC0mbSJoI7AvMHVpp+3e2J9uebns68EtgD9vzGswpIiKGaawQ2F4MzAbOBa4BTrc9X9JRkvZoKm5ERCyZlZs8uO1zgHOGLfvoCNu+tslcIiKiv9xZHBHRcSkEEREdl0IQEdFxKQQRER2XQhAR0XEpBBERHZdCEBHRcSkEEREdN2ohkPRuSeu2kUxERLSvzhnBc4FLJZ1enjimppOKiIj2jFoIbB8JbAHMAfYHrpf0KUmbNZxbRES0oFYfgW0Dd5XXYmBd4ExJn20wt4iIaMGog85JOgx4O3AvcDzwftuPlwfLXA/8U7MpRkREk+qMPvpsYC/bt/QutP2kpNc1k1ZERLSlTtPQpsOLgKRvA9i+ppGsIiKiNXUKwQt6ZyRNAF7WTDoREdG2EQuBpA9Kehh4saSHyuth4B7gP1vLMCIiGjViIbD9aduTgM/ZXqu8Jtlez/YHW8wxIiIaNGJnsaTn274WOEPSNsPX27680cwiIqIVg64aeh9wIPCFPusM7NxIRhER0aoRC4HtA8u/O7WXTkREtG1Q09Beg3a0fdbyTyciIto2qGno9QPWGUghiIgYBwY1Db2zzUQiImJsDGoaeqvtkyUd3m+97S82l1ZERLRlUNPQGuXfSW0kEhERY2NQ09A3yr8fby+diIhoW51HVW4q6XuSFkm6R9J/Stq0jeQiIqJ5dQadOwU4HVgf2AA4Azi1yaQiIqI9dQrB6ra/bXtxeZ0MrNp0YhER0Y5BVw09u0z+QNIRwGlU9w+8GTinhdwiIqIFg64auozqi19l/uCedQYyAmlExDgw6KqhTdpMJCIixkadZxYj6YXADHr6Bmx/q6mkIiKiPXUuH/0Y8JXy2gn4LLBHnYNLmiXpOkkLSj/D8PWHSLpS0hWSfippxhLmHxERy6jOVUN7A7sAd5Xxh14CrD3aTuXZxscAu1OdTezX54v+FNsvsr01VYHJsBURES2rUwj+aPtJYLGktaieWbxhjf22BRbYvtH2Y1RXHe3Zu4Hth3pm16DqhI6IiBbV6SOYJ2kd4DiqK4l+D/yixn5Tgdt65hcC2w3fSNKhwOHARPLUs4iI1o16RmD7H2w/aPvrwG7AO5bnENW2j7G9GfAB4Mh+20g6SNI8SfMWLVq0vEJHRAT1moaQtJekLwLvBjareezbeXoT0rSybCSnAW/ot8L2sbZn2p45ZcqUmuEjIqKOOlcNfQ04BLgSuAo4WNIxNY59KbCFpE0kTQT2BeYOO/YWPbN/A1xfN/GIiFg+6vQR7Az8hW0DSDoJmD/aTrYXS5oNnAtMAE6wPV/SUcA823OB2ZJ2BR4HHgDesZTvIyIillKdQrAA2Ai4pcxvWJaNyvY5DBuXyPZHe6YPq5dmREQ0ZdCgc9+jupxzEnCNpEvKqm2BS0baLyIiViyDzgg+31oWERExZgYNOnfR0LSk5wIvL7OX2L6n6cQiIqIdda4a2oeqKehNwD7AxZL2bjqxiIhoR53O4g8DLx86C5A0BbgAOLPJxCIioh11bihbaVhT0H0194uIiBVAnTOCH0o6lz89sD6PqoyIGEcGFgJJAr5M1VH8qrL4WNtnN51YRES0Y2AhsG1J59h+EXBWSzlFRESL6rT1Xy7p5aNvFhERK6I6fQTbAW+VdDPwCCCqk4UXN5lYRES0o04h+KvGs4iIiDEzaKyh5wAfAjanGoL608MeLRkREePAoD6Cb1E1BX0FWJPq6qGIiBhnBjUNrW/7w2X6XEmXt5FQRES0a7T7CNal6hwGmNA7b/v+hnOLiIgWDCoEawOX8adCADB0VmBg06aSioiI9gwahnp6i3lERMQYyeBxEREdl0IQEdFxKQQRER036IayZw/aMVcNRUSMD4OuGrqM6uogARsBD5TpdYBbgU2aTi4iIpo3YtOQ7U1sb0r1WMrX255sez3gdcB5bSUYERHNqtNHsL3tp55IZvsHwA7NpRQREW2qM/roHZKOBE4u828B7mgupYiIaFOdM4L9gCnA2VRPKZtSlkVExDgw6hlBuTroMElr2H6khZwiIqJFo54RSNpB0tXANWX+JZK+1nhmERHRijpNQ0dTPaXsPgDbvwZ2bDKpiIhoT607i23fNmzREw3kEhERY6DOVUO3SdoBsKRVgMMozUQREbHiq3NGcAhwKDAVuB3YGviHBnOKiIgW1Tkj2Mr2W3oXSHol8LNmUoqIiDbVOSP4Ss1lf0bSLEnXSVog6Yg+6w+XdLWk30j6b0kb1zluREQsP4NGH30F1VASUyQd3rNqLWDCaAeWNAE4BtgNWAhcKmmu7at7NvsVMNP2HyT9PfBZ4M1L/jYiImJpDTojmAisSVUsJvW8HgL2rnHsbYEFtm+0/RhwGrBn7wa2f2T7D2X2l8C0JUs/IiKW1aBnFl8EXCTpRNu3LMWxpwK9l50uBLYbsP27gB8sRZyIiFgGdfoIjpe0ztCMpHUlnbs8k5D0VmAm8LkR1h8kaZ6keYsWLVqeoSMiOq9OIZhs+8GhGdsPAM+psd/twIY989PKsqeRtCvwYWAP24/2O5DtY23PtD1zypQpNUJHRERddQrBk5I2GpopV/a4xn6XAltI2kTSRGBfYG7vBpJeCnyDqgjcUz/tiIhYXurcR/Bh4KeSLqJ6VOWrgYNG28n2YkmzgXOprjI6wfZ8SUcB82zPpWoKWhM4QxLArbb3WLq3EhERS6POMNQ/lLQNsH1Z9B7b99Y5eHmy2TnDln20Z3rXJcg1IiIaMGLTkKTnl3+3oXp4/R3ltVFZFhER48CgM4L3AQcCX+izzsDOjWQUEbGUjj7/t43HeO9uWzYeo22D7iM4sPy7U3vpRERE2wYNMbHXoB1tn7X804mIiLYNahp6ffn3OVRjDl1Y5ncCfk71IPuIiFjBDWoaeieApPOAGbbvLPPrAye2kl1ERDSuzg1lGw4VgeJuqquIIiJiHKhzQ9l/l7GFTi3zbwYuaC6liIhoU50bymZL+ltgx7LoWNtnN5tWRES0pc4ZAcDlwMO2L5C0uqRJth9uMrGIiGjHqH0Ekg4EzqQaHA6q5wx8t8GcIiKiRXU6iw8FXkn1ZDJsX0+9YagjImIFUKcQPFoeNQmApJWpNwx1RESsAOoUgoskfQhYTdJuwBnA95pNKyIi2lKnEHwAWARcCRxMNaz0kU0mFRER7Rl41ZCkCcB8288HjmsnpYiIaNPAMwLbTwDX9T6qMiIixpc69xGsC8yXdAnwyNDCPFIyImJ8qFMIPtJ4FhERMWYGPY9gVeAQYHOqjuI5the3lVhERLRjU
B/BScBMqiKwO/0fWRkRESu4QU1DM2y/CEDSHOCSdlKKiIg2DTojeHxoIk1CERHj16AzgpdIeqhMi+rO4ofKtG2v1Xh2ERHRuEGPqpzQZiIRETE26gwxERER41gKQUREx6UQRER0XApBRETHpRBERHRcCkFERMelEEREdFwKQUREx6UQRER0XApBRETHNVoIJM2SdJ2kBZKO6LN+R0mXS1osae8mc4mIiP4aKwTlwffHUD3LYAawn6QZwza7FdgfOKWpPCIiYrA6j6pcWtsCC2zfCCDpNGBP4OqhDWzfXNY92WAeTzn6/N82HuO9u23ZeIyIiOWpyaahqcBtPfMLy7IlJukgSfMkzVu0aNFySS4iIiorRGex7WNtz7Q9c8qUKWOdTkTEuNJkIbgd2LBnflpZFhERzyBNFoJLgS0kbSJpIrAvMLfBeBERsRQaKwTlOcezgXOBa4DTbc+XdJSkPQAkvVzSQuBNwDckzW8qn4iI6K/Jq4awfQ5wzrBlH+2ZvpSqySgiIsbICtFZHBERzUkhiIjouBSCiIiOSyGIiOi4FIKIiI5LIYiI6LgUgoiIjkshiIjouBSCiIiOSyGIiOi4FIKIiI5LIYiI6LhGB52LyONBI575UggiIpaDFflHT5qGIiI6LoUgIqLj0jQU41rTp+vpn4jxIIUgYhxKAYwlkaahiIiOSyGIiOi4FIKIiI5LIYiI6LgUgoiIjkshiIjouBSCiIiOSyGIiOi4FIKIiI5LIYiI6LgUgoiIjkshiIjouBSCiIiOSyGIiOi4DEPdkhX5MXYRMb6lEEQ0JM8EiBVFo4VA0izg34AJwPG2PzNs/bOAbwEvA+4D3mz75iZziohm5ex3xdNYH4GkCcAxwO7ADGA/STOGbfYu4AHbmwNHA//aVD4REdFfk53F2wILbN9o+zHgNGDPYdvsCZxUps8EdpGkBnOKiIhhZLuZA0t7A7NsH1Dm3wZsZ3t2zzZXlW0Wlvkbyjb3DjvWQcBBZXYr4LpGku5vMnDvqFsldmIndmI/s2NvbHtKvxUrRGex7WOBY8citqR5tmcmdmIndmKPl9jDNdk0dDuwYc/8tLKs7zaSVgbWpuo0joiIljRZCC4FtpC0iaSJwL7A3GHbzAXeUab3Bi50U21VERHRV2NNQ7YXS5oNnEt1+egJtudLOgqYZ3suMAf4tqQFwP1UxeKZZkyapBI7sRM7sdvSWGdxRESsGDLWUEREx6UQRER0XArBCCTNknSdpAWSjmg59gmS7in3WbQZd0NJP5J0taT5kg5rMfaqki6R9OsS++Ntxe7JYYKkX0n6/hjEvlnSlZKukDSv5djrSDpT0rWSrpH0ipbiblXe79DrIUnvaSN2if/e8t/aVZJOlbRqi7EPK3Hnt/meR2Q7r2Evqs7tG4BNgYnAr4EZLcbfEdgGuKrl970+sE2ZngT8tq33DQhYs0yvAlwMbN/y+z8cOAX4fptxS+ybgcltxy2xTwIOKNMTgXXGIIcJwF1UNz21EW8qcBOwWpk/Hdi/pdgvBK4CVqe6YOcCYPOx+NsPvXJG0F+d4TEaY/t/qK6iapXtO21fXqYfBq6h+h+mjdi2/fsyu0p5tXYlg6RpwN8Ax7cV85lA0tpUPzzmANh+zPaDY5DKLsANtm9pMebKwGrlHqbVgTtaivsXwMW2/2B7MXARsFdLsftKIehvKnBbz/xCWvpCfKaQNB14KdUv87ZiTpB0BXAPcL7t1mIDXwL+CXiyxZi9DJwn6bIypEpbNgEWAd8szWLHS1qjxfhD9gVObSuY7duBzwO3AncCv7N9XkvhrwJeLWk9SasDf83Tb75tXQpB/BlJawLfAd5j+6G24tp+wvbWVHehbyvphW3ElfQ64B7bl7URbwSvsr0N1Wi9h0rasaW4K1M1Q/677ZcCjwBt94lNBPYAzmgx5rpUZ/mbABsAa0h6axuxbV9DNdLyecAPgSuAJ9qIPZIUgv7qDI8xLklahaoI/Ifts8Yih9I08SNgVkshXwnsIelmqmbAnSWd3FJs4KlfqNi+BzibqnmyDQuBhT1nX2dSFYY27Q5cbvvuFmPuCtxke5Htx4GzgB3aCm57ju2X2d4ReICqP27MpBD0V2d4jHGnDAE+B7jG9hdbjj1F0jplejVgN+DaNmLb/qDtabanU/2tL7Tdyq9DAElrSJo0NA38JVXzQeNs3wXcJmmrsmgX4Oo2YvfYjxabhYpbge0lrV7+u9+Fqk+sFZKeU/7diKp/4JS2YvezQow+2jaPMDxGW/ElnQq8FpgsaSHwMdtzWgj9SuBtwJWlrR7gQ7bPaSH2+sBJ5YFGKwGn2279Ms4x8lzg7PIojpWBU2z/sMX47wb+o/zouRF4Z1uBS+HbDTi4rZgAti+WdCZwObAY+BXtDvnwHUnrAY8Dh45RB/1TMsRERETHpWkoIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLj/hdRB2LXFx7MKAAAAABJRU5ErkJggg==\n",
-      "text/plain": [
-       "<Figure size 432x288 with 1 Axes>"
-      ]
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import numpy as np\n",
     "objects = [str(x) for x in range(10)]\n",
@@ -529,7 +133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -548,39 +152,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stopping http://0.0.0.0:8081\n",
-      "Serving '/tmp/LFCW1A1.onnx' at http://0.0.0.0:8081\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"100%\"\n",
-       "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f3a27be9ac8>"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "showInNetron('/tmp/LFCW1A1.onnx')"
    ]
@@ -603,27 +177,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "input: \"37\"\n",
-       "input: \"38\"\n",
-       "output: \"40\"\n",
-       "op_type: \"MatMul\""
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from finn.core.modelwrapper import ModelWrapper\n",
     "model = ModelWrapper(export_onnx_path)\n",
-    "model.graph.node[9]"
+    "model.graph.node[8]"
    ]
   },
   {
@@ -635,28 +195,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([[-1., -1.,  1., ..., -1.,  1., -1.],\n",
-       "       [ 1.,  1., -1., ...,  1., -1.,  1.],\n",
-       "       [-1., -1., -1., ...,  1., -1.,  1.],\n",
-       "       ...,\n",
-       "       [ 1., -1., -1., ..., -1., -1.,  1.],\n",
-       "       [ 1., -1., -1., ...,  1.,  1.,  1.],\n",
-       "       [ 1., -1.,  1., ...,  1., -1.,  1.]], dtype=float32)"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "model.get_initializer(model.graph.node[9].input[1])"
+    "model.get_initializer(model.graph.node[8].input[1])"
    ]
   },
   {
@@ -668,42 +211,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<DataType.BIPOLAR: 34>"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "model.get_tensor_datatype(model.graph.node[9].input[1])"
+    "model.get_tensor_datatype(model.graph.node[8].input[1]).name"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[784, 1024]"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "model.get_tensor_shape(model.graph.node[9].input[1])"
+    "model.get_tensor_shape(model.graph.node[8].input[1])"
    ]
   },
   {
@@ -715,7 +236,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -729,39 +250,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Stopping http://0.0.0.0:8081\n",
-      "Serving '/tmp/LFCW1A1-clean.onnx' at http://0.0.0.0:8081\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"100%\"\n",
-       "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f3a27b49e10>"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "showInNetron('/tmp/LFCW1A1-clean.onnx')"
    ]
@@ -775,22 +266,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([[-1.3736125, -3.5715756,  0.1768887, -1.9529207, -2.1233053,\n",
-       "        -3.9293835, -2.1914592, -3.9634604, -0.7772659, -1.9869976]],\n",
-       "      dtype=float32)"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import finn.core.onnx_exec as oxe\n",
     "input_dict = {\"0\": nph.to_array(input_tensor)}\n",
@@ -802,20 +280,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "np.isclose(produced, produced_finn).all()"
    ]
@@ -844,7 +311,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
index b49c5f3c3eb68961f08041a2c51a46bf66452c81..2d668f3e041e54bd82e79a79efca8a82210bfcbc 100644
--- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
@@ -203,7 +203,7 @@
     "model = model.transform(MergeONNXModels(pre_model))\n",
     "# add input quantization annotation: UINT8 for all BNN-PYNQ models\n",
     "global_inp_name = model.graph.input[0].name\n",
-    "model.set_tensor_datatype(global_inp_name, DataType.UINT8)"
+    "model.set_tensor_datatype(global_inp_name, DataType[\"UINT8\"])"
    ]
   },
   {
diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
index 5ed4b170b4eeee4b438d9539d2317a7d5eab5df2..a1a8450225f6bd375443a10a66739ca9dd00017e 100644
--- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
@@ -384,7 +384,7 @@
     "model = model.transform(MergeONNXModels(pre_model))\n",
     "# add input quantization annotation: UINT8 for all BNN-PYNQ models\n",
     "global_inp_name = model.graph.input[0].name\n",
-    "model.set_tensor_datatype(global_inp_name, DataType.UINT8)\n",
+    "model.set_tensor_datatype(global_inp_name, DataType[\"UINT8\"])\n",
     "\n",
     "model.save(build_dir+\"/tfc_w1_a1_with_preproc.onnx\")\n",
     "showInNetron(build_dir+\"/tfc_w1_a1_with_preproc.onnx\")"
@@ -1799,7 +1799,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
index e0ce00c1beefe8172ac5fd2aeaaa076b9bb574c1..2c9f4a99ed3edd05a8e8d32db2fe6bcdad204716 100644
--- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
+++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
@@ -98,7 +98,7 @@
     "\n",
     "Following Murovic and Trost's open-source implementation provided as a Matlab script [here](https://github.com/TadejMurovic/BNN_Deployment/blob/master/cybersecurity_dataset_unswb15.m), we've created a [Python version](dataloader_quantized.py).\n",
     "\n",
-    "<font color=\"red\">**FPGA'21 tutorial:** Downloading the original dataset and quantizing it can take some time, so we provide a download link to the pre-quantized version for your convenience. </font>"
+    "<font color=\"red\">**Live FINN tutorial:** Downloading the original dataset and quantizing it can take some time, so we provide a download link to the pre-quantized version for your convenience. </font>"
    ]
   },
   {
@@ -110,16 +110,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2021-05-10 18:14:00--  https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n",
+      "--2021-10-12 15:49:17--  https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n",
       "Resolving zenodo.org (zenodo.org)... 137.138.76.77\n",
       "Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.\n",
       "HTTP request sent, awaiting response... 200 OK\n",
       "Length: 13391907 (13M) [application/octet-stream]\n",
       "Saving to: ‘unsw_nb15_binarized.npz’\n",
       "\n",
-      "unsw_nb15_binarized 100%[===================>]  12.77M  3.96MB/s    in 3.4s    \n",
+      "unsw_nb15_binarized 100%[===================>]  12.77M  3.56MB/s    in 3.7s    \n",
       "\n",
-      "2021-05-10 18:14:04 (3.77 MB/s) - ‘unsw_nb15_binarized.npz’ saved [13391907/13391907]\n",
+      "2021-10-12 15:49:22 (3.44 MB/s) - ‘unsw_nb15_binarized.npz’ saved [13391907/13391907]\n",
       "\n"
      ]
     }
@@ -422,9 +422,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Training loss:   0%|          | 0/10 [00:00<?, ?it/s]/opt/conda/lib/python3.8/site-packages/torch/autograd/__init__.py:130: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at  /opt/conda/conda-bld/pytorch_1607370172916/work/c10/cuda/CUDAFunctions.cpp:100.)\n",
-      "  Variable._execution_engine.run_backward(\n",
-      "Training loss = 0.131708 test accuracy = 0.805398: 100%|██████████| 10/10 [01:04<00:00,  6.42s/it]\n"
+      "Training loss = 0.132918 test accuracy = 0.798341: 100%|██████████| 10/10 [00:44<00:00,  4.45s/it]\n"
      ]
     }
    ],
@@ -459,7 +457,7 @@
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAApFUlEQVR4nO3df5xddX3n8dd77vzK/LyTzJCfMyRAAAPi3O6AP6i0KrbAtsB2qYK/oGtLbYvVRa2ou9pl5bFWWrXuUhdWxVqpqKglVRCsAtJWMIGEQIKBGAJJSMjk9ySTzM/P/nHPTO5MJsncZG7unZn38/G4j3vP9/y4n3Mheeec7znfo4jAzMxsvMqKXYCZmU0uDg4zM8uLg8PMzPLi4DAzs7w4OMzMLC8ODjMzy4uDw+w4SLpf0rUTvWyeNfympE0TvV2zYykvdgFmJ4ukfTmTNUAPMJBM/3FE3DXebUXEpYVY1mwycHDYtBERdUOfJW0A/jAi/mX0cpLKI6L/ZNZmNpn4VJVNe0OnfCR9VNJW4E5JTZJ+IKlT0q7k84KcdR6W9IfJ5+sk/aukv06WfUHSpce57CJJP5PUJelfJN0m6Rvj3I9XJd+1W9JqSZfnzLtM0ppku5slfThpb072bbeknZIeleS/F+yo/D+IWdYcYCZwKnA92T8bdybTbcAB4P8cZf3XAmuBZuCzwFck6TiW/UfgF8As4C+Bd4+neEkVwD8DDwKnAO8H7pJ0VrLIV8iejqsHzgV+mrR/CNgEtACzgY8DHofIjsrBYZY1CHwqInoi4kBE7IiI70ZEd0R0AbcAv3GU9V+MiP8XEQPA3wNzyf5FPO5lJbUB5wOfjIjeiPhXYOk4638dUAd8Jln3p8APgGuS+X3AEkkNEbErIp7MaZ8LnBoRfRHxaHgAOzsGB4dZVmdEHByakFQj6XZJL0raC/wMSEtKHWH9rUMfIqI7+ViX57LzgJ05bQAbx1n/PGBjRAzmtL0IzE8+/2fgMuBFSY9Ien3SfiuwDnhQ0npJN43z+2wac3CYZY3+V/aHgLOA10ZEA3BR0n6k008TYQswU1JNTlvrONd9GWgd1T/RBmwGiIhlEXEF2dNY/wR8O2nviogPRcRpwOXAjZLecmK7YVOdg8NsbPVk+zV2S5oJfKrQXxgRLwLLgb+UVJkcFfzuOFd/HOgG/kJShaTfTNa9O9nWOyU1RkQfsJfsqTkk/Y6kM5I+lj1kL08eHPMbzBIODrOxfQGYAWwHHgN+dJK+953A64EdwKeBb5G93+SoIqKXbFBcSrbmvwPeExG/TBZ5N7AhOe32vuR7ABYD/wLsA34O/F1EPDRhe2NTktwPZla6JH0L+GVEFPyIx2y8fMRhVkIknS/pdEllki4BriDbJ2FWMnznuFlpmQN8j+x9HJuAP4mIFcUtyWwkn6oyM7O8+FSVmZnlZVqcqmpubo6FCxcWuwwzs0nliSee2B4RLaPbp0VwLFy4kOXLlxe7DDOzSUXSi2O1+1SVmZnlxcFhZmZ5cXCYmVleHBxmZpYXB4eZmeXFwWFmZnlxcJiZWV4cHEdx78rNfOOxMS9jNjObthwcR/HA6q3c/rNfFbsMM7OS4uA4ivbWNBt3HmD7vmM+R8fMbNooaHBIukTSWknrJN00xvz3SXpa0kpJ/yppSc68jyXrrZX02+Pd5kTKtDUBsPKl3YX8GjOzSaVgwSEpBdxG9lGWS4BrcoMh8Y8R8eqIaAc+C3wuWXcJcDVwDnAJ8HeSUuPc5oQ5d14j5WVixcZdhfoKM7NJp5BHHBcA6yJiffI85LvJPs1sWETszZmsBYYeDnIFcHdE9ETEC8C6ZHvH3OZEmlGZ4lVzG1jhIw4zs2GFDI75wMac6U1J2wiS/kzSr8gecfz5MdYd1zaT7V4vabmk5Z2dnce9E5m2NE9t3M3AoB94ZWYGJdA5HhG3RcTpwEeB/zaB270jIjoioqOl5bDh5Mct05Zmf+8Az2/rmqjSzMwmtUIGx2agNWd6QdJ2JHcDVx5j3Xy3ecLaW91BbmaWq5DBsQxYLGmRpEqynd1LcxeQtDhn8j8CzyeflwJXS6qStAhYDPxiPNucaAtn1ZCuqXA/h5lZomBPAIyIfkk3AA8AKeCrEbFa0s3A8ohYCtwg6WKgD9gFXJusu1rSt4E1QD/wZxExADDWNgu1D8n3kWlN+8oqM7NEQR8dGxH3AfeNavtkzucPHGXdW4BbxrPNQsu0NfHwc53sPdhHQ3XFyfxqM7OSU/TO8ckg05YmAlZt3FPsUszMis7BMQ6vaU0jwYqXfLrKzMzBMQ4N1RWc3lLHyo27i12KmVnROTjGKdtBvpsI3whoZtObg2OcMm1N7Nzfy0s7u4tdiplZUTk4xinTlgbw/RxmNu05OMbpzNn11FSm3EFuZtOeg2OcUmXivAWNrHAHuZlNcw6OPGTamljz8l4O9g0UuxQzs6JxcOQh05qmfzBY/bJvBDSz6cvBkYd2d5CbmTk48nFKfTULmmY4OMxsWnNw5CnT1uQrq8xsWnNw5Km9Nc3Lew7yyt6DxS7FzKwoHBx58o2AZjbdOTjydM68BipTZX6wk5lNWw6OPFWVp1gyr8FHHGY2bTk4jkOmLc2qTbvpHxgsdilmZiedg+M4ZNqaONg3yC+3dhW7FDOzk87BcRwyrWkAP9jJzKalggaHpEskrZW0TtJNY8y/UdIaSask/UTSqUn7myStzHkdlHRlMu9rkl7ImddeyH0Yy4KmGTTXVbqfw8ympfJCbVhSCrgNeCuwCVgmaWlErMlZbAXQERHdkv4E+Czw9oh4CGhPtjMTWAc8mLPeRyLinkLVfiySaG9t8pVVZjYtFfKI4wJgXUSsj4he4G7gitwFIuKhiBh6pN5jwIIxtnMVcH/OciUh05Zmfed+dnf3FrsUM7OTqpDBMR/YmDO9KWk7kvcC94/RfjXwzVFttySntz4vqWqsjUm6XtJyScs7OzvzqXtchm4EdD+HmU03JdE5LuldQAdw66j2ucCrgQdymj8GnA2cD8wEPjrWNiPijojoiIiOlpaWCa/5vAVpJN9BbmbTTyGDYzPQmjO9IGkbQdLFwCeAyyOiZ9TstwHfj4i+oYaI2BJZPcCdZE+JnXR1VeWcNbveRxxmNu0UMjiWAYslLZJUSfaU09LcBSRlgNvJhsa2MbZxDaNOUyVHIUgScCXwzMSXPj6ZtjQrN+5mcDCKVYKZ2UlXsOCIiH7gBrKnmZ4Fvh0RqyXdLOnyZLFbgTrgO8mltcPBImkh2SOWR0Zt+i5JTwNPA83Apwu1D8eSaW1iz4E+Xtixv1glmJmddAW7HBcgIu4D7hvV9smczxcfZd0NjNGZHhFvnsAST0juSLmnt9QVtxgzs5OkJDrHJ6vTW+qoryr3g53MbFpxcJyAsjLxmta0r6wys2nFwXGCMm1p1r7SRXdvf7FLMTM7KRwcJyjTlmZgMHh6055il2JmdlI4OE5Qe2sTACt8P4eZTRMOjhM0s7aShbNq3EFuZtOGg2MCZNqaWPHSbiJ8I6
CZTX0OjgnQ3ppmW1cPW/YcLHYpZmYF5+CYALk3ApqZTXUOjglw9pwGqsrL3M9hZtOCg2MCVJaX8er5jb6yysymBQfHBMm0pXl68x56+weLXYqZWUE5OCZIe2sTvf2DPLtlb7FLMTMrKAfHBPGjZM1sunBwTJC5jdXMbqhyB7mZTXkOjgkiiUxrkzvIzWzKc3BMoExbmhd3dLNj3+hHp5uZTR0OjgmUacsOeOh+DjObyhwcE+jV8xtJlcl3kJvZlFbQ4JB0iaS1ktZJummM+TdKWiNplaSfSDo1Z96ApJXJa2lO+yJJjyfb/JakykLuQz5mVKY4e069jzjMbEorWHBISgG3AZcCS4BrJC0ZtdgKoCMizgPuAT6bM+9ARLQnr8tz2v8K+HxEnAHsAt5bqH04Hpm2NCs37mZg0CPlmtnUVMgjjguAdRGxPiJ6gbuBK3IXiIiHIqI7mXwMWHC0DUoS8GayIQPw98CVE1n0icq0NrGvp59fde4rdilmZgVRyOCYD2zMmd6UtB3Je4H7c6arJS2X9JikK5O2WcDuiBh6wPextnnSHRop1/dzmNnUVBKd45LeBXQAt+Y0nxoRHcA7gC9IOj3PbV6fBM/yzs7OCaz26BY119I4o8Id5GY2ZRUyODYDrTnTC5K2ESRdDHwCuDwihm+AiIjNyft64GEgA+wA0pLKj7bNZL07IqIjIjpaWlpOfG/GSRLtrWl3kJvZlFXI4FgGLE6ugqoErgaW5i4gKQPcTjY0tuW0N0mqSj43AxcCayL7bNaHgKuSRa8F7i3gPhyXTFuata90sa+n/9gLm5lNMgULjqQf4gbgAeBZ4NsRsVrSzZKGrpK6FagDvjPqsttXAcslPUU2KD4TEWuSeR8FbpS0jmyfx1cKtQ/HK9PWRASs8lGHmU1B5cde5PhFxH3AfaPaPpnz+eIjrPfvwKuPMG892Su2Slb7gjQAKzbu5g1nNBe3GDOzCVYSneNTTWNNBae31PrKKjObkhwcBdLe2sSKl3aT7ZYxM5s6HBwFkmlLs2N/L5t2HSh2KWZmE8rBUSBDNwI+6dNVZjbFODgK5KzZ9cyoSPlGQDObchwcBVKeKuO8BY1+IqCZTTkOjgLKtDWx5uU9HOwbKHYpZmYTxsFRQO2tafoGgtUv7y12KWZmE8bBUUBDHeQet8rMphIHRwHNbqhmfnqGbwQ0synFwVFg7W1pX1llZlOKg6PAMq1pNu8+wLa9B4tdipnZhHBwFFimrQnAl+Wa2ZTh4Ciwc+Y1UJGST1eZ2ZTh4Ciw6ooUS+Y2sHKjO8jNbGpwcJwEmbYmVm3aQ//AYLFLMTM7YQ6OkyDTlqa7d4DnXtlX7FLMzE6Yg+MkyLQOdZD7dJWZTX4OjpOgdeYMZtZWuoPczKYEB8dJIIlMa9pDj5jZlFDQ4JB0iaS1ktZJummM+TdKWiNplaSfSDo1aW+X9HNJq5N5b89Z52uSXpC0Mnm1F3IfJkqmLc26bfvYc6Cv2KWYmZ2QggWHpBRwG3ApsAS4RtKSUYutADoi4jzgHuCzSXs38J6IOAe4BPiCpHTOeh+JiPbktbJQ+zCRhm4EfMpHHWY2yRXyiOMCYF1ErI+IXuBu4IrcBSLioYjoTiYfAxYk7c9FxPPJ55eBbUBLAWstuPMWNCLhfg4zm/TGFRySaiWVJZ/PlHS5pIpjrDYf2JgzvSlpO5L3AveP8d0XAJXAr3Kab0lOYX1eUtURar5e0nJJyzs7O49RauHVV1dw5in1vrLKzCa98R5x/AyoljQfeBB4N/C1iSpC0ruADuDWUe1zgX8A/iAihu6e+xhwNnA+MBP46FjbjIg7IqIjIjpaWkrjYKU96SCPiGKXYmZ23MYbHEpOKf0e8HcR8fvAOcdYZzPQmjO9IGkbuWHpYuATwOUR0ZPT3gD8EPhERDw21B4RWyKrB7iT7CmxSSHTlmZ3dx8bdnQfe2EzsxI17uCQ9HrgnWT/MgdIHWOdZcBiSYskVQJXA0tHbTQD3E42NLbltFcC3we+HhH3jFpn7lBBwJXAM+Pch6IbHinXD3Yys0lsvMHxQbKniL4fEaslnQY8dLQVIqIfuAF4AHgW+Hay7s2SLk8WuxWoA76TXFo7FCxvAy4Crhvjstu7JD0NPA00A58e5z4U3Rmn1FFXVe4OcjOb1JTv+fakk7wuIvYWpqSJ19HREcuXLy92GQC888uPsedAHz94/xuLXYqZ2VFJeiIiOka3j/eqqn+U1CCpluypoTWSPjLRRU4HmdYmnt3SxYHegWKXYmZ2XMZ7qmpJcoRxJdlLZheRvbLK8tTemmZgMHh6855il2JmdlzGGxwVyX0bVwJLI6IP8DWlx6G9LQ3gBzuZ2aQ13uC4HdgA1AI/S8aUmjR9HKWkua6Ktpk17iA3s0lrXMEREV+MiPkRcVlyD8WLwJsKXNuUlWlLOzjMbNIab+d4o6TPDQ3hIelvyB592HHItKbZuvcgW/YcKHYpZmZ5G++pqq8CXWTvr3gb2dNUdxaqqKmuffhGwN3FLcTM7DiMNzhOj4hPJSPdro+I/wGcVsjCprIlcxuoLC/zg53MbFIab3AckPTrQxOSLgR8nuU4VZaXce68Bg89YmaTUvk4l3sf8HVJjcn0LuDawpQ0PWTamvjGYy/SNzBIRcpP8DWzyWO8V1U9FRGvAc4DzouIDPDmglY2xWXa0vT0D/LLLV3FLsXMLC95/VM3IvbmjFF1YwHqmTaGR8r1jYBmNsmcyDkSTVgV09C8xmpa6qt8ZZWZTTonEhwecuQESCKTPBHQzGwyOWrnuKQuxg4IATMKUtE0kmlr4sE1r7Brfy9NtZXFLsfMbFyOesQREfUR0TDGqz4ixntFlh1BZnjAw91FrcPMLB++DrSIzlvQSJn8KFkzm1wcHEVUU1nO2XMaWOEjDjObRBwcRdbelmblS7sZHPS1BmY2OTg4iizTmqarp5/12/cVuxQzs3EpaHBIukTSWknrJN00xvwbJa2RtErST5IHRA3Nu1bS88nr2pz2/yDp6WSbX5Q0qe8nGboR8Enfz2Fmk0TBgkNSCrgNuBRYAlwjacmoxVYAHRFxHnAP8Nlk3ZnAp4DXAhcAn5LUlKzzJeCPgMXJ65JC7cPJcFpzLQ3V5b4R0MwmjUIecVwArEuGYe8F7gauyF0gIh6KiO5k8jFgQfL5t4EfR8TOiNgF/Bi4RNJcoCEiHouIAL5O9jnok1ZZmWhva/KVVWY2aRQyOOYDG3OmNyVtR/Je4P5jrDs/+XzMbUq6fuiJhZ2dnXmWfnK1t6Z57pUu9vX0F7sUM7NjKonOcUnvAjqAWydqmxFxR0R0RERHS0vLRG22IDJtaQYDVm3aXexSzMyOqZDBsRlozZlekLSNIOli4BPA5RHRc4x1N3PodNYRtznZtC9IA76D3Mwmh0IGxzJgsaRFkiqBq4GluQtIygC3kw2NbTmzHgB+S1JT0in+W8ADEbEF2CvpdcnVVO8B7i3gPpwUTbWVnNZc6w5yM5sUCjbeVET0S7qBbAikgK9GxGpJNwPLI2Ip2VNTdcB3kqtqX
4qIyyNip6T/STZ8AG6OiJ3J5z8FvkZ2kMX7OdQvMqm1t6X52XPbiQgm+RXGZjbFFXSgwoi4D7hvVNsncz5ffJR1vwp8dYz25cC5E1hmSci0NfG9JzezadcBWmfWFLscM7MjKonOccveQQ543CozK3kOjhJx1px6qivKWOl+DjMrcQ6OElGRKuO8+Wk/g9zMSp6Do4Rk2tKs3ryXnv6BYpdiZnZEDo4SkmlL0zswyJqX9xa7FDOzI3JwlJChkXJ9P4eZlTIHRwmZ3VDN3MZqX1llZiXNwVFiMm1pVrqD3MxKmIOjxGRam9i48wCdXT3HXtjMrAgcHCUm05YGPOChmZUuB0eJOXd+I+Vl8oOdzKxkOThKTHVFilfNbfCVVWZWshwcJSjTlmbVpt0MDEaxSzEzO4yDowRl2tLs7x3g+W1dxS7FzOwwDo4SlGn1jYBmVrocHCXo1Fk1NNVUuIPczEqSg6MESSLT1uQjDjMrSQ6OEtXemmZd5z72HuwrdilmZiM4OEpUpi1NBKzauKfYpZiZjVDQ4JB0iaS1ktZJummM+RdJelJSv6SrctrfJGllzuugpCuTeV+T9ELOvPZC7kOxvKY1jYT7Ocys5JQXasOSUsBtwFuBTcAySUsjYk3OYi8B1wEfzl03Ih4C2pPtzATWAQ/mLPKRiLinULWXgobqCs5oqfNIuWZWcgp5xHEBsC4i1kdEL3A3cEXuAhGxISJWAYNH2c5VwP0R0V24UktTpi3Nipd2EeEbAc2sdBQyOOYDG3OmNyVt+boa+OaotlskrZL0eUlVY60k6XpJyyUt7+zsPI6vLb5MWxO7uvt4cce0y0wzK2El3TkuaS7wauCBnOaPAWcD5wMzgY+OtW5E3BERHRHR0dLSUvBaC6G9NQ3ACj+fw8xKSCGDYzPQmjO9IGnLx9uA70fE8DWpEbElsnqAO8meEpuSzpxdT01lipW+n8PMSkghg2MZsFjSIkmVZE85Lc1zG9cw6jRVchSCJAFXAs+ceKmlKVUmXrMg7Q5yMyspBQuOiOgHbiB7mulZ4NsRsVrSzZIuB5B0vqRNwO8Dt0taPbS+pIVkj1geGbXpuyQ9DTwNNAOfLtQ+lIJMW5o1L+/lYN9AsUsxMwMKeDkuQETcB9w3qu2TOZ+XkT2FNda6GxijMz0i3jyxVZa2TFsT/YPBM5v30LFwZrHLMTMr7c5xO9RBvmyDO8jNrDQ4OEpcS30V58xr4K8fXMv/uv9ZDvT6lJWZFZeDYxK46w9fy1W/toDbH1nPWz//CA+v3VbsksxsGnNwTALpmkr+6qrz+Nb1r6OyvIzr7lzG+7+5gs6unmKXZmbTkINjEnntabO4/wNv5IMXL+aBZ7bylr95mG/+4iUG/WxyMzuJHByTTFV5ig9efCb3f/CNvGpuAx/73tO8/Y6f8/wrfj65mZ0cDo5J6vSWOu6+/nV89qrzeH7bPi774qP8zYNrfb+HmRWcg2MSk8TbOlr5yY2/we+eN4///dN1XPq3j/Lv67YXuzQzm8IcHFPArLoqPvf2dr7x3tcyGME7vvw4N357JTv39xa7NDObghwcU8ivL27mgQ9exJ+96XSWrnyZt/zNw9zzxCY/z8PMJpSDY4qprkjxkd8+mx/++Rs5raWOD3/nKd7x/x5nfee+YpdmZlOEg2OKOmtOPd/549dzy386l2de3sMlf/soX/zJ8/T2H+1hi2Zmx+bgmMLKysQ7X3sqP7nxN3jrktl87sfPcdkXH2XZhp3FLs3MJjEHxzRwSkM1t73j17jzuvM50DvA7//fn3PTd1exp7vv2CubmY3i4JhG3nT2Kfz4xou4/qLT+M4Tm3jL5x7m3pWb3XluZnlxcEwzNZXlfPyyV7H0hguZl57BB+5eybV3LmPjzu5il2Zmk4SDY5o6Z14j3//TC/nU7y7hiQ07eevnH+FLD/+KvgF3npvZ0Tk4prFUmfiDCxfxLx/6DS5a3MJf/eiX/O7//ldWvOSHRpnZkTk4jLmNM7jjPR3c/u7/wO7uPn7vS//OJ+99hr0H3XluZodzcNiw3z5nDj++8SKuff1C/uGxF3nr5x7h/qe3uPPczEYoaHBIukTSWknrJN00xvyLJD0pqV/SVaPmDUhambyW5rQvkvR4ss1vSaos5D5MN/XVFfzl5efwT396ITNrq/iTu57kj76+nM27DxS7NDMrEQULDkkp4DbgUmAJcI2kJaMWewm4DvjHMTZxICLak9flOe1/BXw+Is4AdgHvnfDijde0pvnnGy7k45edzb+t28FbP/cIX350PXsO+PSV2XRXXsBtXwCsi4j1AJLuBq4A1gwtEBEbknnjupRHkoA3A+9Imv4e+EvgSxNVtB1Snirj+otO59Jz5/Lf732GT//wWT79w2eZ21jNmbPrOWtOffZ9dj1nnFLHjMpUsUs2s5OgkMExH9iYM70JeG0e61dLWg70A5+JiH8CZgG7I6I/Z5vzx1pZ0vXA9QBtbW35VW4jtM6s4c7rzufn63ewatMe1m7tYu3WLn6+fsfw2FcSLJxVy5mz6zhrdj1nzskGysLmWipS7kozm0oKGRwn6tSI2CzpNOCnkp4G9ox35Yi4A7gDoKOjw727J0gSbzi9mTec3jzc1j8wyIs7u3luaxdrX+niuVeygfLjNa8w9Bj0ipQ4vaVu+AjlrOR9fnoGZWUq0t6Y2YkoZHBsBlpzphckbeMSEZuT9/WSHgYywHeBtKTy5Kgjr23axCpPlXF6Sx2nt9Rx6avnDrcf7BvgV537kiDJvj/x4i6WPvXy8DI1lSkWz67nrNkjQ6WlvorsGUkzK1WFDI5lwGJJi8j+5X41h/omjkpSE9AdET2SmoELgc9GREh6CLgKuBu4Fri3INXbcauuSHHOvEbOmdc4or3rYB/Pb9s3fISydmsXP/3lNr69fNPwMk01FSP7T+bUc+Yp9TTWVJzs3TCzI1Ahr9GXdBnwBSAFfDUibpF0M7A8IpZKOh/4PtAEHAS2RsQ5kt4A3A4Mkr3y6wsR8ZVkm6eRDY2ZwArgXRHRc7Q6Ojo6Yvny5QXZRztx2/f18NwrXUmg7Bv+3NXTP7zMnIbqpN8ke4RyWkstp86qZVZtpY9QzApE0hMR0XFY+3S4ucvBMflEBFv2HBw+Mhk6Snl+274RD6OqrypnYXNt9jWrhoWzDn2e6VAxOyFHCo5S7hy3aUwS89IzmJeewZvOOmW4fWAweGlnNxu27+eF7ft5ccd+XtjRzVMbd/PDVS8Pd8oD1FeXs6g5e2SyaFYNC4c+N9fSVFPhUDE7Tg4Om1RSZWJRc/Yv/zeNmtfbP8imXd1s2LGfF7Z3Z0Nl+35Wbtx1WKg0VCdHKskRyqLmmiRgammq9WAEZkfj4LApo7K8jNNa6jitpe6web39g2zclXukkg2YJ1/axQ9GhUrjjIrsaa/hYMmeAlvUXEu6xqFi5uCwaaGy/NClw6P19A+wcecBNmzfz4YdyWt79/AlxLndgOmaiuFTX22zapnXWM3sxmrmNlYzt2EGDTPKfQrMpjwHh017VeUpzjiljjNOOVKo
dI849bVhx36WbdjFvaNCBWBGRYo5jdXMaciGyZyhV0M1cxtnMKexmlm1lb750SY1B4fZUWRDpZ4zTqk/bF7fwCDbunrYuucAW/YcZGvy2rI3+/74Czt5Ze9B+gdHpktFSpxSnw2W2Y3VzG3Ihks2WKqY0ziDU+qrPFSLlSwHh9lxqkiVMT89g/npGUdcZnAw2L6/ZzhUtu49yJY9B3llT/Z9zct7+cmzr3Cwb+Q4nxK01FWNOnpJgqVhxvDRTHWFB5a0k8/BYVZAZWXZo4tT6qs5b8HYy0QEew/0s2XvyCOXoaOXDTv289j6Hew92H/YuumaCprrqqitTFFbVU5NZTm1Vanse9I2PF2VorayPFkuNfxel6xXWe4jHBsfB4dZkUmisaaCxpoKzp7TcMTl9vf0s3XvwVFHLwfYub+X/T0DdPf28/LuA+zv7R+e7u4dGHcdFSmNCJyaquznmspy6qpSI6Zrq5JQqjwUQo0zKjilvoqZtZWU+zTblObgMJskaqvKj3hl2JEMDgbdfQN09/Szv3eA/T3ZMNnf08/+3n66eway770D7OvpH16uOwmf/T397Oo+MDw9njCSYGZNJS31VbTUV9Fcl31vqauiub6SlrrqpL2SphpfKDAZOTjMprCyMlFXVU5d1cT9UR8YDA6MCqN9Pf3s7u6jc18P27t66NzXQ2dXD9v39fDC9v10dvXQ03/489pSZaK5rnJUuGTfRwdPQ7UvdS4VDg4zy0vqOMIoItjX009n11Cg9NLZdTAJmt7hoPnlli627+s57Eo0yN6LMzJYKocDpjnnvb66nBmVKarLUz6aKRAHh5kVnCTqqyuor64Y887+XIODwZ4Dhx+9DL939bB59wFWbtzNjv09h91Lk6uqvGw4RGZUpqiuSDGjIts2oyI7nW3LnX9omdHzD61TdmidaRhQDg4zKyllZaKptpKm2krOnH34/TO5BgaDnft7h4Nle1cP+3v7OdA7wIG+7Otg7wAH+wYPTfcNcKB3gN3dfRzsy5mXrHM8hgIqN4yqysuoLC+jKnlVlpdRmSqjqjyV/ZzTPrqtamjZijIqU6kRy45YL5VKlik7qeHl4DCzSStVpuG+kIkQEfT0D3Kgd4CD/YfCJBs2g4c+5wTQUCD19A2OnO4fpLc/2we0c/8gvf2DSdsgvQOD9PQN0DswSN/AxDzaorxMo8IlGzhffk8HC5trJ+Q7hr9rQrdmZjaJSRo+YjhZBgcjGyT9g/T0D2SD5bCQGaR3YCB5H1p2MGfZgZHrDLUNDDKjcuL3xcFhZlZEZWWiumworCbHI5J9l46ZmeXFwWFmZnkpaHBIukTSWknrJN00xvyLJD0pqV/SVTnt7ZJ+Lmm1pFWS3p4z72uSXpC0Mnm1F3IfzMxspIL1cUhKAbcBbwU2AcskLY2INTmLvQRcB3x41OrdwHsi4nlJ84AnJD0QEbuT+R+JiHsKVbuZmR1ZITvHLwDWRcR6AEl3A1cAw8ERERuSeSPGIoiI53I+vyxpG9AC7C5gvWZmNg6FPFU1H9iYM70pacuLpAuASuBXOc23JKewPi9pYi7gNjOzcSnpznFJc4F/AP4gIoaOSj4GnA2cD8wEPnqEda+XtFzS8s7OzpNSr5nZdFDI4NgMtOZML0jaxkVSA/BD4BMR8dhQe0Rsiawe4E6yp8QOExF3RERHRHS0tLQc1w6YmdnhCtnHsQxYLGkR2cC4GnjHeFaUVAl8H/j66E5wSXMjYouy4ytfCTxzrO098cQT2yW9mGf9Q5qB7ce57lTk3+MQ/xYj+fcYaSr8HqeO1ag42tCSJ0jSZcAXgBTw1Yi4RdLNwPKIWCrpfLIB0QQcBLZGxDmS3kX2aGJ1zuaui4iVkn5KtqNcwErgfRGxr4D7sDwiOgq1/cnGv8ch/i1G8u8x0lT+PQoaHFPBVP6Pfzz8exzi32Ik/x4jTeXfo6Q7x83MrPQ4OI7tjmIXUGL8exzi32Ik/x4jTdnfw6eqzMwsLz7iMDOzvDg4zMwsLw6OozjW6L7ThaRWSQ9JWpOMWPyBYtdUCiSlJK2Q9INi11JsktKS7pH0S0nPSnp9sWsqFkn/Nflz8oykb0qqLnZNE83BcQQ5o/teCiwBrpG0pLhVFU0/8KGIWAK8Dvizafxb5PoA8GyxiygRfwv8KCLOBl7DNP1dJM0H/hzoiIhzyd7DdnVxq5p4Do4jGx7dNyJ6gaHRfaedZJiXJ5PPXWT/Ush7wMqpRNIC4D8CXy52LcUmqRG4CPgKQET05jwCYToqB2ZIKgdqgJeLXM+Ec3Ac2YSM7jvVSFoIZIDHi1xKsX0B+Atg8BjLTQeLgE7gzuTU3Zcl1Ra7qGKIiM3AX5N91tAWYE9EPFjcqiaeg8PGTVId8F3ggxGxt9j1FIuk3wG2RcQTxa6lRJQDvwZ8KSIywH5gWvYJSmoie2ZiETAPqE2GUJpSHBxHdkKj+041kirIhsZdEfG9YtdTZBcCl0vaQPYU5pslfaO4JRXVJmBTRAwdhd5DNkimo4uBFyKiMyL6gO8BbyhyTRPOwXFkw6P7JqP1Xg0sLXJNRZGMRPwV4NmI+Fyx6ym2iPhYRCyIiIVk/7/4aURMuX9VjldEbAU2SjoraXoLOU/6nGZeAl4nqSb5c/MWpuCFAoUcVn1Si4h+STcAD3BodN/Vx1htqroQeDfwtKSVSdvHI+K+4pVkJeb9wF3JP7LWA39Q5HqKIiIel3QP8CTZqxFXMAWHHvGQI2ZmlhefqjIzs7w4OMzMLC8ODjMzy4uDw8zM8uLgMDOzvDg4zI5B0r7kfaGkd0zwtj8+avrfJ3L7ZoXg4DAbv4VAXsGRDHR3NCOCIyKm3F3GNvU4OMzG7zPAGyWtTJ65kJJ0q6RlklZJ+mMASb8p6VFJS0nuoJb0T5KeSJ7TcH3S9hmyo6iulHRX0jZ0dKNk289IelrS23O2/XDOsy/uSu5QRtJnkmemrJL01yf917Fpw3eOm43fTcCHI+J3AJIA2BMR50uqAv5N0tBIqL8GnBsRLyTT/yUidkqaASyT9N2IuEnSDRHRPsZ3/R7QTvbZFs3JOj9L5mWAc8gO1/1vwIWSngX+E3B2RISk9MTuutkhPuIwO36/BbwnGYblcWAWsDiZ94uc0AD4c0lPAY+RHTxzMUf368A3I2IgIl4BHgHOz9n2pogYBFaSPYW2BzgIfEXS7wHdJ7hvZkfk4DA7fgLeHxHtyWtRzrMX9g8vJP0m2VFTXx8RryE7ftGJPE60J+fzAFAeEf1kHz52D/A7wI9OYPtmR+XgMBu/LqA+Z/oB4E+SIeeRdOYRHmDUCOyKiG5JZ5N9/O6QvqH1R3kUeHvSj9JC9gl7vzhSYcmzUhqTgSf/K9lTXGYF4T4Os/FbBQwkp5y+RvY52wuBJ5MO6k7gyjHW+xHwvqQfYi3Z01VD7gBWSXoyIt6Z0/594PXAU0AAfxERW5PgGUs9cK+karJHQjce1x6ajYNHxzUzs7z4VJWZmeXFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJlZXv4//lzH8IM
QHB8AAAAASUVORK5CYII=\n",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAofElEQVR4nO3de3Rd5X3m8e+jo5slSzq2JRMsHWMbTIi5SLSGXEsTSlpIO8C0uUCbhLRpmXRKmpY2DWlmpR2mWSuFTpJ2SjowJSSZkFJCksaTQsiNQNKEBBOMb9yM8N1g+Spbsu6/+eNsiSMhyTq2js+R9HzW0tLe776c3z4herz3u/e7FRGYmZlNVVmxCzAzs5nFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJ0ASQ9Ium66182zhjdL2jnd+zU7nvJiF2B2qkg6mjNbA/QCg8n8f4mIu6e6r4i4ohDrms0EDg6bMyJi/vC0pK3A70fEd8euJ6k8IgZOZW1mM4kvVdmcN3zJR9JHJL0I3CVpgaRvSuqQdDCZbsnZ5geSfj+Zfp+kH0n6u2TdFyRdcYLrLpf0iKQjkr4r6TZJX5ricbwm+axDkjZJujJn2dskbU72u0vSnyftjcmxHZJ0QNIPJfnvgk3K/4GYZb0KWAicAVxP9v8bdyXzS4FjwD9Osv1rgWeARuAW4E5JOoF1vwz8DFgE/DXwnqkUL6kC+H/At4HFwAeBuyW9OlnlTrKX4+qA84DvJ+1/BuwEmoDTgL8EPA6RTcrBYZY1BPxVRPRGxLGI2B8RX42I7og4AnwC+OVJtt8WEf8nIgaBLwCnk/1DPOV1JS0FLgI+HhF9EfEjYM0U638dMB/4ZLLt94FvAtcmy/uBVZLqI+JgRPw8p/104IyI6I+IH4YHsLPjcHCYZXVERM/wjKQaSbdL2iapE3gESEtKTbD9i8MTEdGdTM7Pc90lwIGcNoAdU6x/CbAjIoZy2rYBzcn0bwFvA7ZJeljS65P2W4EtwLcltUu6aYqfZ3OYg8Msa+y/sv8MeDXw2oioBy5J2ie6/DQd9gALJdXktGWmuO1uIDOmf2IpsAsgIh6LiKvIXsb6N+DepP1IRPxZRKwArgRulPQrJ3cYNts5OMzGV0e2X+OQpIXAXxX6AyNiG7AW+GtJlclZwX+a4uY/BbqBv5BUIenNybb3JPv6HUkNEdEPdJK9NIek35B0VtLHcpjs7clD436CWcLBYTa+zwDzgH3Ao8C3TtHn/g7wemA/8DfAv5J93mRSEdFHNiiuIFvzZ4H3RsTTySrvAbYml90+kHwOwErgu8BR4CfAZyPioWk7GpuV5H4ws9Il6V+BpyOi4Gc8ZlPlMw6zEiLpIklnSiqTdDlwFdk+CbOS4SfHzUrLq4CvkX2OYyfwhxHxRHFLMhvNl6rMzCwvvlRlZmZ5mROXqhobG2PZsmXFLsPMbEZ5/PHH90VE09j2OREcy5YtY+3atcUuw8xsRpG0bbx2X6oyM7O8ODjMzCwvDg4zM8uLg8PMzPLi4DAzs7w4OMzMLC8ODjMzy4uDYxLfWLeLLz067m3MZmZzloNjEt/a+CJ3PNJe7DLMzEqKg2MSrZk02w90c6Crr9ilmJmVDAfHJFpb0gA8ueNQUeswMyslDo5JXNDSQJlgnYPDzGyEg2MStVXlrFxcx5M7DxW7FDOzkuHgOI62TJondxzCL7wyM8tycBxHaybNwe5+th/oLnYpZmYloaDBIelySc9I2iLppnGWf0DSBknrJP1I0qqcZR9NtntG0q9NdZ/TrTXTALifw8xsWMGCQ1IKuA24AlgFXJsbDIkvR8T5EdEG3AJ8Ktl2FXANcC5wOfBZSakp7nNavfq0OqoryhwcZmaJQp5xXAxsiYj2iOgD7gGuyl0hIjpzZmuB4Y6Eq4B7IqI3Il4AtiT7O+4+p1t5qozzmxt8S66ZWaKQwdEM7MiZ35m0jSLpjyQ9T/aM44+Ps+2U9pns93pJayWt7ejoOOGDgOzzHBt3d9I/OHRS+zEzmw2K3jkeEbdFxJnAR4D/No37vSMiVkfE6qamV7xrPS9tS9P0DQzx9J4j01SdmdnMVcjg2AVkcuZbkraJ3ANcfZxt893ntBh+gnydn+cwMytocDwGrJS0XFIl2c7uNbkrSFqZM/vrwHPJ9BrgGklVkpYDK4GfTWWfhdCyYB6LaitZt/1QoT/KzKzklRdqxxExIOkG4EEgBXwuIjZJuhlYGxFrgBskXQb0AweB65JtN0m6F9gMDAB/FBGDAOPts1DHMExS9kFAn3GYmRUuOAAi4n7g/jFtH8+Z/tAk234C+MRU9nkqtGbSfP+ZvXT29FNfXXGqP97MrGQUvXN8pmjLpImADTsPF7sUM7OicnBM0QUtfoLczAwcHFOWrqlkeWOtHwQ0sznPwZGHtkyadR4p18zmOAdHHlpbGth7pJcXO3uKXYqZWdE4OPLQmkkDfpWsmc1tDo48rFpST0VKPOHgMLM5zMGRh6ryFKtOr/cZh5nNaQ6OPLVm0mzYeZjBIXeQm9nc5ODIU1smTVffIFv2Hi12KWZmReHgyJM7yM1srnNw5Gn5olrqq8s9xLqZzVkOjjyVlYnWTNpDrJvZnOXgOAGtLWmeeekIx/oGi12Kmdkp5+A4AW2ZNINDwcbdHinXzOYeB8cJuCCTHSnXHeRmNhc5OE7A4rpqmtPzPMS6mc1JDo4TNDxSrpnZXOPgOEGtmQZ2HjzGvqO9xS7FzOyUKmhwSLpc0jOStki6aZzlN0raLGm9pO9JOiNpf4ukdTk/PZKuTpZ9XtILOcvaCnkME2ltSQOw3s9zmNkcU7DgkJQCbgOuAFYB10paNWa1J4DVEXEBcB9wC0BEPBQRbRHRBlwKdAPfztnuw8PLI2JdoY5hMue3NFAm/DyHmc05hTzjuBjYEhHtEdEH3ANclbtCEhDdyeyjQMs4+3k78EDOeiWhprKcs0+rY91O35JrZnNLIYOjGdiRM78zaZvI+4EHxmm/BviXMW2fSC5vfVpS1Xg7k3S9pLWS1nZ0dORT95S1ZdI86VfJmtkcUxKd45LeDawGbh3TfjpwPvBgTvNHgXOAi4CFwEfG22dE3BERqyNidVNTU0HqbsukOXysn637S+pkyMysoAoZHLuATM58S9I2iqTLgI8BV0bE2FuU3gl8PSL6hxsiYk9k9QJ3kb0kVhQeKdfM5qJCBsdjwEpJyyVVkr3ktCZ3BUkXAreTDY294+zjWsZcpkrOQpAk4Gpg4/SXPjVnn1ZHTWXKz3OY2ZxSXqgdR8SApBvIXmZKAZ+LiE2SbgbWRsQaspem5gNfyeYA2yPiSgBJy8iesTw8Ztd3S2oCBKwDPlCoYzieVJk4r7nBwWFmc0rBggMgIu4H7h/T9vGc6csm2XYr43SmR8Sl01jiSWvLpPn8f2ylb2CIyvKS6DIyMyso/6U7SW2ZNH2DQzy1p7PYpZiZnRIOjpM00kHuJ8jNbI5wcJykJQ3VNM6vcj+Hmc0ZDo6TJMkj5ZrZnOLgmA
ZtmQbaO7o4fKz/+Cubmc1wDo5pMNzPscHjVpnZHODgmAYXJEOsr9txsLiFmJmdAg6OadAwr4IVTbWs2+EzDjOb/Rwc02S4g9wj5ZrZbOfgmCZtmTT7jvay+3BPsUsxMysoB8c0GX6VrEfKNbPZzsExTV5zej2VqTIHh5nNeg6OaVJZXsaqJfU84eAws1nOwTGN2jJpNuw8zMDgULFLMTMrGAfHNGrLpDnWP8hze48WuxQzs4JxcEwjv0rWzOYCB8c0WraohoZ5FR5i3cxmNQfHNJJEaybNE9sPFbsUM7OCcXBMs7aWBp596QjdfQPFLsXMrCAKGhySLpf0jKQtkm4aZ/mNkjZLWi/pe5LOyFk2KGld8rMmp325pJ8m+/xXSZWFPIZ8tS1NMxSwcZdfJWtms1PBgkNSCrgNuAJYBVwradWY1Z4AVkfEBcB9wC05y45FRFvyc2VO+98Cn46Is4CDwPsLdQwnwiPlmtlsV8gzjouBLRHRHhF9wD3AVbkrRMRDEdGdzD4KtEy2Q0kCLiUbMgBfAK6ezqJPVuP8KloWzONJj5RrZrNUIYOjGdiRM78zaZvI+4EHcuarJa2V9Kikq5O2RcChiBjuQJhwn5KuT7Zf29HRcUIHcKL8Klkzm81KonNc0ruB1cCtOc1nRMRq4LeBz0g6M599RsQdEbE6IlY3NTVNY7XH15ZJs+vQMTqO9J7SzzUzOxUKGRy7gEzOfEvSNoqky4CPAVdGxMhf2ojYlfxuB34AXAjsB9KSyifbZ7H5QUAzm80KGRyPASuTu6AqgWuANbkrSLoQuJ1saOzNaV8gqSqZbgTeCGyO7FuSHgLenqx6HfCNAh7DCTlvSQOpMvlBQDOblQoWHEk/xA3Ag8BTwL0RsUnSzZKG75K6FZgPfGXMbbevAdZKepJsUHwyIjYnyz4C3ChpC9k+jzsLdQwnal5lilefVud+DjOblcqPv8qJi4j7gfvHtH08Z/qyCbb7MXD+BMvayd6xVdJaM2n+ff1uhoaCsjIVuxwzs2lTEp3js9GFmTSdPQNs3d9V7FLMzKaVg6NAhjvIfbnKzGYbB0eBnLV4PrWVKd9ZZWazjoOjQFJl4vyWBp9xmNms4+AooNZMms17OukdGCx2KWZm08bBUUAXZtL0DwZP7TlS7FLMzKaNg6OARjrIt3ukXDObPRwcBfSq+moW11Xx5E6PlGtms4eDo4Ak0ZZJ+84qM5tVHBwF1ppJ076vi8Pd/cUuxcxsWjg4CqxteKRcD3hoZrOEg6PAzm9pQPIQ62Y2ezg4Cqy+uoIzm+b7QUAzmzUcHKdAa0uaJ3ceIvs6ETOzmc3BcQq0LU2z72gfuw4dK3YpZmYnzcFxCrS1pAGPlGtms4OD4xQ45/Q6KsvL3EFuZrOCg+MUqEiVcd6Sep9xmNms4OA4RVozaTbsOszA4FCxSzEzOylTCg5JtZLKkumzJV0pqWIK210u6RlJWyTdNM7yGyVtlrRe0vcknZG0t0n6iaRNybJ35WzzeUkvSFqX/LRN+WiLqC2Tpqd/iGdfOlrsUszMTspUzzgeAaolNQPfBt4DfH6yDSSlgNuAK4BVwLWSVo1Z7QlgdURcANwH3JK0dwPvjYhzgcuBz0hK52z34YhoS37WTfEYiqrNr5I1s1liqsGhiOgGfhP4bES8Azj3ONtcDGyJiPaI6APuAa7KXSEiHkr2C/Ao0JK0PxsRzyXTu4G9QNMUay1JSxfWsKCmwh3kZjbjTTk4JL0e+B3g35O21HG2aQZ25MzvTNom8n7ggXE++GKgEng+p/kTySWsT0uqmqDg6yWtlbS2o6PjOKUWniRaM2mPWWVmM95Ug+NPgI8CX4+ITZJWAA9NVxGS3g2sBm4d03468H+B342I4V7ljwLnABcBC4GPjLfPiLgjIlZHxOqmptI4WWltSfPsS0fo6h0odilmZidsSsEREQ9HxJUR8bdJJ/m+iPjj42y2C8jkzLckbaNIugz4GHBlRPTmtNeTPbv5WEQ8mlPLnsjqBe4ie0lsRmjLpBkK2LDLL3Yys5lrqndVfVlSvaRaYCOwWdKHj7PZY8BKScslVQLXAGvG7PdC4HayobE3p70S+DrwxYi4b8w2pye/BVyd1DMjDL9K1v0cZjaTTfVS1aqI6CT7h/oBYDnZO6smFBEDwA3Ag8BTwL3JZa6bJV2ZrHYrMB/4SnJr7XCwvBO4BHjfOLfd3i1pA7ABaAT+ZorHUHQLaytZurDGd1aZ2YxWPsX1KpLnNq4G/jEi+iUdd6jXiLgfuH9M28dzpi+bYLsvAV+aYNmlU6y5JLVl0qzdeqDYZZiZnbCpnnHcDmwFaoFHkgf1OgtV1GzWmkmz+3APezt7il2KmdkJmWrn+D9ERHNEvC3pmN4GvKXAtc1KbZkGwA8CmtnMNdXO8QZJnxp+LkLS/yR79mF5OndJA+Vl8vMcZjZjTfVS1eeAI2Q7rd9J9jLVXYUqajarrkhxzul1PuMwsxlrqp3jZ0bEb+XM/3dJ6wpQz5zQ2pJmzbrdDA0FZWUqdjlmZnmZ6hnHMUlvGp6R9EbA70E9QW2ZNEd6B2jf11XsUszM8jbVM44PAF+U1JDMHwSuK0xJs1/uSLlnLZ5f3GLMzPI01buqnoyIVuAC4IKIuBCY0c9TFNOKpvnMryr3E+RmNiPl9QbAiOhMniAHuLEA9cwJqTJxQUuD76wysxnpZF4d617dk9CaSfPUnk56+geLXYqZWV5OJjiOO+SITay1JU3/YLB5jx/AN7OZZdLOcUlHGD8gBMwrSEVzxIVL00B2pNxfWLqguMWYmeVh0uCIiLpTVchcc1p9Na+qr/aDgGY245zMpSo7SW2ZtO+sMrMZx8FRRK2ZNFv3d3Oou6/YpZiZTZmDo4haPVKumc1ADo4iuqAljQRP7vA7yM1s5nBwFNH8qnJWLp7vBwHNbEZxcBRZa0uadTsOEeHHYsxsZihocEi6XNIzkrZIummc5TdK2ixpvaTvJa+kHV52naTnkp/rctp/UdKGZJ//IGlGP8HetjTNga4+dh70YMNmNjMULDgkpYDbgCuAVcC1klaNWe0JYHVEXADcB9ySbLsQ+CvgtcDFwF9JGn5K7p+APwBWJj+XF+oYToXWljQAT7iD3MxmiEKecVwMbImI9ojoA+4BrspdISIeiojuZPZRoCWZ/jXgOxFxICIOAt8BLpd0OlAfEY9G9trOF4GrC3gMBffqV9VRVV7m5znMbMYoZHA0Azty5ncmbRN5P/DAcbZtTqaPu09J1w+/I72joyPP0k+dilQZ5zc3ODjMbMYoic5xSe8GVgO3Ttc+I+KOiFgdEaubmpqma7cF0ZpJs2HXYfoHh4pdipnZcRUyOHYBmZz5lqRtFEmXAR8DroyI3uNsu4uXL2dNuM+Zpi2TpndgiGdePFLsUszMjquQwfEYsFLSckmVwDXAmtwVJF0I3E42NPbmLHoQ+FVJC5JO8V8FHoyIPUCnpNcld1O9F/hGAY/hlBh+layf5zCzmaBgwRERA8ANZEPgKeDeiNgk6WZJVyar3QrMB74iaZ2kN
cm2B4D/QTZ8HgNuTtoA/ivwz8AW4Hle7heZsVoWzGNhbSXrth8qdilmZsc16bDqJysi7gfuH9P28ZzpyybZ9nPA58ZpXwucN41lFp2k7Ei5PuMwsxmgJDrHLfs8x3N7j3K0d6DYpZiZTcrBUSJaMw1EwHqfdZhZiXNwlIiRDnKPlGtmJc7BUSLSNZUsW1TjBwHNrOQ5OEpIaybtlzqZWclzcJSQtkyaFzt7ePFwT7FLMTObkIOjhLQm/Rw+6zCzUubgKCGrTq+nIiU/z2FmJc3BUUKqK1K85vR6d5CbWUlzcJSY1pY063ceZnDIr5I1s9Lk4CgxbZk0R3sHaO84WuxSzMzG5eAoMe4gN7NS5+AoMSsaa6mrLndwmFnJcnCUmLIy0drikXLNrHQ5OEpQa6aBp/ccoad/sNilmJm9goOjBLW2pBkYCjbt9oCHZlZ6HBwlqG2kg9zBYWalx8FRghbXV7OkodoPAppZSXJwlCiPlGtmpaqgwSHpcknPSNoi6aZxll8i6eeSBiS9Paf9LZLW5fz0SLo6WfZ5SS/kLGsr5DEUS1smzfYD3Rzo6it2KWZmoxQsOCSlgNuAK4BVwLWSVo1ZbTvwPuDLuY0R8VBEtEVEG3Ap0A18O2eVDw8vj4h1hTmC4modeSPgoaLWYWY2ViHPOC4GtkREe0T0AfcAV+WuEBFbI2I9MDTJft4OPBAR3YUrtfSc39xAmfwEuZmVnkIGRzOwI2d+Z9KWr2uAfxnT9glJ6yV9WlLViRZYymqryjn7tDo/CGhmJaekO8clnQ6cDzyY0/xR4BzgImAh8JEJtr1e0lpJazs6OgpeayG0tqR5cschIjxSrpmVjkIGxy4gkzPfkrTl453A1yOif7ghIvZEVi9wF9lLYq8QEXdExOqIWN3U1JTnx5aGtqVpDnb3s/3AnLpKZ2YlrpDB8RiwUtJySZVkLzmtyXMf1zLmMlVyFoIkAVcDG0++1NLU2pIG3M9hZqWlYMEREQPADWQvMz0F3BsRmyTdLOlKAEkXSdoJvAO4XdKm4e0lLSN7xvLwmF3fLWkDsAFoBP6mUMdQbGefNp95FSkHh5mVlPJC7jwi7gfuH9P28Zzpx8hewhpv262M05keEZdOb5WlqzxVxvnNDb4l18xKSkl3jlt2pNyNuzvpH5zsjmUzs1PHwVHiWjNp+gaGeHrPkWKXYmYGODhK3shIuX6ew8xKhIOjxDWn59Gcnsct33qaf/jecxzp6T/+RmZmBeTgKHGS+MLvXcwbzlzEp77zLJfc8hC3P/w8x/r8dkAzKw7NhaeSV69eHWvXri12GSdt/c5DfOo7z/KDZzponF/FDW85k2tfu5Sq8lSxSzOzWUjS4xGx+hXtDo6ZZ+3WA/zdt5/h0fYDLGmo5oZLV/KO1S1UpHwCaWbTx8Exi4Jj2I+37OPWbz/DE9sPsXRhDX9y2UquamsmVaZil2Zms8BEweF/os5gbzirka/94Ru4630XUVddzo33Psmvfvphvrl+N0NDs/8fBGZWHA6OGU4SbzlnMd/84Jv43+/+BVJl4oYvP8Hb/uGHfGfzSx5Z18ymnYNjlpDE5eedzgMfuoS/v6aNnv5B/uCLa7n6tv/gkWc7HCBmNm0cHLNMqkxc1dbMd2/8ZW75rQvYd7SP937uZ7zr9kf5afv+YpdnZrOAO8dnud6BQe59bAf/6/tb2Hukl19a2ciNbz2bC5cuKHZpZlbifFfVHA2OYT39g3zp0W189gfPc6Crj8tes5g/fevZnLukodilmVmJcnDM8eAY1tU7wOd/vJXbH36ezp4Bfv380/nTt67krMV1xS7NzEqMg8PBMcrhY/3c+cN27vzRCxzrH+TqtmY+dNlKzlhUW+zSzKxEODgcHOM60NXH7Q8/zxd+spX+weAdv9jCB39lJc3pecUuzcyKzMHh4JjU3s4ePvuD5/nyT7cDcO3FGf7oLWexuL66yJWZWbE4OBwcU7Lr0DH+8fvP8ZW1O0mVievesIwP/PKZLKytLHZpZnaKOTgcHHnZtr+Lv//uc3x93S5qKlL83puW8/u/tIKGeRXFLs3MTpGiBIeky4G/B1LAP0fEJ8csvwT4DHABcE1E3JezbBDYkMxuj4grk/blwD3AIuBx4D0R0TdZHQ6OE/fcS0f4zHef49837KGuupxLVjZxbnM95zc3cN6SBhb4TMRs1jrlwSEpBTwLvBXYCTwGXBsRm3PWWQbUA38OrBkTHEcjYv44+70X+FpE3CPpfwNPRsQ/TVaLg+Pkbdp9mP/zSDuPbz/IjgPHRtqb0/M4LwmSc5MwaaqrKmKlZjZdJgqO8gJ+5sXAlohoTwq4B7gKGAmOiNiaLBuayg4lCbgU+O2k6QvAXwOTBoedvHOXNPCZay4E4FB3H5t2d7Jh12E27jrMpt2dPLjppZF1X1VfzXnN9Zy7pCF7ZtLcwGn1VWT/5zOzma6QwdEM7MiZ3wm8No/tqyWtBQaAT0bEv5G9PHUoIgZy9tk83saSrgeuB1i6dGl+lduk0jWVvPGsRt54VuNIW2dPP5t3d7IxCZONuzv53tN7GT6hbZxfxXnN9Zy3JBsk5zXX05ye5zAxm4EKGRwn64yI2CVpBfB9SRuAw1PdOCLuAO6A7KWqAtVoifrqCl63YhGvW7FopK2rd4Cn9mTDZMOuTjbtPswPn9vHYPKukAU1FZzX3JBzZlLP0oU1DhOzElfI4NgFZHLmW5K2KYmIXcnvdkk/AC4EvgqkJZUnZx157dNOrdqqclYvW8jqZQtH2nr6B7NhsruTjTsPs3H3Ye78UTv9g9kwqasuT85K6pMzkwaWL6qlzG81NCsZhQyOx4CVyV1Qu4BreLlvYlKSFgDdEdErqRF4I3BLRISkh4C3k72z6jrgGwWp3gqiuiLFhUsXjBqdt3dgkGdfPMrG3YfZsOswm3Yd5gs/2UbfQLbrq7YyxblLGkbu5jp3SQPLGmuoKk8V6zDM5rRC3477NrK326aAz0XEJyTdDKyNiDWSLgK+DiwAeoAXI+JcSW8AbgeGyL4z5DMRcWeyzxVkQ2Mh8ATw7ojonawO31U18/QPDvHcS9kwGe432bynk57+bJiUCTILa1jeWMuKxvmsaKplRWMtK5rmuyPebJr4AUAHx4w3MDhE+74uNu0+zAsdXTy/r4v2ji5e2Hd0JFAAaipT2UBpms/yxlrObMqGy/KmWuZXlXK3nllpKcbtuGbTqjxVxtmn1XH2aaOHgB8aCl7s7BkJkec7umjf18W6HQf55vrd5P7baHFdFSuaalneOD8bKMl0ZsE8ylN+IabZVDg4bMYrKxNL0vNYkp7Hm1Y2jlrW0z/I9gPdtHdkA+WFfV20dxzlgY17ONTdP7JeRUosXVgzEijDZywrmmpZVFvpS19mORwcNqtVV6TGPUsBONjVR/u+o7QnZyjtHUd5YV8XjzzbQd/gy5e+6qrLWdE0nzMba0ddAlveWMu8SnfQ29zj4LA5a0FtJb9Yu5BfPGPhqPbBoWDXwWMjofLCvi7a9x3lJ+37+doTo+/+flV9NWcsqmHZolrO
aEx+L6rhjEXuT7HZy/9lm42RKhNLF9WwdFENb3716GXdfQPJ5a5soGzb3822/V187+m97Ds6+ua+xvlVLEtCZFmyv2WLalm2qJaGGo8ybDOXg8MsDzWV5dlnSpY0vGLZ0d4Btu3PhsnW/V1s29fNtgNd/Pj5fXz15z2j1k3XVIwEytjfC92nYiXOwWE2TeZXTRwqw530W/flBMv+bh7fdpD/9+RuhnLu/KqrKueMxrGBkr0EtrjOz6hY8Tk4zE6ByTrpewcG2XnwGNv2d7F1X3c2YPZ3sXl3Jw9ufJGBnFSZV5FK+lCG+1NqWTS/kqGhYGAoGBz5PUT/4Oj5gaFgcPDl9fqHhkbNj1ovmR8YHMpZFgyM7DPb3p8zX1NZzuK6KhbXV3NafRWnJb8X11WzuL6KRbVVpDx0zKzg4DArsqryFGc2zefMple8foaBwSF2H+pJzlC62Jr0qTzf0cVDT4+++ysfqTJRnvykykR5qmykLfd3xSvas/NVFeU5+8i2He0dYPfhHtbtOMT+rle+Wy1VJprmV2XDpL6axXU54VJfzWlJwCysqfTYZCXOwWFWwspTZSMd9dA0atlg8uDjwa4+ylPDf9zLcsLg5T/qYwOh0Je7+gaG6Djay0udPezt7GXvkR5e6uzhpc5s244D3azdeoCDOc/SjBxzmUaduSyuywmXnLYFNRW+bFckDg6zGSpVJprT82hOzyt2Ka9QWV42pdp6+gfpOJINlr1JqLx05OXAeWFfF4+2H+DwsVcGTGWqjKa6qpHLYsNhU1ddTlV5GVXlqezvipenqytSLy+rKBuZriwv82W0PDg4zKxoqitSZBbWkFlYM+l6Pf2D2WAZFTAvTz+39yg/2rKPIz0Dk+5nMhUpvRw25WVUDYdMRU7bmMAZG0zD4VRZXkZFaswZYHJJcOzZ38hZYWr89orUmPXKVPRLeQ4OMyt51RWpnEt2EzvWN0hX3wC9A0P09g9mf4+dHhiktz873TPSnvzuz5ketd0gR3sH2H+0b2R5T866w68AOFUkXhk8EwTUnddddNzvLV8ODjObNeZVpooyDMzQUNA3ODqYcu9G6x97d9pQzvzgOO2D49/l9vKdba9sf+VnZNerqpj+wTsdHGZmJ6msTFSXpaiuSAGzf1QAjyNtZmZ5cXCYmVleHBxmZpYXB4eZmeWloMEh6XJJz0jaIummcZZfIunnkgYkvT2nvU3STyRtkrRe0rtyln1e0guS1iU/bYU8BjMzG61gd1VJSgG3AW8FdgKPSVoTEZtzVtsOvA/48zGbdwPvjYjnJC0BHpf0YEQcSpZ/OCLuK1TtZmY2sULejnsxsCUi2gEk3QNcBYwER0RsTZaNenomIp7Nmd4taS/ZgXoOFbBeMzObgkJeqmoGduTM70za8iLpYqASeD6n+RPJJaxPS6qaYLvrJa2VtLajoyPfjzUzswmU9AOAkk4H/i9wXUQMn5V8FHiRbJjcAXwEuHnsthFxR7IcSR2Stp1gGY3AvhPcdjby9/Eyfxej+fsYbTZ8H2eM11jI4NgFZHLmW5K2KZFUD/w78LGIeHS4PSL2JJO9ku7ilf0jrxARTcdbZ5I61kbE6hPdfrbx9/Eyfxej+fsYbTZ/H4W8VPUYsFLSckmVwDXAmqlsmKz/deCLYzvBk7MQlB2I/2pg43QWbWZmkytYcETEAHAD8CDwFHBvRGySdLOkKwEkXSRpJ/AO4HZJm5LN3wlcArxvnNtu75a0AdhA9lTwbwp1DGZm9kqKiOOvNYdJuj7pLzH8feTydzGav4/RZvP34eAwM7O8eMgRMzPLi4PDzMzy4uCYxPHG2porJGUkPSRpczJ+2IeKXVMpkJSS9ISkbxa7lmKTlJZ0n6SnJT0l6fXFrqlYJP1p8v+TjZL+RVJ1sWuabg6OCeSMtXUFsAq4VtKq4lZVNAPAn0XEKuB1wB/N4e8i14fI3jFo8PfAtyLiHKCVOfq9SGoG/hhYHRHnASmyjyLMKg6OiY2MtRURfcDwWFtzTkTsiYifJ9NHyP5RyHv4mNlEUgvw68A/F7uWYpPUQPb2+TsBIqIvZ0DSuagcmCepHKgBdhe5nmnn4JjYtIy1NdtIWgZcCPy0yKUU22eAvwCGjrPeXLAc6ADuSi7d/bOk2mIXVQwRsQv4O7Ijf+8BDkfEt4tb1fRzcNiUSZoPfBX4k4joLHY9xSLpN4C9EfF4sWspEeXALwD/FBEXAl3AnOwTlLSA7JWJ5cASoFbSu4tb1fRzcEzspMbamm0kVZANjbsj4mvFrqfI3ghcKWkr2UuYl0r6UnFLKqqdwM6IGD4LvY9skMxFlwEvRERHRPQDXwPeUOSapp2DY2InPNbWbJOMC3Yn8FREfKrY9RRbRHw0IloiYhnZ/y6+HxGz7l+VUxURLwI7JL06afoVct67M8dsB14nqSb5/82vMAtvFCjpYdWLKSIGJA2PtZUCPhcRm46z2Wz1RuA9wAZJ65K2v4yI+4tXkpWYD5IdR64SaAd+t8j1FEVE/FTSfcDPyd6N+ATJ6x1mEw85YmZmefGlKjMzy4uDw8zM8uLgMDOzvDg4zMwsLw4OMzPLi4PD7DgkHU1+L5P029O8778cM//j6dy/WSE4OMymbhmQV3AkA91NZlRwRMSse8rYZh8Hh9nUfRL4JUnrkncupCTdKukxSesl/RcASW+W9ENJa0ieoJb0b5IeT97TcH3S9kmyo6iuk3R30jZ8dqNk3xslbZD0rpx9/yDn3Rd3J08oI+mTyTtT1kv6u1P+7dic4SfHzabuJuDPI+I3AJIAOBwRF0mqAv5D0vBIqL8AnBcRLyTzvxcRByTNAx6T9NWIuEnSDRHRNs5n/SbQRvbdFo3JNo8kyy4EziU7XPd/AG+U9BTwn4FzIiIkpaf30M1e5jMOsxP3q8B7k2FYfgosAlYmy36WExoAfyzpSeBRsoNnrmRybwL+JSIGI+Il4GHgopx974yIIWAd2Utoh4Ee4E5Jvwl0n+SxmU3IwWF24gR8MCLakp/lOe9e6BpZSXoz2VFTXx8RrWTHLzqZ14n25kwPAuURMUD25WP3Ab8BfOsk9m82KQeH2dQdAepy5h8E/jAZch5JZ0/wAqMG4GBEdEs6h+zrd4f1D28/xg+BdyX9KE1k37D3s4kKS96V0pAMPPmnZC9xmRWE+zjMpm49MJhccvo82fdsLwN+nnRQdwBXj7Pdt4APJP0Qz5C9XDXsDmC9pJ9HxO/ktH8deD3wJBDAX0TEi0nwjKcO+IakarJnQjee0BGaTYFHxzUzs7z4UpWZmeXFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJlZXv4/QAgzW/yBXxUAAAAASUVORK5CYII=\n",
       "text/plain": [
        "<Figure size 432x288 with 1 Axes>"
       ]
@@ -485,7 +483,7 @@
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAn/klEQVR4nO3de5xdZ13v8c937pOZyaXJ5J4maZs2DRRSGCtYxQtQq2KL4MEUEVCx4KGAqHCKx4NYjp56AeFgX0qBKsqlQFEMnkoFBURuJoVy6eyWpmmhSXaSyXXPJHOf3/ljrZnsTPbM7DTZWXvv+b5fr51Z61nrWfs3+5VZv72eZ63nUURgZmY2XUPWAZiZWXVygjAzs5KcIMzMrCQnCDMzK8kJwszMSnKCMDOzkpwgzMysJCcIq3mSBopeE5IGi9Z/+Ukc7wuSXlWJWM1qSVPWAZidq4jonFyW9Djwqoj4XHYRVZakpogYyzoOq3++grC6JalB0q2SHpV0WNLHJV2UbmuT9KG0/JikHZJWSPoj4MeAv0yvQP5yhmN/QtJ+Sccl/YekpxRta5f0DknfT7f/p6T2dNuPSvpK+p5PSHplWn7aVYukV0r6z6L1kPRaSY8Aj6Rl706PUZB0v6QfK9q/UdLvpb97f7p9naQ7JL1j2u+yXdIbz/0Tt3rjBGH17HXAC4EfB1YDR4E70m2vABYB64ClwGuAwYj4n8CXgFsiojMibpnh2P8CbAKWA98APly07c+BZwI/AlwEvBmYkLQ+rfceoBvYCjxwFr/PC4EfBrak6zvSY1wEfAT4hKS2dNtvAzcBPwssBH4NOAl8ELhJUgOApGXA89L6ZqdxE5PVs9eQnOj3AEh6G/ADSb8CjJIkhssi4tvA/Wdz4Ii4a3I5Pe5RSYuAfpKT8bMiYm+6y1fS/V4KfC4iPpqWH05f5fo/EXGkKIYPFW17h6TfB64AvgW8CnhzRDycbv/W5HtKOg48F/gssA34QkQcOIs4bJ7wFYTVs/XAP6bNOceAHDAOrAD+HrgPuFvSPkl/Kqm5nIOmzTe3p803BeDxdNOy9NUGPFqi6roZysv1xLQ4fldSLm3GOkZyRbSsjPf6IPCydPllJJ+F2RmcIKyePQH8TEQsLnq1RcTeiBiNiD+MiC0kTUEvAF6e1ptriOOXAjeSNM0sAjak5QIOAUPApTPEU6oc4ASwoGh9ZYl9puJK+xveDLwEWBIRi4HjaQxzvdeHgBslPR24EvjUDPvZPOcEYfXsr4E/Stv+kdQt6cZ0+SclXSWpESiQNDlNpPUOAJfMctwuYJikeWgB8MeTGyJiArgLeKek1enVxrMltZL0UzxP0kskNUlaKmlrWvUB4EWSFki6DPj1OX63LmAM6AOaJL2VpK9h0vuBt0vapMTTJC1NY9xD0n/x98AnI2JwjveyecoJwurZu4HtwL9K6ge+RtLJC8k39HtIkkMO+CKnmlreDfyipKOS/m+J4/4d8H1gL9CbHrfY7wLfITkJHwH+BGiIiB+QdBr/Tlr+APD0tM5fACMkyemDnN7pXcp9wGeA76WxDHF6E9Q7gY8D/5r+jh8A2ou2fxC4Cjcv2SzkCYPM5h9JzyFpalofPgnYDHwFYTbPpJ3xbwDe7+Rgs3GCMJtHJF0JHANWAe/KNBirem5iMjOzknwFYWZmJdXNk9TLli2LDRs2ZB2GmVlNuf/++w9FRHepbXWTIDZs2MDOnTuzDsPMrKZI+v5M29zEZGZmJTlBmJlZSU4QZmZWkhOEmZmV5ARhZmYlOUGYmVlJThBmZlZS3TwHYWb1ISIYGp2gf2iUwtAYhaFR+ofGkvXB5OfI2ATdXa2sWtzOqkVtrFzUxsK2siYErBsTE8GhE8McLAwzEcHT1i4+7+9R0QQh6XqSsfUbSUaOvH3a9otJxqVfnO5za0Tcm257C8mkKePA6yPivkrGambnx8jYqZN7/7ST++TJvvikf/r6GIXBUcYmzn6MuM7WJlYuamNV+lq5qH1qedWi9jSJNCFp7oNlKCIoDI1xsDDE/sIQBwrDHCgMTb32F4Y5WBiir3946nN6+rrF/NNrrz3vsVQsQaQzdd0BPB/YA+yQtD0ieot2+33g4xHxV5K2APcCG9LlbcBTgNXA5yRdHhHjlYrXzE6JCAaGxzg8MMLhE8McGhhJlgeGTz+ZT0sEhcFRhscm5jx+Z2sTC9ua6GprpqutieVdbVza3URXWrYwLe9qa2Jhe/Np+y5sa6a5sYGD/UPsPz5E/vgQ+eOD5I8n6/uOD/G9A30c7B9m+likHS2NaRJpPz2RLE4TycJ2FrZXLokMjY6nJ/ph9heGOFh00j+Qru8vDDE0euZnuKi9mRULW1mxsI1Ny5dNLa9Y2MbaJe0l3u3cVfIK4hpgV0TsBpB0N8k8vsUJIjg1TeIiYF+6fCNwd0QMA49J2pUe76sVjNesrg2PjXPkRHKiPzQwPHXyT9ZPLR8eGObQiRFGZjjRtzU3FJ3Am1nU3szaJe1TJ/HpJ/Opk3578rOztYnGhnM/Aa9dsoC1SxbMuH10fIKD/cPsT5NH/tjpyeRLjxziYP8Q0y9WFkwlkVOJZOWiNlanVyGrFrWxqL35tCQyNj7BoYGR9Bv/0Azf/oc5Pjh6RpytTQ2sXJSc6K9au5jndbWyclEbyxe2sWJyuauN9pbGc/7MzlYlE8QaTp8CcQ+npnuc9DaS6SBfB3SQTAI/Wbd4Gsc9adlpJN0M3Axw8cUXn5egzWrFxERwbHA0OaGXOMEfnkoCSULoHxoreZyWpgaWdbSwtLOVpZ0tXL6ii2WdLSztbGFpR1K2rLOVZZ2tLOloprXpwp+onozmxgbWLG5nzeKZv12PpUlkMnFMvyL58q5DHCicmUTamxtZtaiNBa2NHCgMc2jgzKuVxgaxvKuV5Qvb2LC0g2ddsnTqG3/xt/9qbvbKupP6JuBvI+Idkp4N/L2kp5ZbOSLuBO4E6Onp8cQWVhcmOx/3HRsif2yQvceSk9WhgeGpb/6HBkY4enKE8RJt9RJctODUCf4pqxeyrLOVpUVJYFnRyb+ztXpPUJXW1NjA6sXtrF7cDiwpuc/Y+AR9A8NFVyGnEsnJkTGesmoRKxalJ/2utvTbfytLO1rPy5VSliqZIPYC64rW16ZlxX4duB4gIr4qqQ1YVmZds5pUGBpl37FB8seG0pP/IPuODSVl6bfX0fHTT/xtzQ0s72pjaWcLa5csYOu6xSW/4S/tbGHJgpaaPzFVk6bGhrSpqR3mWUNFJRPEDmCTpI0kJ/dtwEun7fMD4LnA36ZTIbYBfcB24COS3knSSb0J+K8Kxmp2XgyPjbP/eHriT0/6+45PnvyTRDAwfHpTT2ODWLkwadveum4xP3PVStYsbp9q/16zuJ3FC5rn7bd8y07FEkREjEm6BbiP5BbWuyLiQUm3ATsjYjvwO8D7JL2RpMP6lekk6g9K+jhJh/YY8FrfwWRZm5gI+gaGk5N+2tQwlQjSk/+hgeEz6i3taGHV4qQd+kcuXcbqxUnn5+rFba
xe3M7yrjZ/47eqVDdzUvf09IQnDLJzNTY+wQ+OnOTRvhPsOjjAroMDPHH0JPuODXKgMHRG009HSyOr0jbs1YuSE/7kt/7Jh7jammujU9fmJ0n3R0RPqW1Zd1KbZWJwZJxH+wZ4tC9JApM/Hz90kpHxU7d3rljYyvqLOuhZvyQ5+S9uZ83kFcCiyt4zb5Y1Jwira0dOjEyd/Cdfj/YNsPfY4NRtiQ2C9Us7uLS7k5/avILLlndyaXcHly7vnHfDN5gVc4KwmjcxEew7Ppie/JOmoUcPDrCrb4AjJ0am9mtrbuCSZZ084+IlvKRnHZct7+Sy5Z2sX7qgZu7tN7uQnCCsZoyMTfD9wydOuxLY1TfAowdPMDh66h6GJQuauWx5J9dtSa8GlndyWXcnaxa30+DOYLOyOUFYVTpyYoTPP3SQXZN9BAcH+P6Rk6c9GLZmcTuXLu/kh665KLka6E6uCJZ2tmYYuVn9cIKwqvS//7mXf/jmXpoaxIZlHVy+ooufvWoVly7v4LLuLi7p7qCj1f99zSrJf2FWlb699zjPubybD7yih+ZGz2tllgX/5VnVGRodZ3ffAFvXLnJyMMuQ//qs6nzvQD8TAVeuWjj3zmZWMU4QVnVy+QLgBGGWNScIqzq5fD8dLY1cfNHMk8GYWeU5QVjV6d1X4IqVXX5mwSxjThBWVSKC3P4CW1a7ecksa04QVlX2HB2kf2jM/Q9mVcAJwqqKO6jNqocThFWV3nwBCTav7Mo6FLN5zwnCqkouX2Dj0g4WtPghf7OsOUFYVcnl+928ZFYlnCCsavQPjfKDIye5cpWbl8yqQUUThKTrJT0saZekW0ts/wtJD6Sv70k6VrRtvGjb9krGadXh4f39gDuozapFxRp6JTUCdwDPB/YAOyRtj4jeyX0i4o1F+78OuLroEIMRsbVS8Vn16fUdTGZVpZJXENcAuyJid0SMAHcDN86y/03ARysYj1W5XL7A4gXNrFrUlnUoZkZlE8Qa4Imi9T1p2RkkrQc2Av9eVNwmaaekr0l64Qz1bk732dnX13eewras9Ob7uXLlQiQPsWFWDaqlk3obcE9EjBeVrY+IHuClwLskXTq9UkTcGRE9EdHT3d19oWK1ChifCB7eX3DzklkVqWSC2AusK1pfm5aVso1pzUsRsTf9uRv4Aqf3T1idefzwCYZGJ3wHk1kVqWSC2AFskrRRUgtJEjjjbiRJm4ElwFeLypZIak2XlwHXAr3T61r96N3nDmqzalOxu5giYkzSLcB9QCNwV0Q8KOk2YGdETCaLbcDdERFF1a8E3itpgiSJ3V5895PVn1y+QFOD2LSiM+tQzCxV0fEMIuJe4N5pZW+dtv62EvW+AlxVydisuuTyBS5b3klrU2PWoZhZqlo6qW2e8xAbZtXHCcIyd+TECPsLQ+6gNqsyThCWuck5ILasWpRxJGZWzAnCMndqkiBfQZhVEycIy1xvvsDyrlaWdrZmHYqZFXGCsMy5g9qsOjlBWKZGxibYddAJwqwaOUFYpnYdHGB0PNiy2gnCrNo4QVimTt3B5A5qs2rjBGGZyuULtDY1sGFpR9ahmNk0ThCWqd58gStWdtHU6P+KZtXGf5WWmYggly9w5Ur3P5hVIycIy8yBwjBHT466g9qsSjlBWGZOPUHtBGFWjZwgLDO9aYLY7DuYzKqSE4RlpjdfYO2Sdha2NWcdipmV4ARhmcnlC2xx85JZ1XKCsEwMjozz+KET7n8wq2JOEJaJhw/0MxHuoDarZhVNEJKul/SwpF2Sbi2x/S8kPZC+vifpWNG2V0h6JH29opJx2oV3aogNJwizatVUqQNLagTuAJ4P7AF2SNoeEb2T+0TEG4v2fx1wdbp8EfAHQA8QwP1p3aOVitcurN59BTpbm1i7pD3rUMxsBpW8grgG2BURuyNiBLgbuHGW/W8CPpou/zTw2Yg4kiaFzwLXVzBWu8By+QJXruqioUFZh2JmM6hkglgDPFG0victO4Ok9cBG4N/Ppq6kmyXtlLSzr6/vvARtlTcxETy033NAmFW7aumk3gbcExHjZ1MpIu6MiJ6I6Onu7q5QaHa+7Tk6yMDwmBOEWZWrZILYC6wrWl+blpWyjVPNS2db12pMb/444DuYzKpdJRPEDmCTpI2SWkiSwPbpO0naDCwBvlpUfB9wnaQlkpYA16VlVgd68/00CK5Y4SE2zKpZxe5iiogxSbeQnNgbgbsi4kFJtwE7I2IyWWwD7o6IKKp7RNLbSZIMwG0RcaRSsdqFlcsX2Lisg/aWxqxDMbNZVCxBAETEvcC908reOm39bTPUvQu4q2LBWWZy+QJb1y3OOgwzm0O1dFLbPHF8cJQ9Rwfd/2BWA5wg7IJ6yE9Qm9UMJwi7oKaG2PAscmZVb8Y+CEkvKqP+UNrPYFaWXL6fizpaWN7VmnUoZjaH2Tqp3wf8EzDbWAjPYVontNlscvuTITYkD7FhVu1mSxD/EhG/NltlSR86z/FYHRsbn+Ch/f28/Fnrsw7FzMowYx9ERLxsrsrl7GM26bFDJxgZm/AdTGY1ouxOakmXSfqQpE9KenYlg7L61OsOarOaMlsndVtEDBUVvR14c7r8aWBrBeOyOpTL99PcKC7t7sw6FDMrw2xXEJ+W9PKi9VFgA7AeOKtRV80gucX1suVdtDT57mqzWjDbX+r1wEJJn5H0HOB3SSby+QXgly9EcFZfetNJgsysNszYxJTOzfCXkv4e+F/AbwK/HxGPXqjgrH4cGhimr3/YT1Cb1ZDZ+iB+GHgTMAL8MTAI/JGkvcDbI+LYBYnQ6kLOQ2yY1ZzZnoN4L/CzQCfwNxFxLbBN0o8DHyNpbjIry2SC8C2uZrVjtgQxRtIp3UFyFQFARHwR+GJlw7J6k8v3s3JhG0s6WrIOxczKNFuCeCnwapLk8PJZ9jObU+8+d1Cb1ZrZOqm/B/zOBYzF6tTw2DiP9g3wvC3Lsw7FzM7CjLe5SvrnuSqXs4/ZIwcGGJsI9z+Y1ZjZmph+VNL2WbYL2HKe47E65A5qs9o0W4K4sYz6I7NtlHQ98G6gEXh/RNxeYp+XAG8DAvhWRLw0LR8HvpPu9oOIuKGMeKwK9eYLtDU3sGFpR9ahmNlZmK0P4pzuVJLUCNwBPB/YA+yQtD0ieov22QS8Bbg2Io5KKm6kHoyIrecSg1WHXL7AFSsX0tjgOSDMakklB8W5BtgVEbsjYgS4mzOvSn4DuCMijgJExMEKxmMZiAhy+X4/IGdWgyqZINYATxSt70nLil0OXC7py5K+ljZJTWqTtDMtf2GpN5B0c7rPzr6+vvMavJ0f+eNDHB8cZYtvcTWrOXMmCEk/L6lSiaQJ2AT8BHAT8D5Ji9Nt6yOih+R5jHdJunR65Yi4MyJ6IqKnu7u7QiHauXAHtVntKufE/0vAI5L+VNLmszj2XmBd0fratKzYHmB7RIxGxGPA90gSBhGxN/25G/gCcPVZvLdVid59SYLY7ARhVnPmTBDptKJXA48Cfyvpq2nTzlxtBjuATZI2SmoBtgHTb5v9FMnVA5KWkTQ57Za0RFJrUfm1Q
C9Wc3L7C6xfuoDO1tlumDOzalRW01FEFIB7SDqaV5HMCfENSa+bpc4YcAtwH5ADPh4RD0q6TdLkLav3AYcl9QKfB94UEYeBK4Gdkr6Vlt9efPeT1Y5cvp8rV/rqwawWzfm1Lj2Z/ypwGfB3wDURcVDSApJv9e+ZqW5E3AvcO63srUXLAfx2+ire5yvAVeX/GlaNTo6M8fjhE7xw6/R7E8ysFpRz3f9i4C8i4j+KCyPipKRfr0xYVg8e2t9PBB6kz6xGlZMg3gbkJ1cktQMrIuLxiPi3SgVmtW+yg9p3MJnVpnL6ID4BTBStj6dlZrPK5Qt0tTWxdkl71qGY2ZNQToJoSp+EBiBd9qwvNqdcvsCVqxYieYgNs1pUToLoK7rrCEk3AocqF5LVg4mJ4KH9HmLDrJaV0wfxGuDDkv6SZIjvJ/AMczaH7x85ycmRcXdQm9WwORNERDwKPEtSZ7o+UPGorOZ5iA2z2lfW462Sfg54CskAegBExG0VjMtqXC5foLFBXL7CVxBmtaqcwfr+mmQ8pteRNDH9N2B9heOyGpfLF7hkWQdtzY1Zh2JmT1I5ndQ/EhEvB45GxB8CzyYZM8lsRrl8v5uXzGpcOQliKP15UtJqYJRkPCazko6dHGHvsUEnCLMaV04fxKfTORr+DPgGydzR76tkUFbbcvl+ALasdoIwq2WzJoh0oqB/i4hjwCcl/TPQFhHHL0RwVptO3cHkDmqzWjZrE1NETAB3FK0POznYXHL5Ass6W1je1ZZ1KGZ2Dsrpg/g3SS+Wx0uwMvWmQ2yYWW0rJ0G8mmRwvmFJBUn9kgoVjstq1Oj4BI8cGHCCMKsD5TxJ7YZkK9vuvhOMjE94DCazOlDOjHLPKVU+fQIhM/AQG2b1pJwmpjcVvf4X8GmSSYTmJOl6SQ9L2iXp1hn2eYmkXkkPSvpIUfkrJD2Svl5RzvtZ9nL5Ai2NDVzS3ZF1KGZ2jsppYvr54nVJ64B3zVVPUiPJHVDPB/YAOyRtj4jeon02AW8Bro2Io5KWp+UXAX8A9JA8d3F/Wvdoub+YZaM3X2DTik6aG8v57mFm1ezJ/BXvAa4sY79rgF0RsTudZOhu4MZp+/wGcMfkiT8iDqblPw18NiKOpNs+C1z/JGK1CyznO5jM6kY5fRDvIfkWD0lC2UryRPVc1pDMHTFpD/DD0/a5PH2PLwONwNsi4jMz1F1TIrabgZsBLr744jJCsko62D/EoYERd1Cb1YlyhtrYWbQ8Bnw0Ir58Ht9/E/ATwFrgPyRdVW7liLgTuBOgp6cn5tjdKmxyiA1fQZjVh3ISxD3AUESMQ9K3IGlBRJyco95eYF3R+tq0rNge4OsRMQo8Jul7JAljL0nSKK77hTJitQxN3sHkKwiz+lDWk9RAe9F6O/C5MurtADZJ2iipBdgGbJ+2z6dIE4GkZSRNTruB+4DrJC2RtAS4Li2zKta7r8DqRW0sWtCcdShmdh6UcwXRVjzNaEQMSFowV6WIGJN0C8mJvRG4KyIelHQbsDMitnMqEfQC48CbIuIwgKS3kyQZgNsi4shZ/WZ2weXyBY/galZHykkQJyQ9IyK+ASDpmcBgOQePiHuBe6eVvbVoOYDfTl/T694F3FXO+1j2hkbH2X3oBNc/dWXWoZjZeVJOgvgt4BOS9pFMObqSZApSsymPHBhgfCLcQW1WR8p5UG6HpM3AFWnRw2mnstmU3nwyCrwThFn9mLOTWtJrgY6I+G5EfBfolPTfKx+a1ZJcvp8FLY2sv2jO7ikzqxHl3MX0G+mMcgCkTzb/RsUisprUmy+weWUXDQ2eNsSsXpSTIBqLJwtKx1hqqVxIVmsiwkNsmNWhcjqpPwN8TNJ70/VXp2VmAOw9Nkj/0JgThFmdKSdB/A+S8Y5+M13/LPC+ikVkNad3n+eAMKtHczYxRcRERPx1RPxiRPwi0Au8p/KhWa3I5fuRYPNKTz5oVk/KuYJA0tXATcBLgMeAf6hkUFZbcvkCG5Z20NFa1n8nM6sRM/5FS7qcJCncBBwCPgYoIn7yAsVmNSK3v8BTPMSGWd2ZrYnpIeCngBdExI9GxHtIxksymzIwPMb3D5/kypVOEGb1ZrYE8SIgD3xe0vskPZdkqA2zKQ/l3UFtVq9mTBAR8amI2AZsBj5PMibTckl/Jem6CxSfVbmpOSDcxGRWd8q5i+lERHwkIn6eZOKeb5Lc+mpGb76fRe3NrFrUlnUoZnaelfMk9ZSIOBoRd0bEcysVkNWW5AnqLooetjezOnFWCcKs2PhE8NB+D7FhVq+cIOxJe/zwCYZGJ5wgzOqUE4Q9aVMd1E4QZnXJCcKetFy+QFOD2LSiM+tQzKwCKpogJF0v6WFJuyTdWmL7KyX1SXogfb2qaNt4Ufn2SsZpT04u38+l3Z20NjVmHYqZVUDFBs9J5424A3g+sAfYIWl7RPRO2/VjEXFLiUMMRsTWSsVn5653X4FnXXJR1mGYWYVU8griGmBXROyOiBHgbuDGCr6fXUBHT4ywvzDkDmqzOlbJBLEGeKJofU9aNt2LJX1b0j2S1hWVt0naKelrkl5Y6g0k3Zzus7Ovr+/8RW5z8hPUZvUv607qTwMbIuJpJBMRfbBo2/qI6AFeCrxL0qXTK6cP7fVERE93d/eFidiAZA5q8BhMZvWskgliL1B8RbA2LZsSEYcjYjhdfT/wzKJte9Ofu4EvAFdXMFY7S735At1drSzrbM06FDOrkEomiB3AJkkbJbUA24DT7kaStKpo9QYgl5YvkdSaLi8DriWZyc6qRC7f76sHszpXsbuYImJM0i3AfUAjcFdEPCjpNmBnRGwHXi/pBmAMOAK8Mq1+JfBeSRMkSez2Enc/WUZGxibYdbCfH7/czXpm9ayic0RGxL3AvdPK3lq0/BbgLSXqfQW4qpKx2ZP3aN8Ao+PBlas8B7VZPcu6k9pqkIfYMJsfnCDsrPXuK9DS1MDGZR1Zh2JmFeQEYWctt7/AFSu6aGr0fx+zeua/cDsrEUEu3+/mJbN5wAnCzsrB/mGOnBhxB7XZPOAEYWfFT1CbzR9OEHZWevclCWKzE4RZ3XOCsLOSyxdYs7idRe3NWYdiZhXmBGFnJZcveARXs3nCCcLKNjgyzmOHTrj/wWyecIKwsj18oJ+JgC2+g8lsXnCCsLLlfAeT2bziBGFly+ULdLY2sW7JgqxDMbMLwAnCypbLF9i8souGBmUdipldAE4QVpaJifAkQWbzjBOElWXP0UEGhsecIMzmEScIK8upITZ8B5PZfOEEYWXJ5Qs0CDav9BWE2XzhBGFlyeULbFjWQXtLY9ahmNkFUtEEIel6SQ9L2iXp1hLbXympT9ID6etVRdteIemR9PWKSsZpc+vNF9z/YDbPNFXqwJIagTuA5wN7gB2StkdE77RdPxYRt0yrexHwB0APEMD9ad2jlYrXZlYYGmXP0UFuuubirEMxswuoklcQ1wC7ImJ3RIwAdwM3lln3p4HPRsSRNCl8Fri+QnHaHB7K9wN4FjmzeaaSCWIN8ETR+p60bLoXS/q2pHskrTubupJulrRT0s6+
vr7zFbdN4yE2zOanrDupPw1siIinkVwlfPBsKkfEnRHRExE93d3dFQnQkgSxZEEzKxa2Zh2KmV1AlUwQe4F1Retr07IpEXE4IobT1fcDzyy3rl04kx3UkofYMJtPKpkgdgCbJG2U1AJsA7YX7yBpVdHqDUAuXb4PuE7SEklLgOvSMrvAxsYneHi/h9gwm48qdhdTRIxJuoXkxN4I3BURD0q6DdgZEduB10u6ARgDjgCvTOsekfR2kiQDcFtEHKlUrDazxw+fYHhswh3UZvNQxRIEQETcC9w7reytRctvAd4yQ927gLsqGZ/NrTe9g8lXEGbzT9ad1FblcvkCzY3isuWdWYdiZheYE4TNqndfgUu7O2lp8n8Vs/nGf/U2q1y+4P4Hs3nKCcJmdHhgmIP9w2xZ7QRhNh85QdiMcu6gNpvXnCBsRr3544AThNl85QRhM8rl+1mxsJWLOlqyDsXMMuAEYTNyB7XZ/OYEYSUNj42z6+CAm5fM5jEnCCtp18EBxibCCcJsHnOCsJJ693kOCLP5zgnCSsrl+2lrbmDjso6sQzGzjDhBWEm5fIErVi6kscFzQJjNV04QdoaIILe/wJZVXVmHYmYZcoKwM+wvDHHs5Kj7H8zmOScIO4M7qM0MnCCshFw+SRCbV7qJyWw+c4KwM+Ty/Vx80QK62pqzDsXMMlTRBCHpekkPS9ol6dZZ9nuxpJDUk65vkDQo6YH09deVjNNOl8sXuNId1GbzXsXmpJbUCNwBPB/YA+yQtD0ieqft1wW8Afj6tEM8GhFbKxWflXZyZIzHDp/ghq2rsw7FzDJWySuIa4BdEbE7IkaAu4EbS+z3duBPgKEKxmJlemh/PxHuoDazCl5BAGuAJ4rW9wA/XLyDpGcA6yLi/0l607T6GyV9EygAvx8RX6pEkEdPjPCC9/znaWXSDMtohvLi/VWyfHrBTHUiIvk59U/yIyImV4mAIIg4tV5cP4rKJvcrrju5Nlle/J6jYxMAHsXVzCqaIGYlqQF4J/DKEpvzwMURcVjSM4FPSXpKRBSmHeNm4GaAiy+++EnF0dQonnXJ0qn1U6dhmGFx6oR6Znnp/Werc/p7RJKE0nwhkuQxmT6kU2WT20/tq6ntp/bVVCJTurNOO/appFd87NWL21i7pB0zm98qmSD2AuuK1temZZO6gKcCX0hPeCuB7ZJuiIidwDBARNwv6VHgcmBn8RtExJ3AnQA9PT3Tz8ll6Wpr5h0vefqTqWpmVtcq2QexA9gkaaOkFmAbsH1yY0Qcj4hlEbEhIjYAXwNuiIidkrrTTm4kXQJsAnZXMFYzM5umYlcQETEm6RbgPqARuCsiHpR0G7AzIrbPUv05wG2SRoEJ4DURcaRSsZqZ2ZlU3DZey3p6emLnzp1z72hmZlMk3R8RPaW2+UlqMzMryQnCzMxKcoIwM7OSnCDMzKwkJwgzMyupbu5iktQHfP8cDrEMOHSewql1/ixO58/jdP48TqmHz2J9RHSX2lA3CeJcSdo5061e840/i9P58zidP49T6v2zcBOTmZmV5ARhZmYlOUGccmfWAVQRfxan8+dxOn8ep9T1Z+E+CDMzK8lXEGZmVpIThJmZlTTvE4Sk6yU9LGmXpFuzjidLktZJ+rykXkkPSnpD1jFlTVKjpG9K+uesY8mapMWS7pH0kKScpGdnHVOWJL0x/Tv5rqSPSmrLOqbzbV4niHRSojuAnwG2ADdJ2pJtVJkaA34nIrYAzwJeO88/D4A3ALmsg6gS7wY+ExGbgaczjz8XSWuA1wM9EfFUkjlvtmUb1fk3rxMEcA2wKyJ2R8QIcDdwY8YxZSYi8hHxjXS5n+QEsCbbqLIjaS3wc8D7s44la5IWkUzk9QGAiBiJiGOZBpW9JqBdUhOwANiXcTzn3XxPEGuAJ4rW9zCPT4jFJG0Arga+nnEoWXoX8GaSWQ3nu41AH/A3aZPb+yV1ZB1UViJiL/DnwA+APHA8Iv4126jOv/meIKwESZ3AJ4HfiohC1vFkQdILgIMRcX/WsVSJJuAZwF9FxNXACWDe9tlJWkLS2rARWA10SHpZtlGdf/M9QewF1hWtr03L5i1JzSTJ4cMR8Q9Zx5Oha4EbJD1O0vT4U5I+lG1ImdoD7ImIySvKe0gSxnz1POCxiOiLiFHgH4AfyTim826+J4gdwCZJGyW1kHQybc84psxIEkkbcy4i3pl1PFmKiLdExNqI2EDy/+LfI6LuviGWKyL2A09IuiItei7Qm2FIWfsB8CxJC9K/m+dSh532TVkHkKWIGJN0C3AfyV0Id0XEgxmHlaVrgV8BviPpgbTs9yLi3uxCsiryOuDD6Zep3cCvZhxPZiLi65LuAb5BcvffN6nDYTc81IaZmZU035uYzMxsBk4QZmZWkhOEmZmV5ARhZmYlOUGYmVlJThBmKUkD6c8Nkl56no/9e9PWv3I+j29WCU4QZmfaAJxVgkgHbJvNaQkiIuruqVurP04QZme6HfgxSQ+kY/43SvozSTskfVvSqwEk/YSkL0naTvpUsaRPSbo/nSfg5rTsdpJRPx+Q9OG0bPJqRemxvyvpO5J+qejYXyiaf+HD6RO7SLo9nbPj25L+/IJ/OjZvzOsnqc1mcCvwuxHxAoD0RH88In5IUivwZUmTI3c+A3hqRDyWrv9aRByR1A7skPTJiLhV0i0RsbXEe70I2Eoyv8KytM5/pNuuBp5CMoz0l4FrJeWAXwA2R0RIWnx+f3WzU3wFYTa364CXp8OPfB1YCmxKt/1XUXIAeL2kbwFfIxkIchOz+1HgoxExHhEHgC8CP1R07D0RMQE8QNL0dRwYAj4g6UXAyXP83cxm5ARhNjcBr4uIrelrY9HY/yemdpJ+gmSUz2dHxNNJxuc5l2koh4uWx4GmiBgjmejqHuAFwGfO4fhms3KCMDtTP9BVtH4f8JvpUOhIunyGyXIWAUcj4qSkzSTTtk4anaw/zZeAX0r7ObpJZm37r5kCS+fqWJQOoPhGkqYps4pwH4TZmb4NjKdNRX9LMhfzBuAbaUdxH/DCEvU+A7wm7Sd4mKSZadKdwLclfSMifrmo/B+BZwPfAgJ4c0TsTxNMKV3AP0lqI7my+e0n9RualcGjuZqZWUluYjIzs5KcIMzMrCQnCDMzK8kJwszMSnKCMDOzkpwgzMysJCcIMzMr6f8DtnEhqj6H3isAAAAASUVORK5CYII=\n",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAot0lEQVR4nO3deXxcZ33v8c9Xuyx5X+LEexI7djYSUMOSsrQ0kFJIKFDqpBRyWxrgEkqhhZv09lIaXuXSjcItebUNNMAtS6CBUtObktKyFgq1Q0xCrHHiOIttNIls2dZYsvbf/eMcyWN5JI1tjWak+b5fr3nNOc9Z5qeJc37znOc5z6OIwMzMbLyacgdgZmaVyQnCzMwKcoIwM7OCnCDMzKwgJwgzMyvICcLMzApygjAzs4KcIGzWk3Qs7zUi6Xje+q+dwfm+JenNpYjVbDapK3cAZmcrIlpHlyU9Abw5Iv6tfBGVlqS6iBgqdxw297kGYXOWpBpJt0p6TNIhSV+UtCTd1iTpM2n5EUnbJZ0j6Y+BFwIfS2sgH5vg3P8gKSvpqKTvSLokb1uzpL+Q9GS6/T8kNafbflbS99PP3CfpprT8pFqLpJsk/Ufeekh6u6RHgUfTso+m5+iWdL+kF+btXyvp99O/PZduXyPpDkl/Me5v2SbpXWf/jdtc4wRhc9k7gFcDLwbOAw4Dd6Tb3gQsBNYAS4G3Ascj4n8C3wVuiYjWiLhlgnP/C7ARWAH8CPhs3rY/B54DvABYArwXGJG0Lj3ur4DlwBXAztP4e14NPBe4OF3fnp5jCfA54B8kNaXb3g3cALwCWAD8BtALfBq4QVINgKRlwC+kx5udxLeYbC57K8mFfj+ApPcDT0n6dWCQJDFcGBEPAvefzokj4q7R5fS8hyUtBHIkF+PnRcSBdJfvp/vdCPxbRHw+LT+Uvor1vyOiKy+Gz+Rt+wtJfwBcBPwYeDPw3ojYnW7/8ehnSjoKvBT4OrAV+FZEPH0acViVcA3C5rJ1wD+mt3OOAO3AMHAO8PfAfcDdkn4q6U8l1Rdz0vT2zYfS2zfdwBPppmXpqwl4rMChayYoL9a+cXH8nqT29DbWEZIa0bIiPuvTwBvS5TeQfBdmp3CCsLlsH/CLEbEo79UUEQciYjAi/igiLia5FfRK4I3pcVMNcXwjcD3JrZmFwPq0XMBBoA+4YIJ4CpUD9ADz8tZXFthnLK60veG9wOuBxRGxCDiaxjDVZ30GuF7Ss4AtwFcm2M+qnBOEzWV/A/xxeu8fScslXZ8u/5ykyyTVAt0kt5xG0uOeBs6f5LzzgX6S20PzgA+OboiIEeAu4MOSzktrG8+X1EjSTvELkl4vqU7SUklXpIfuBF4jaZ6kC4HfnOJvmw8MAZ1AnaT3kbQ1jPoE8AFJG5W4XNLSNMb9JO0Xfw98KSKOT/FZVqWcIGwu+yiwDfhXSTngBySNvJD8Qr+HJDm0A9/mxK2WjwKvk3RY0v8pcN7/CzwJHAB2pefN93vAQyQX4S7gT4CaiHiKpNH4d9PyncCz0mP+EhggSU6f5uRG70LuA74GPJLG0sfJt6A+DHwR+Nf0b/w7oDlv+6eBy/DtJZuEPGGQWfWR9CKSW03rwhcBm4BrEGZVJm2MfyfwCScHm4wThFkVkbQFOAKcC3ykrMFYxfMtJjMzK8g1CDMzK2jOPEm9bNmyWL9+fbnDMDObVe6///6DEbG80LY5kyDWr1/Pjh07yh2GmdmsIunJibb5FpOZmRXkBGFmZgU5QZiZWUFOEGZmVpAThJmZFeQEYWZmBZU0QUi6VtJuSXsk3Vpg+1pJ35T0gKQHJb0ib9tt6XG7Jb28lHGamdmpSvYcRDrO/h3ANcB+YLukbRGxK2+3PwC+GBF/Leli4F5gfbq8FbiEZC7hf5O0KSKGSxWvmRUWETzd3c+D+4/w6DPHaKyrYdG8BhbPqx97XzyvgYXN9dTUaOoT2qxRygflrgL2RMReAEl3k8zClZ8gghOTnCwEfpouXw/cHRH9wOOS9qTn+88SxmtmwDPdfTy4/ygPHTjx6sz1T3mcBAubk2SxaN7J7yeSSd5yS7Ktqb52Bv4qOxOlTBCrOHkCk/2cmKxl1PtJJnN5B9BCMoXj6LH5k7DsT8tOIulm4GaAtWvXTkvQZtWkM9fPTw4cTRPCER46cJSnu5NkUCO4YHkrL9y4jMtXLeSy1QvZvHIBQ8PB4d4BjhwfTN57BzjcM5i8946WDfJ0dx+7szkO9w7QOzBx5b+pviZNJg0saq5nccvJNZNCtZUFzfXUVkFtpW9wmEM9A3QdG+BQTz+Hjg3Q1TPAwZ5+usaWB1i/dB4f3XrltH9+uYfauAH4VET8haTnA38v6dJiD46IO4E7Adra2jwsrdkkDh3rT2oEebWDjqN9QPLr//xlLbzggmVcliaDi89dQEtj4UvEwnn1p/XZ/UPDHEmTx6nJJFk+kiaW3dkcR3oHOXJ8kOGRwv9bS9DaUMf8pjrmN9WzoDl5T9ZPLC8Y9z5W3lxPS0Mt0swmmb7BYbp6BjiUXvBPLA/QlSaAQz3ptmMD9EyQWOtrxdKWRpa0NLC0tYFzFzYX3O9slTJBHADW5K2vTsvy/SZwLUBE/KekJmBZkcea2QQO9wycuEWUJoQDR05MPX3+shau2rAkSQarFnLJqoW0TpAMpkNjXS3nLKjlnAVNRR8zMhLk+odOTSY9SfLI9Q3SfXyIXN8gub4hnsn18VjnELm+IbqPDzI0QXIZVSNobTw5mUyVaOY31bMgr7y2RnT1JL/kD/UMcOhY/ynLB9Nf+l09AxzrHyoYS32tkot9SyNLWxtYt3Te2PLSloaxRLC0pZElrQ3Mb6ybkeRWygSxHdgoaQPJxX0rcOO4fZ4CXgp8Kp3IpIlkEvZtwOckfZikkXoj8F8ljNVszOGeAXbuP8IDTx1h574j9PYPsaA5uTAk78mFJHk/dX1+Ux31tTPXg/xo7yA/+enJt4n2dZ1IBuuXzuPZ6xbzphes47JVi7hk1QIWNJ1eDaAcamrEwuZ6FjbXs27p6R0bEfQNjiRJpO9EEulO33Nj70nZaKL56ZE+cv25sW0T1WCmUlcjlrY2sKSlkaUtyQV/SUsDy1qTX/3JcrJ9SUsDC5pm5oJ/ukqWICJiSNItJJOr1wJ3RcTDkm4HdkTENpLJ2z8u6V0kDdY3pVMgPizpiyQN2kPA292DyUphcHiE3dkcDzx1mAeeOsID+47w+MEeIPmFuemc+SxpaeCZXB97nhm9mAwy1XVjXkNtgURy9gmmu2+Qn4y7TfTkod6x7WuXzOPyVYv4teeu4/K0ZrCwufKTwXSTRHNDLc0NtaxYMPX+hUQEvQPDYwmlUKIZHgkWz2sY+6W/tLWyL/ina87MKNfW1hYe7tumkj3alySDfUd44KnDPHTgKH2DIwAsa23kyrWLkteaxVy+emHBe/CjF47RX56jSWNsPX+5b7DAflP/Mh2fYFoa63iqq3cseQGsXtw81l5w+apFXLpqAYvmNUzvF2ZznqT7I6Kt0LZyN1
KblUzf4DAPHTjKA08dZue+5JbRaKNsQ20Nl6xawI1XrRtLCqsWNRf1q08SLY11tDTWce7C04/rTBLM4d4BNp3Tyuues5pL03aDJS1OBlZaThB2kogg291Ha2MdrTPUEDYdIoInDvWyc196q+ipI7R3dI81VK5Z0szPrF+SJoPFbDl3Po115el/f7YJxmymOEHYSe7evo/bvvwQkPzKXtxSnzakpe/z0vfWBpbMaxhrcFvSkvRRr5uhxtnuvkF+nNYKRmsIh3sHAWhpqOVZaxbxlhefz5VrFnPF2kUsa22ckbjM5hInCDvJ/U8eZvG8et72kgvo6hmkK+2r3dUzwEOHj9DVM0B3X+GuepA8STvaLW9xS9JwN/q+ZNzykpYG5jVM/U9weCR45OncWDJ4YN8RHus8RkTSH37jilZedvFKrkhvFW1cMb8qHqIyKzUnCDtJJtvNpasWcvOLLphwn8HhEQ73DNDVmzzh2dU7MPbAz+HepA/44Z4B9nX1Jr/sewYm7JPeVF+T1ERGu/zNO1Fj6RkYZudTR/jx/iNjT+IuaWngyjWLuP5Z53Hl2sVcvmbhrOiyaTYbOUHYmKHhER55+hg3vWD9pPvV19awYkETK4p86Cki6O4bGquJdKUJZPTp0bGaSu8gjx88NvYEaV2NuOS8BfzKc1Zz5drFXLl2EWuXzJs17SJms50ThI154lAPA0MjbF45f1rPK5144GnDspaijukbHEaibA3JZuYEYXnaO3IAbF55hk8WTSOP8GlWfp5RzsZkst3U1YgLVhT3K9/M5jYnCBuT6chxwfJW39YxM8AJwvJksjk2nzu97Q9mNns5QRgAR48PcuDI8YpofzCzyuAEYQDszqYN1K5BmFnKCcIA2J3tBmCLaxBmlnKCMADaszkWzavnnAUes8jMEk4QBkCmo5vNK+f7KWUzG+MEYYyMBLuzOTdQm9lJnCCM/YeP0zMwPO1DbJjZ7OYEYbSnDdSbz3UNwsxOKGmCkHStpN2S9ki6tcD2v5S0M309IulI3rbhvG3bShlntct05JBg0zmt5Q7FzCpIyQbrk1QL3AFcA+wHtkvaFhG7RveJiHfl7f8O4Mq8UxyPiCtKFZ+dkMl2s35pS1GT95hZ9ShlDeIqYE9E7I2IAeBu4PpJ9r8B+HwJ47EJZLI5tz+Y2SlKmSBWAfvy1venZaeQtA7YAHwjr7hJ0g5JP5D06gmOuzndZ0dnZ+c0hV1degeGeOJQj3swmdkpKqWReitwT0QM55Wti4g24EbgI5JOmQMzIu6MiLaIaFu+fPlMxTqnPPJ0Mrezh9gws/FKmSAOAGvy1lenZYVsZdztpYg4kL7vBb7Fye0TNk0yHR5iw8wKK2WC2A5slLRBUgNJEjilN5KkzcBi4D/zyhZLakyXlwFXA7vGH2tnL5PN0dJQy+rFzeUOxcwqTMm6rUTEkKRbgPuAWuCuiHhY0u3AjogYTRZbgbsjIvIO3wL8raQRkiT2ofzeTzZ92ju6uWjlfGpqPMSGmZ2spP0aI+Je4N5xZe8bt/7+Asd9H7islLEZRASZbI5fuvzccodiZhWoUhqprQyy3X0cPT7IFndxNbMCnCCqWKZjdJIgN1Cb2amcIKrY6BhMF7kGYWYFOEFUsUxHjlWLmlnQVF/uUMysAjlBVLFMtpstfkDOzCbgBFGl+oeGeazTQ2yY2cScIKrUnmeOMTwSHmLDzCbkBFGlxnowuQZhZhNwgqhSmWw3jXU1rF86r9yhmFmFcoKoUplsjk3nzKeu1v8EzKwwXx2qVHuHJwkys8k5QVShzlw/B4/1+wlqM5uUE0QV2p1NGqg9BpOZTcYJogplPMSGmRXBCaIKtXfkWDG/kaWtjeUOxcwqmBNEFcpku93+YGZTcoKoMkPDIzz69DG3P5jZlJwgqszjB3sYGB7xEBtmNiUniCrTnvUQG2ZWnJImCEnXStotaY+kWwts/0tJO9PXI5KO5G17k6RH09ebShlnNcl0dFNXIy5Y3lruUMyswtWV6sSSaoE7gGuA/cB2SdsiYtfoPhHxrrz93wFcmS4vAf4QaAMCuD899nCp4q0WmWyOC1e00lDnyqOZTa6UV4mrgD0RsTciBoC7gesn2f8G4PPp8suBr0dEV5oUvg5cW8JYq0amo9tDbJhZUUqZIFYB+/LW96dlp5C0DtgAfON0jpV0s6QdknZ0dnZOS9Bz2dHeQX56tM9dXM2sKJVyn2ErcE9EDJ/OQRFxZ0S0RUTb8uXLSxTa3DH6BLVrEGZWjFImiAPAmrz11WlZIVs5cXvpdI+1ImVGx2ByDcLMilDKBLEd2Chpg6QGkiSwbfxOkjYDi4H/zCu+D3iZpMWSFgMvS8vsLGSy3SyeV8+K+R5iw8ymVrJeTBExJOkWkgt7LXBXRDws6XZgR0SMJoutwN0REXnHdkn6AEmSAbg9IrpKFWu1yGRzbF65AEnlDsXMZoGSJQiAiLgXuHdc2fvGrb9/gmPvAu4qWXBVZmQk2J3N8as/s2bqnc3MqJxGaiuxfYd76R0YdgO1mRXNCaJKtHd4iA0zOz1OEFUik+1Ggk3nuAZhZsVxgqgSmY4cG5a20NxQW+5QzGyWcIKoEskkQa49mFnxJuzFJOk1RRzfl/ZUsgrW0z/Ek129vObZq8sdipnNIpN1c/048E/AZJ3mX8S4bqxWeR55OkeEh9gws9MzWYL4l4j4jckOlvSZaY7HSsBDbJjZmZiwDSIi3jDVwcXsY+WX6eimtbGOVYuayx2Kmc0iRTdSS7pQ0mckfUnS80sZlE2v9myOi1bOp6bGQ2yYWfEma6Ruioi+vKIPAO9Nl78KXFHCuGyaRASZjm5e9azzyh2Kmc0yk9UgvirpjXnrg8B6YB1wWvM2WPl0HO2ju2/IkwSZ2WmbLEFcCyyQ9DVJLwJ+j2Qq0F8Gfm0mgrOzNzpJ0Bb3YDKz0zThLaZ0drePSfp74H8BbwP+ICIem6ng7OyNjsG0yQnCzE7TZG0QzwXeAwwAHwSOA38s6QDwgYg4MiMR2lnJZHOsXtzMgqb6codiZrPMZM9B/C3wCqAV+GREXA1slfRi4Askt5uswmU6uj2Cq5mdkcnaIIY40Sg9MFoYEd+OCCeHWaBvcJi9B3vY4jGYzOwMTFaDuBF4C0lyeOMk+1mF2vPMMYZHwjUIMzsjkzVSPwL87gzGYtNsdIgNj+JqZmdiwltMkv55qoOn2kfStZJ2S9oj6dYJ9nm9pF2SHpb0ubzyYUk709e2qWKxU2U6ummsq2H90pZyh2Jms9Bkt5h+dooLs4CLJ9wo1QJ3ANcA+4HtkrZFxK68fTYCtwFXR8RhSSvyTnE8Iq4o4m+wCWTSITZqPcSGmZ2ByRLE9UUcPzDJtquAPRGxF0DS3ek5d+Xt81vAHRFxGCAininiM61ImWw3P795xdQ7mpkVMFkbxLfP8tyrgH156/uB547bZxOApO8BtcD7I+Jr6bYmSTtIelN9KCK+Mv4DJN0M3Aywdu3aswx3bunM9XPw2IAbqM3sjE1Wg5ipz98IvARYDXxH0mXpQ3jrIuKAp
POBb0h6aPxT3BFxJ3AnQFtbW8xo5BVudIgNN1Cb2Zkq5ZzUB4A1eeur07J8+4FtETEYEY8Dj5AkDCLiQPq+F/gWcGUJY51zMukQG65BmNmZmjJBSHqVpDNJJNuBjZI2SGoAtgLjG72/QlJ7QNIykltOeyUtltSYV341J7dd2BTas92cs6CRJS0N5Q7FzGapYi78vwo8KulPJW0u9sQRMQTcAtwHtANfjIiHJd0u6bp0t/uAQ5J2Ad8E3hMRh4AtwA5JP07LP5Tf+8mmlunIufZgZmdlyjaIiHiDpAXADcCnJAXwSeDzEZGb4th7gXvHlb0vbzmAd6ev/H2+D1xW7B9hJxscHmHPM8d44aZl5Q7FzGaxom4dRUQ3cA9wN3AuyZwQP5L0jhLGZmfo8YM9DAyPsMU1CDM7C8W0QVwn6R9JGorrgasi4heBZ+GhOCpSe4d7MJnZ2Summ+trgb+MiO/kF0ZEr6TfLE1YdjYy2Rz1teL8Za3lDsXMZrFiEsT7gY7RFUnNwDkR8URE/HupArMzl+no5oLlrTTUlbIXs5nNdcVcQf4BGMlbH07LrEJlsjm2nOv2BzM7O8UkiLqIyJ8waABw5/oKdaR3gI6jfWz2HNRmdpaKSRCdec8tIOl64GDpQrKzcWIOCNcgzOzsFNMG8Vbgs5I+RjLE9z48w1zFyqQ9mLa4BmFmZ6mYB+UeA54nqTVdP1byqOyMZbI5lrQ0sHx+Y7lDMbNZrqjRXCX9EnAJyRDcAETE7SWMy85QezbH5pXzGf3vZGZ2pop5UO5vSMZjegfJLaZfAdaVOC47AyMjwSNZj8FkZtOjmEbqF0TEG4HDEfFHwPNJJ/qxyvJUVy/HB4f9BLWZTYtiEkRf+t4r6TxgkGQ8JqswY5MEuYHazKZBMW0QX5W0CPgz4EdAAB8vZVB2Zto7ctQINq5wgjCzszdpgkgnCvr3dArQL0n6Z6ApIo7ORHB2ejLZbtYva6G5obbcoZjZHDDpLaaIGAHuyFvvd3KoXJlszkN8m9m0KaYN4t8lvVbuN1nRevqHePJQr9sfzGzaFJMg3kIyOF+/pG5JOUndJY7LTtPupz3EhplNr2KepPZP0lkg05EmCNcgzGyaFPOg3IsKvYo5uaRrJe2WtEfSrRPs83pJuyQ9LOlzeeVvkvRo+npT8X9Sdcpku2ltrGP14uZyh2Jmc0Qx3Vzfk7fcBFwF3A/8/GQHSaolaeC+BtgPbJe0LSJ25e2zEbgNuDoiDktakZYvAf4QaCPpVnt/euzhov+yKpPp8BAbZja9pqxBRMSr8l7XAJcCxVyorwL2RMTedA6Ju4Hrx+3zW8Adoxf+iHgmLX858PWI6Eq3fR24trg/qfpEBO3Zbj9BbWbT6kzmpNwPbCliv1UkQ4PnH7dq3D6bgE2SvifpB5KuPY1jkXSzpB2SdnR2dhb9B8w1Pz3aR65vyGMwmdm0mvIWk6S/IrnNA0lCuYLkierp+vyNwEuA1cB3JF1W7MERcSdwJ0BbW1tMsfucNTYHhGsQZjaNimmD2JG3PAR8PiK+V8RxB4A1eeur07J8+4EfRsQg8LikR0gSxgGSpJF/7LeK+MyqNDqL3KZznCDMbPoUkyDuAfoiYhiSxmdJ8yKid4rjtgMbJW0gueBvBW4ct89XgBuAT0paRnLLaS/wGPBBSYvT/V5G0phtBbR3dLNmSTPzm+rLHYqZzSFFPUkN5PedbAb+baqDImIIuAW4D2gHvhgRD0u6PW+O6/uAQ5J2Ad8E3hMRhyKiC/gASZLZDtyellkBGc8BYWYlUEwNoil/mtGIOCZpXjEnj4h7gXvHlb0vbzmAd6ev8cfeBdxVzOdUs77BYfZ2HuMVl64sdyhmNscUU4PokfTs0RVJzwGOly4kOx17njnGSHiIDTObfsXUIH4H+AdJPyWZcnQlyRSkVgHaOzxJkJmVRjFjMW2XtBm4KC3anfY6sgqQyeZoqq9h3dKWcodiZnNMMWMxvR1oiYifRMRPgFZJ/730oVkxMtluLjpnPrU1HmLDzKZXMW0Qv5XOKAdAOvTFb5UsIitaRNDe4R5MZlYaxSSI2vzJgtJB+BpKF5IVq/NYP109Ax6DycxKophG6q8BX5D0t+n6W9IyK7MTc0C4BmFm06+YBPE/gJuBt6XrXwc+XrKIrGiZrHswmVnpFDPc90hE/E1EvC4iXgfsAv6q9KHZVDIdOVYuaGJxi+/4mdn0K6YGgaQrScZMej3wOPDlUgZlxWnP5tz+YGYlM2GCkLSJJCncABwEvgAoIn5uhmKzSQwOj7DnmRwv3rS83KGY2Rw1WQ0iA3wXeGVE7AGQ9K4ZicqmtLezh8Hh8BwQZlYyk7VBvAboAL4p6eOSXkoy1IZVgBMN1O7BZGalMWGCiIivRMRWYDPJUNy/A6yQ9NeSXjZD8dkE2jty1NeK85d7iA0zK41iejH1RMTnIuJVJDO7PUDS9dXKKJPt5sIV86mvPZNpxc3MpnZaV5eIOBwRd0bES0sVkBUn05Fji59/MLMS8s/PWehwzwDZ7j53cTWzknKCmIUyWQ+xYWal5wQxC431YHINwsxKqKQJQtK1knZL2iPp1gLbb5LUKWln+npz3rbhvPJtpYxztsl05Fja0sDy1sZyh2Jmc1hRQ22ciXRY8DuAa4D9wHZJ2yJi17hdvxARtxQ4xfGIuKJU8c1mmWw3m8+dT94o7GZm066UNYirgD0RsTciBoC7getL+HlVYXgk2P20Jwkys9IrZYJYBezLW9+flo33WkkPSrpH0pq88iZJOyT9QNKrC32ApJvTfXZ0dnZOX+QV7MlDPfQNjniIbzMruXI3Un8VWB8Rl5PMM/HpvG3rIqINuBH4iKQLxh+cPpPRFhFty5dXx6B1u9MeTFvOdQ3CzEqrlAniAJBfI1idlo2JiEMR0Z+ufgJ4Tt62A+n7XuBbwJUljHXWaM/mqBFcuKK13KGY2RxXygSxHdgoaYOkBmArcFJvJEnn5q1eB7Sn5YslNabLy4CrSSYqqnqZjm42LGuhqb623KGY2RxXsl5METEk6RbgPqAWuCsiHpZ0O7AjIrYBvy3pOmAI6AJuSg/fAvytpBGSJPahAr2fqlImm+Oy1QvLHYaZVYGSJQiAiLgXuHdc2fvylm8Dbitw3PeBy0oZ22x0rH+Ip7p6eX3b6nKHYmZVoNyN1HYadnuIDTObQU4Qs4iH2DCzmeQEMYtkOnLMb6xj1aLmcodiZlXACWIW8RAbZjaTnCBmiYgg0+EhNsxs5jhBzBIHjhwn1z/k9gczmzFOELNEpsM9mMxsZjlBzBKjPZgu8iB9ZjZDnCBmifZsjrVL5tHaWNJnG83MxjhBzBKZjm4P8W1mM8oJYhboGxzm8YM9bPYQ32Y2g5wgZoFHnz7GSMAW1yDMbAY5QcwC7WNDbLgGYWYzxwliFsh05Giur2XtknnlDsXMqogTxCyQyXazaeV8ams8xIaZzRwniAoXEbR3dLv9wcxmnBNEhevM9XO4d9BdXM1sxjlBVLj20UmC3EBtZjPMCaLCZTrSHkyuQZjZDCtpgpB0
raTdkvZIurXA9pskdUramb7enLftTZIeTV9vKmWclSyTzXHuwiYWzWsodyhmVmVKNrCPpFrgDuAaYD+wXdK2iNg1btcvRMQt445dAvwh0AYEcH967OFSxVup2j3EhpmVSSlrEFcBeyJib0QMAHcD1xd57MuBr0dEV5oUvg5cW6I4K9bA0AiPdR5z+4OZlUUpE8QqYF/e+v60bLzXSnpQ0j2S1pzOsZJulrRD0o7Ozs7pirti7D14jMHhcA3CzMqi3I3UXwXWR8TlJLWET5/OwRFxZ0S0RUTb8uXLSxJgOY1OErTFNQgzK4NSJogDwJq89dVp2ZiIOBQR/enqJ4DnFHtsNWjPdtNQW8OGZS3lDsXMqlApE8R2YKOkDZIagK3AtvwdJJ2bt3od0J4u3we8TNJiSYuBl6VlVSXTkePCFa3U15a7omdm1ahkvZgiYkjSLSQX9lrgroh4WNLtwI6I2Ab8tqTrgCGgC7gpPbZL0gdIkgzA7RHRVapYK1Um283VFy4rdxhmVqVKOn9lRNwL3Duu7H15y7cBt01w7F3AXaWMr5J19QzwdHc/W1a6/cHMysP3LipUZmwOCPdgMrPycIKoUKM9mDa7BmFmZeIEUaEy2W6WtTawfH5juUMxsyrlBFGhMtmcaw9mVlZOEBVoeCTYnc35CWozKysniAr0xKEe+odGPAaTmZWVE0QFOtFA7RqEmZWPE0QFymS7qa0RF65oLXcoZlbFnCAqUCab4/xlLTTV15Y7FDOrYk4QFSiT7eYi314yszJzgqgwub5B9nUd9xDfZlZ2ThAV5pGn3UBtZpXBCaLCtI/2YHINwszKzAmiwmSy3cxvquO8hU3lDsXMqpwTRIXJdOTYsnIBksodiplVOSeIChIRyRhMHuLbzCqAE0QF2X/4OMf6hzxIn5lVBCeICpLJjjZQuwZhZuXnBFFBMh3JLHIXneMEYWblV9IEIelaSbsl7ZF06yT7vVZSSGpL19dLOi5pZ/r6m1LGWSky2Rzrls6jpbGkU4WbmRWlZFciSbXAHcA1wH5gu6RtEbFr3H7zgXcCPxx3isci4opSxVeJ2rPdfkDOzCpGKX+qXgXsiYi9AJLuBq4Hdo3b7wPAnwDvKWEsExoeCQ4e60eCGil9gdL30bIT2xlbn86uqMcHhnniYA+vuvy8aTunmdnZKGWCWAXsy1vfDzw3fwdJzwbWRMT/kzQ+QWyQ9ADQDfxBRHy3FEEe6R3guR/89zM6Nj9pTJ1Q8ren+9ec2H9oZISRgC1uoDazClG2m92SaoAPAzcV2NwBrI2IQ5KeA3xF0iUR0T3uHDcDNwOsXbv2jOJoaazjg798GSMRRAQjASPpe7J+oiwCRkby14vYP68sIhgZOXX/4XS/521YytUXLjujv8PMbLqVMkEcANbkra9Oy0bNBy4FvpXeqlkJbJN0XUTsAPoBIuJ+SY8Bm4Ad+R8QEXcCdwK0tbXFmQTZVF/Ljc89s+RiZjaXlbIX03Zgo6QNkhqArcC20Y0RcTQilkXE+ohYD/wAuC4idkhanjZyI+l8YCOwt4SxmpnZOCWrQUTEkKRbgPuAWuCuiHhY0u3AjojYNsnhLwJulzQIjABvjYiuUsVqZmanUsQZ3ZmpOG1tbbFjx46pdzQzszGS7o+ItkLb/CS1mZkV5ARhZmYFOUGYmVlBThBmZlaQE4SZmRU0Z3oxSeoEnjyLUywDDk5TOLOdv4uT+fs4mb+PE+bCd7EuIpYX2jBnEsTZkrRjoq5e1cbfxcn8fZzM38cJc/278C0mMzMryAnCzMwKcoI44c5yB1BB/F2czN/Hyfx9nDCnvwu3QZiZWUGuQZiZWUFOEGZmVlDVJwhJ10raLWmPpFvLHU85SVoj6ZuSdkl6WNI7yx1TuUmqlfSApH8udyzlJmmRpHskZSS1S3p+uWMqJ0nvSv8/+Ymkz0tqKndM062qE0Q6KdEdwC8CFwM3SLq4vFGV1RDwuxFxMfA84O1V/n0AvBNoL3cQFeKjwNciYjPwLKr4e5G0CvhtoC0iLiWZ82ZreaOaflWdIICrgD0RsTciBoC7gevLHFPZRERHRPwoXc6RXABWlTeq8pG0Gvgl4BPljqXcJC0kmcjr7wAiYiAijpQ1qPKrA5ol1QHzgJ+WOZ5pV+0JYhWwL299P1V8QcwnaT1wJfDDModSTh8B3ksyq2G12wB0Ap9Mb7l9QlJLuYMql4g4APw58BTQARyNiH8tb1TTr9oThBUgqRX4EvA7EdFd7njKQdIrgWci4v5yx1Ih6oBnA38dEVcCPUDVttlJWkxyt2EDcB7QIukN5Y1q+lV7gjgArMlbX52WVS1J9STJ4bMR8eVyx1NGVwPXSXqC5Nbjz0v6THlDKqv9wP6IGK1R3kOSMKrVLwCPR0RnRAwCXwZeUOaYpl21J4jtwEZJGyQ1kDQybStzTGUjSST3mNsj4sPljqecIuK2iFgdEetJ/l18IyLm3C/EYkVEFtgn6aK06KXArjKGVG5PAc+TNC/9/+alzMFG+7pyB1BOETEk6RbgPpJeCHdFxMNlDqucrgZ+HXhI0s607Pcj4t7yhWQV5B3AZ9MfU3uB/1bmeMomIn4o6R7gRyS9/x5gDg674aE2zMysoGq/xWRmZhNwgjAzs4KcIMzMrCAnCDMzK8gJwszMCnKCMEtJOpa+r5d04zSf+/fHrX9/Os9vVgpOEGanWg+cVoJIB2ybzEkJIiLm3FO3Nvc4QZid6kPACyXtTMf8r5X0Z5K2S3pQ0lsAJL1E0nclbSN9qljSVyTdn84TcHNa9iGSUT93SvpsWjZaW1F67p9IekjSr+ad+1t58y98Nn1iF0kfSufseFDSn8/4t2NVo6qfpDabwK3A70XEKwHSC/3RiPgZSY3A9ySNjtz5bODSiHg8Xf+NiOiS1Axsl/SliLhV0i0RcUWBz3oNcAXJ/ArL0mO+k267EriEZBjp7wFXS2oHfhnYHBEhadH0/ulmJ7gGYTa1lwFvTIcf+SGwFNiYbvuvvOQA8NuSfgz8gGQgyI1M7meBz0fEcEQ8DXwb+Jm8c++PiBFgJ8mtr6NAH/B3kl4D9J7l32Y2IScIs6kJeEdEXJG+NuSN/d8ztpP0EpJRPp8fEc8iGZ/nbKah7M9bHgbqImKIZKKre4BXAl87i/ObTcoJwuxUOWB+3vp9wNvSodCRtGmCyXIWAocjolfSZpJpW0cNjh4/zneBX03bOZaTzNr2XxMFls7VsTAdQPFdJLemzErCbRBmp3oQGE5vFX2KZC7m9cCP0obiTuDVBY77GvDWtJ1gN8ltplF3Ag9K+lFE/Fpe+T8Czwd+DATw3ojIpgmmkPnAP0lqIqnZvPuM/kKzIng0VzMzK8i3mMzMrCAnCDMzK8gJwszMCnKCMDOzgpwgzMysICcIMzMryAnCzMwK+v9dS7Ovcb84WwAAAABJRU5ErkJggg==\n",
       "text/plain": [
        "<Figure size 432x288 with 1 Axes>"
       ]
@@ -509,7 +507,7 @@
     {
      "data": {
       "text/plain": [
-       "0.8053976582616722"
+       "0.798340863819657"
       ]
      },
      "execution_count": 15,
@@ -782,7 +780,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 25,
    "metadata": {
     "scrolled": true
    },
@@ -799,7 +797,9 @@
      "output_type": "stream",
      "text": [
       "<ipython-input-22-78c27bb59095>:15: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
-      "  x = (x + torch.tensor([1.0])) / 2.0\n"
+      "  x = (x + torch.tensor([1.0])) / 2.0\n",
+      "/workspace/brevitas/src/brevitas/quant_tensor/__init__.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+      "  training = torch.tensor(training, dtype=torch.bool)\n"
      ]
     }
    ],
@@ -843,7 +843,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -867,10 +867,10 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f49738bffa0>"
+       "<IPython.lib.display.IFrame at 0x7fb36398c3a0>"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
index 6ac4e52072d71f527e4ec5d923a76851b77dc247..a0fef1ab6112e734abfc5d5a22a526b41f5503a5 100644
--- a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
+++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
@@ -169,7 +169,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -177,13 +177,13 @@
      "output_type": "stream",
      "text": [
       "Input tensor name: 0\n",
-      "Output tensor name: 78\n",
+      "Output tensor name: 73\n",
       "Input tensor shape: [1, 600]\n",
       "Output tensor shape: [1, 1]\n",
-      "Input tensor datatype: DataType.BIPOLAR\n",
-      "Output tensor datatype: DataType.FLOAT32\n",
+      "Input tensor datatype: BIPOLAR\n",
+      "Output tensor datatype: FLOAT32\n",
       "List of node operator types in the graph: \n",
-      "['Add', 'Div', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'MultiThreshold']\n"
+      "['Mul', 'Add', 'Div', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'BatchNormalization', 'MultiThreshold', 'Mul', 'MatMul', 'Mul', 'Add', 'MultiThreshold']\n"
      ]
     }
    ],
@@ -200,8 +200,8 @@
     "print(\"Output tensor shape: %s\" % str(finnonnx_model_out_shape))\n",
     "finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)\n",
     "finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)\n",
-    "print(\"Input tensor datatype: %s\" % str(model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)))\n",
-    "print(\"Output tensor datatype: %s\" % str(model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)))\n",
+    "print(\"Input tensor datatype: %s\" % str(finnonnx_model_in_dt.name))\n",
+    "print(\"Output tensor datatype: %s\" % str(finnonnx_model_out_dt.name))\n",
     "print(\"List of node operator types in the graph: \")\n",
     "print([x.op_type for x in model_for_sim.graph.node])"
    ]
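As context for the change above: the datatype printout now uses the DataType's .name attribute, so values render as "BIPOLAR" instead of "DataType.BIPOLAR". A minimal sketch of the pattern, assuming the usual finn.core.modelwrapper import; the model path below is a placeholder, not a file from this repo:

    # Sketch only; not part of this patch. "model.onnx" is a placeholder path.
    from finn.core.modelwrapper import ModelWrapper

    model_for_sim = ModelWrapper("model.onnx")
    in_name = model_for_sim.graph.input[0].name
    out_name = model_for_sim.graph.output[0].name
    # get_tensor_datatype returns a FINN DataType; .name yields e.g. "BIPOLAR"
    print("Input tensor datatype: %s" % model_for_sim.get_tensor_datatype(in_name).name)
    print("Output tensor datatype: %s" % model_for_sim.get_tensor_datatype(out_name).name)
    print([n.op_type for n in model_for_sim.graph.node])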
@@ -226,7 +226,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -262,7 +262,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -286,10 +286,10 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f388298b470>"
+       "<IPython.lib.display.IFrame at 0x7f3be619b2b0>"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -311,7 +311,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -320,7 +320,7 @@
        "torch.Size([100, 593])"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -356,16 +356,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "IncompatibleKeys(missing_keys=[], unexpected_keys=[])"
+       "<All keys matched successfully>"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -409,7 +409,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -441,7 +441,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -476,14 +476,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "ok 100 nok 0: 100%|██████████| 100/100 [00:47<00:00,  2.09it/s]\n"
+      "ok 100 nok 0: 100%|██████████| 100/100 [00:21<00:00,  4.72it/s]\n"
      ]
     }
    ],
@@ -511,7 +511,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -560,7 +560,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
diff --git a/run-docker.sh b/run-docker.sh
index 5ce65ba201ca9a38d96f3bd42387102ca5a36bb7..2abd67f0679b32a09e51d03efe548bdc095c11a0 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -92,12 +92,11 @@ SCRIPTPATH=$(dirname "$SCRIPT")
 : ${FINN_DOCKER_PREBUILT="0"}
 : ${FINN_DOCKER_RUN_AS_ROOT="0"}
 : ${FINN_DOCKER_GPU="$(docker info | grep nvidia | wc -m)"}
+: ${FINN_DOCKER_EXTRA=""}
 : ${NVIDIA_VISIBLE_DEVICES=""}
 : ${DOCKER_BUILDKIT="1"}
-: ${FINN_DOCKER_EXTRA=""}
 
 DOCKER_INTERACTIVE=""
-DOCKER_EXTRA="$FINN_DOCKER_EXTRA "
 
 if [ "$1" = "test" ]; then
   gecho "Running test suite (all tests)"
@@ -113,20 +112,20 @@ elif [ "$1" = "notebook" ]; then
     JUPYTER_PASSWD_ARG="--NotebookApp.password='$JUPYTER_PASSWD_HASH'"
   fi
   DOCKER_CMD="jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks"
-  DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
-  DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
-  DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
-  DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT "
+  FINN_DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
+  FINN_DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
+  FINN_DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
+  FINN_DOCKER_EXTRA+="-p $NETRON_PORT:$NETRON_PORT "
 elif [ "$1" = "build_dataflow" ]; then
   BUILD_DATAFLOW_DIR=$(readlink -f "$2")
-  DOCKER_EXTRA="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR"
+  FINN_DOCKER_EXTRA="-v $BUILD_DATAFLOW_DIR:$BUILD_DATAFLOW_DIR "
   DOCKER_INTERACTIVE="-it"
   #FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build
   gecho "Running build_dataflow for folder $BUILD_DATAFLOW_DIR"
   DOCKER_CMD="build_dataflow $BUILD_DATAFLOW_DIR"
 elif [ "$1" = "build_custom" ]; then
   BUILD_CUSTOM_DIR=$(readlink -f "$2")
-  DOCKER_EXTRA="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR"
+  FINN_DOCKER_EXTRA="-v $BUILD_CUSTOM_DIR:$BUILD_CUSTOM_DIR -w $BUILD_CUSTOM_DIR "
   DOCKER_INTERACTIVE="-it"
   #FINN_HOST_BUILD_DIR=$BUILD_DATAFLOW_DIR/build
   gecho "Running build_custom: $BUILD_CUSTOM_DIR/build.py"
@@ -140,9 +139,9 @@ fi
 if [ "$FINN_DOCKER_GPU" != 0 ];then
   gecho "nvidia-docker detected, enabling GPUs"
   if [ ! -z "$NVIDIA_VISIBLE_DEVICES" ];then
-    DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES "
+    FINN_DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES "
   else
-    DOCKER_EXTRA+="--gpus all"
+    FINN_DOCKER_EXTRA+="--gpus all "
   fi
 fi
 
@@ -223,7 +222,7 @@ if [ ! -z "$FINN_XILINX_PATH" ];then
     DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR "
   fi
 fi
-DOCKER_EXEC+="$DOCKER_EXTRA "
+DOCKER_EXEC+="$FINN_DOCKER_EXTRA "
 DOCKER_EXEC+="$FINN_DOCKER_TAG $DOCKER_CMD"
 
 $DOCKER_EXEC
diff --git a/setup.cfg b/setup.cfg
index 9a6ca312aff459fb29f6e33a866b911e1a038229..c1dff9bd9b44fc7ca7a02ad0891fd75f10009530 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -74,7 +74,17 @@ exclude =
 # PDF = ReportLab; RXP
 # finn-base is needed to build the full set of docs
 docs =
-    finn-base
+    finn-base==0.0.3
+    docutils==0.17.1
+    dataclasses-json==0.5.2
+    gspread==3.6.0
+    pytest
+    netron
+    vcdvcd
+    torchvision
+    torch
+    qonnx@git+https://github.com/fastmachinelearning/qonnx@main#egg=qonnx
+
 # Add here test requirements (semicolon/line-separated)
 testing =
     pytest
@@ -96,7 +106,6 @@ console_scripts =
 [test]
 # py.test options when running `python setup.py test`
 # addopts = --verbose
-extras = True
 
 [tool:pytest]
 # Options for py.test:
diff --git a/src/finn/analysis/verify_custom_nodes.py b/src/finn/analysis/verify_custom_nodes.py
index 9af1e9a4fe83de24f64a7e9df535bcf78f5fc234..62dac2827f11d290c5a50137e12684eb93326297 100644
--- a/src/finn/analysis/verify_custom_nodes.py
+++ b/src/finn/analysis/verify_custom_nodes.py
@@ -32,7 +32,8 @@ from finn.util.basic import is_finn_op
 
 def verify_nodes(model):
     """Checks if custom ops in graph are correctly built, with all attributes
-    and inputs.
+    and inputs. Please note that many FINN CustomOps don't yet implement the
+    verify_node function required for this analysis pass to work correctly.
 
     Returns {node op_type : info_messages}
 
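For reference, this analysis pass is applied through ModelWrapper like any other FINN analysis function; a hedged sketch (the model path is a placeholder, and CustomOps without a verify_node implementation will simply report that in their info messages):

    # Sketch only; not part of this patch.
    from finn.analysis.verify_custom_nodes import verify_nodes
    from finn.core.modelwrapper import ModelWrapper

    model = ModelWrapper("model.onnx")  # placeholder path
    # ModelWrapper.analysis calls the pass and returns its result dict
    info = model.analysis(verify_nodes)
    for op_type, messages in info.items():
        print(op_type, messages)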
diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index 4aa1ad31e1ad73762ef46cc861b1a255ce57b926..c4664a5471984e1f88a70f1d9bb6ce674e38c782 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -62,7 +62,7 @@ class StreamToLogger(object):
         pass
 
 
-def resolve_build_steps(cfg: DataflowBuildConfig):
+def resolve_build_steps(cfg: DataflowBuildConfig, partial: bool = True):
     steps = cfg.steps
     if steps is None:
         steps = default_build_dataflow_steps
@@ -76,19 +76,56 @@ def resolve_build_steps(cfg: DataflowBuildConfig):
             steps_as_fxns.append(transform_step)
         else:
             raise Exception("Could not resolve build step: " + str(transform_step))
+    if partial:
+        step_names = list(map(lambda x: x.__name__, steps_as_fxns))
+        if cfg.start_step is None:
+            start_ind = 0
+        else:
+            start_ind = step_names.index(cfg.start_step)
+        if cfg.stop_step is None:
+            stop_ind = len(step_names) - 1
+        else:
+            stop_ind = step_names.index(cfg.stop_step)
+        steps_as_fxns = steps_as_fxns[start_ind : (stop_ind + 1)]
+
     return steps_as_fxns
 
 
+def resolve_step_filename(
+    step_name: str, cfg: DataflowBuildConfig, step_delta: int = 0
+):
+    step_names = list(
+        map(lambda x: x.__name__, resolve_build_steps(cfg, partial=False))
+    )
+    assert step_name in step_names, "start_step %s not found" % step_name
+    step_no = step_names.index(step_name) + step_delta
+    assert step_no >= 0, "Invalid step+delta combination"
+    assert step_no < len(step_names), "Invalid step+delta combination"
+    filename = cfg.output_dir + "/intermediate_models/"
+    filename += "%s.onnx" % (step_names[step_no])
+    return filename
+
+
 def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
     """Best-effort build a dataflow accelerator using the given configuration.
 
     :param model_filename: ONNX model filename to build
     :param cfg: Build configuration
     """
-    model = ModelWrapper(model_filename)
+    # if start_step is specified, override the input model
+    if cfg.start_step is None:
+        print("Building dataflow accelerator from " + model_filename)
+        model = ModelWrapper(model_filename)
+    else:
+        intermediate_model_filename = resolve_step_filename(cfg.start_step, cfg, -1)
+        print(
+            "Building dataflow accelerator from intermediate checkpoint"
+            + intermediate_model_filename
+        )
+        model = ModelWrapper(intermediate_model_filename)
     assert type(model) is ModelWrapper
     finn_build_dir = os.environ["FINN_BUILD_DIR"]
-    print("Building dataflow accelerator from " + model_filename)
+
     print("Intermediate outputs will be generated in " + finn_build_dir)
     print("Final outputs will be generated in " + cfg.output_dir)
     print("Build log is at " + cfg.output_dir + "/build_dataflow.log")
@@ -132,7 +169,7 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
             sys.stdout = stdout_orig
             sys.stderr = stderr_orig
             time_per_step[step_name] = step_end - step_start
-            chkpt_name = "%d_%s.onnx" % (step_num, step_name)
+            chkpt_name = "%s.onnx" % (step_name)
             if cfg.save_intermediate_models:
                 intermediate_model_dir = cfg.output_dir + "/intermediate_models"
                 if not os.path.exists(intermediate_model_dir):
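The start_step/stop_step handling above lets a build resume from a saved checkpoint instead of starting over. A hedged sketch of a partial build; paths and the placeholder generate_outputs field (required by the config but not shown in this patch) are assumptions, and the earlier run must have used save_intermediate_models so that <output_dir>/intermediate_models/<step_name>.onnx exists:

    # Sketch only; not part of this patch.
    from finn.builder.build_dataflow import build_dataflow_cfg
    from finn.builder.build_dataflow_config import DataflowBuildConfig

    cfg = DataflowBuildConfig(
        output_dir="output_mybuild",      # same output_dir as the previous run
        synth_clk_period_ns=10.0,
        generate_outputs=[],              # assumption: required field, not shown in this patch
        save_intermediate_models=True,
        start_step="step_streamline",     # resume from the step_tidy_up checkpoint
        stop_step="step_convert_to_hls",  # stop after this step
    )
    # model_filename is only used when start_step is None; otherwise the checkpoint
    # resolved by resolve_step_filename(start_step, cfg, -1) is loaded instead.
    build_dataflow_cfg("model.onnx", cfg)  # placeholder filename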
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 4a112699ec9bdf126f447fe2244eb01f6f4fa042..807fd706860d7e4667107ddd2ed46ea2b123c3ec 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -89,6 +89,8 @@ class LargeFIFOMemStyle(str, Enum):
 class VerificationStepType(str, Enum):
     "Steps at which FINN ONNX execution can be launched for verification."
 
+    #: verify after step_qonnx_to_finn, using Python execution
+    QONNX_TO_FINN_PYTHON = "finn_onnx_python"
     #: verify after step_tidy_up, using Python execution
     TIDY_UP_PYTHON = "initial_python"
     #: verify after step_streamline , using Python execution
@@ -103,6 +105,7 @@ class VerificationStepType(str, Enum):
 #: specified order. Use the `steps` as part of build config to restrict which
 #: steps will be run.
 default_build_dataflow_steps = [
+    "step_qonnx_to_finn",
     "step_tidy_up",
     "step_streamline",
     "step_convert_to_hls",
@@ -123,6 +126,7 @@ default_build_dataflow_steps = [
 
 #: List of steps to run for an estimate-only (no synthesis) dataflow build
 estimate_only_dataflow_steps = [
+    "step_qonnx_to_finn",
     "step_tidy_up",
     "step_streamline",
     "step_convert_to_hls",
@@ -172,6 +176,13 @@ class DataflowBuildConfig:
     #: that will override the target_fps setting here.
     target_fps: Optional[int] = None
 
+    #: (Optional) Use two-pass relaxation for folding, only relevant if target_fps
+    #: is set. If enabled, parallelization will internally run a second time if the
+    #: target cycles from the first pass could not be achieved, instead using the
+    #: achievable target to obtain a balanced pipeline. Disabling this can be
+    #: useful for decreasing the latency (even though throughput won't increase).
+    folding_two_pass_relaxation: Optional[bool] = True
+
     #: (Optional) At which steps the generated intermediate output model
     #: will be verified. See documentation of VerificationStepType for
     #: available options.
@@ -185,6 +196,19 @@ class DataflowBuildConfig:
     #: verification. Only required if verify_steps is not empty.
     verify_expected_output_npy: Optional[str] = "expected_output.npy"
 
+    #: (Optional) Save full execution context for each of the verify_steps.
+    #: By default, only the top-level graph output is saved.
+    verify_save_full_context: Optional[bool] = False
+
+    #: (Optional) Save .vcd waveforms from rtlsim under reports.
+    #: By default, waveforms won't be saved.
+    verify_save_rtlsim_waveforms: Optional[bool] = False
+
+    #: (Optional) Run synthesis to generate a .dcp for the stitched-IP output product.
+    #: This can make it easier to treat it as a standalone artifact without requiring
+    #: the full list of layer IP build directories. By default, synthesis will not run.
+    stitched_ip_gen_dcp: Optional[bool] = False
+
     #: (Optional) Control the maximum width of the per-PE MVAU stream while
     #: exploring the parallelization attributes to reach target_fps
     #: Only relevant if target_fps is specified.
@@ -264,6 +288,24 @@ class DataflowBuildConfig:
     #: - functions are called with (model, DataflowBuildConfig) as args
     steps: Optional[List[Any]] = None
 
+    #: If given, start from this step, loading the intermediate model generated
+    #: from the previous step (save_intermediate_models must be enabled)
+    start_step: Optional[str] = None
+
+    #: If given, stop at this step.
+    stop_step: Optional[str] = None
+
+    #: The optional argument `max_multithreshold_bit_width` affects which Quant nodes
+    #: of the QONNX format get converted to the MultiThreshold nodes of FINN. This
+    #: only affects Quant nodes in the activation path. Quant nodes that define a
+    #: bit width larger than `max_multithreshold_bit_width` are not converted to
+    #: MultiThreshold nodes; a warning is raised instead.
+    #: If not given, `max_multithreshold_bit_width` defaults to 8.
+    max_multithreshold_bit_width: Optional[int] = 8
+
+    #: Override the number of inputs for rtlsim performance measurement.
+    rtlsim_batch_size: Optional[int] = 1
+
     def _resolve_hls_clk_period(self):
         if self.hls_clk_period_ns is None:
             # use same clk for synth and hls if not explicitly specified
@@ -333,4 +375,7 @@ class DataflowBuildConfig:
                 + self.verify_expected_output_npy
             )
             verify_expected_output_npy = np.load(self.verify_expected_output_npy)
-            return (verify_input_npy, verify_expected_output_npy)
+            return (
+                verify_input_npy.astype(np.float32),
+                verify_expected_output_npy.astype(np.float32),
+            )
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 5bdccebb58ccb6f4906a05dda58da2494366739f..c977f15e7090f5cae633a013f5eb9e6b3dd34dd2 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -30,7 +30,9 @@ import json
 import numpy as np
 import os
 from copy import deepcopy
-from shutil import copy, copytree
+from distutils.dir_util import copy_tree
+from qonnx.util.cleanup import cleanup_model
+from shutil import copy
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
@@ -70,7 +72,6 @@ from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
 from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
     ReplaceVerilogRelPaths,
 )
@@ -94,9 +95,15 @@ from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.transformation.qonnx.quant_act_to_multithreshold import (
+    default_filter_function_generator,
+)
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
+from finn.util.basic import get_rtlsim_trace_depth
 from finn.util.config import extract_model_config_to_json
+from finn.util.pyverilator import pyverilate_get_liveness_threshold_cycles
 from finn.util.test import execute_parent
 
 
@@ -115,21 +122,108 @@ def verify_step(
         parent_model_fn = intermediate_models_dir + "/dataflow_parent.onnx"
         child_model_fn = intermediate_models_dir + "/verify_%s.onnx" % step_name
         model.save(child_model_fn)
-        out_npy = execute_parent(parent_model_fn, child_model_fn, in_npy)
+        out_tensor_name = ModelWrapper(parent_model_fn).graph.output[0].name
+        out_dict = execute_parent(
+            parent_model_fn, child_model_fn, in_npy, return_full_ctx=True
+        )
+        out_npy = out_dict[out_tensor_name]
     else:
         inp_tensor_name = model.graph.input[0].name
         out_tensor_name = model.graph.output[0].name
         inp_dict = {inp_tensor_name: in_npy}
-        out_dict = execute_onnx(model, inp_dict)
+        out_dict = execute_onnx(model, inp_dict, True)
         out_npy = out_dict[out_tensor_name]
     res = np.isclose(exp_out_npy, out_npy, atol=1e-3).all()
     res_to_str = {True: "SUCCESS", False: "FAIL"}
     res_str = res_to_str[res]
-    verification_output_fn = verify_out_dir + "/verify_%s_%s.npy" % (step_name, res_str)
-    np.save(verification_output_fn, out_npy)
+    if cfg.verify_save_full_context:
+        verification_output_fn = verify_out_dir + "/verify_%s_%s.npz" % (
+            step_name,
+            res_str,
+        )
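+        # the resulting .npz archive maps tensor names to their values and can be
+        # inspected later with np.load(verification_output_fn)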
+        np.savez(verification_output_fn, **out_dict)
+    else:
+        verification_output_fn = verify_out_dir + "/verify_%s_%s.npy" % (
+            step_name,
+            res_str,
+        )
+        np.save(verification_output_fn, out_npy)
     print("Verification for %s : %s" % (step_name, res_str))
 
 
+def prepare_for_stitched_ip_rtlsim(verify_model, cfg):
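+    """Prepare a model for stitched-IP rtlsim: force the impl_style values that
+    rtlsim supports on StreamingFIFO and StreamingDataWidthConverter layers,
+    re-generate and re-stitch the IP if anything was changed, and set the
+    exec_mode metadata property to "rtlsim"."""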
+    need_restitch = False
+    # rtlsim only supports certain impl_style for some nodes
+    # StreamingFIFO must have impl_style=rtl
+    for fifo_layer in verify_model.get_nodes_by_op_type("StreamingFIFO"):
+        inst = getCustomOp(fifo_layer)
+        if inst.get_nodeattr("impl_style") != "rtl":
+            inst.set_nodeattr("impl_style", "rtl")
+            inst.set_nodeattr("code_gen_dir_ipgen", "")
+            inst.set_nodeattr("ipgen_path", "")
+            need_restitch = True
+    # StreamingDataWidthConverter must have impl_style=hls
+    for dwc_layer in verify_model.get_nodes_by_op_type(
+        "StreamingDataWidthConverter_Batch"
+    ):
+        inst = getCustomOp(dwc_layer)
+        if inst.get_nodeattr("impl_style") != "hls":
+            inst.set_nodeattr("impl_style", "hls")
+            inst.set_nodeattr("code_gen_dir_ipgen", "")
+            inst.set_nodeattr("ipgen_path", "")
+            need_restitch = True
+    # if we've made alterations to the model, need to do some re-prep
+    if need_restitch:
+        print("Need to regen/re-stitch some IP for STITCHED_IP_RTLSIM")
+        verify_model = verify_model.transform(
+            PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())
+        )
+        verify_model = verify_model.transform(HLSSynthIP())
+        verify_model = verify_model.transform(
+            CreateStitchedIP(
+                cfg._resolve_fpga_part(),
+                cfg.synth_clk_period_ns,
+                vitis=False,
+            )
+        )
+    # set top-level prop for stitched-ip rtlsim and launch
+    verify_model.set_metadata_prop("exec_mode", "rtlsim")
+    # TODO make configurable
+    # verify_model.set_metadata_prop("rtlsim_trace", "trace.vcd")
+    return verify_model
+
+
+def step_qonnx_to_finn(model: ModelWrapper, cfg: DataflowBuildConfig):
+    """
+    This step only executes if QONNX nodes are found.
+    These include the following op_types: "Quant", "Trunc" and "BinaryQuant".
+    If such nodes are found, the step runs the QONNX tidy-up transformations
+    and then converts the QONNX model to the FINN-ONNX dialect.
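+    The step is registered as "step_qonnx_to_finn" in the build step lookup and
+    is intended to run before step_tidy_up.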
+    """
+    # Check if any QONNX nodes exist, i.e. BinaryQuant, Quant or Trunc
+    q_count = 0
+    for op_type in ["BinaryQuant", "Quant", "Trunc"]:
+        q_count += len(model.get_nodes_by_op_type(op_type))
+    if q_count == 0:
+        return model
+
+    # QONNX cleanup
+    model = cleanup_model(model)
+    # QONNX to FINN-ONNX
+    model = model.transform(
+        ConvertQONNXtoFINN(
+            filter_function=default_filter_function_generator(
+                max_multithreshold_bit_width=cfg.max_multithreshold_bit_width
+            )
+        )
+    )
+
+    if VerificationStepType.QONNX_TO_FINN_PYTHON in cfg._resolve_verification_steps():
+        verify_step(model, cfg, "qonnx_to_finn_python", need_parent=False)
+
+    return model
+
+
 def step_tidy_up(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Run the tidy-up step on given model. This includes shape and datatype
     inference, constant folding, and giving nodes and tensors better names.
@@ -164,6 +258,7 @@ def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
         model = model.transform(MakeMaxPoolNHWC())
         model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
         model = model.transform(MakeMaxPoolNHWC())
+        model = model.transform(absorb.AbsorbConsecutiveTransposes())
     model = model.transform(ConvertBipolarMatMulToXnorPopcount())
     model = model.transform(Streamline())
     # absorb final add-mul nodes into TopK
@@ -212,7 +307,12 @@ def step_create_dataflow_partition(model: ModelWrapper, cfg: DataflowBuildConfig
     nodes, which point to a separate ONNX file. Dataflow accelerator synthesis
     can only be performed on those HLSCustomOp sub-graphs."""
 
-    parent_model = model.transform(CreateDataflowPartition())
+    parent_model = model.transform(
+        CreateDataflowPartition(
+            partition_model_dir=cfg.output_dir
+            + "/intermediate_models/supported_op_partitions"
+        )
+    )
     sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")
     assert len(sdp_nodes) == 1, "Only a single StreamingDataflowPartition supported."
     sdp_node = sdp_nodes[0]
@@ -226,13 +326,32 @@ def step_create_dataflow_partition(model: ModelWrapper, cfg: DataflowBuildConfig
 
 def step_target_fps_parallelization(model: ModelWrapper, cfg: DataflowBuildConfig):
     """If target_fps was specified, use the SetFolding transformation to determine
-    parallelization attributes."""
+    parallelization attributes. The auto-generated config is saved as
+    auto_folding_config.json in the output directory and can serve as a basis
+    for further customizing the folding factors."""
 
     target_cycles_per_frame = cfg._resolve_cycles_per_frame()
     if target_cycles_per_frame is not None:
         model = model.transform(
-            SetFolding(target_cycles_per_frame, mvau_wwidth_max=cfg.mvau_wwidth_max)
+            SetFolding(
+                target_cycles_per_frame,
+                mvau_wwidth_max=cfg.mvau_wwidth_max,
+                two_pass_relaxation=cfg.folding_two_pass_relaxation,
+            )
         )
+        # extract the suggested configuration and save it as json
+        hw_attrs = [
+            "PE",
+            "SIMD",
+            "ram_style",
+            "resType",
+            "mem_mode",
+            "runtime_writeable_weights",
+        ]
+        extract_model_config_to_json(
+            model, cfg.output_dir + "/auto_folding_config.json", hw_attrs
+        )
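+        # For illustration only (node name and values are hypothetical), the
+        # generated JSON looks roughly like this and can be hand-edited and fed
+        # back in as a custom folding configuration:
+        #   {
+        #     "Defaults": {},
+        #     "StreamingFCLayer_Batch_0": {"PE": 4, "SIMD": 8, "ram_style": "auto"}
+        #   }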
+
     return model
 
 
@@ -380,25 +499,35 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
     if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
         stitched_ip_dir = cfg.output_dir + "/stitched_ip"
         model = model.transform(
-            CreateStitchedIP(cfg._resolve_fpga_part(), cfg.synth_clk_period_ns)
+            CreateStitchedIP(
+                cfg._resolve_fpga_part(),
+                cfg.synth_clk_period_ns,
+                vitis=cfg.stitched_ip_gen_dcp,
+            )
         )
         # TODO copy all ip sources into output dir? as zip?
-        copytree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir)
+        copy_tree(model.get_metadata_prop("vivado_stitch_proj"), stitched_ip_dir)
         print("Vivado stitched IP written into " + stitched_ip_dir)
     if VerificationStepType.STITCHED_IP_RTLSIM in cfg._resolve_verification_steps():
         # prepare ip-stitched rtlsim
         verify_model = deepcopy(model)
-        # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
-        for fifo_layer in verify_model.get_nodes_by_op_type("StreamingFIFO"):
-            getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
-        # similarly for StreamingDataWidthConverter with impl_style=hls
-        for dwc_layer in verify_model.get_nodes_by_op_type(
-            "StreamingDataWidthConverter_Batch"
-        ):
-            getCustomOp(dwc_layer).set_nodeattr("impl_style", "hls")
-        verify_model = verify_model.transform(PrepareRTLSim())
-        verify_model.set_metadata_prop("exec_mode", "rtlsim")
+        verify_model = prepare_for_stitched_ip_rtlsim(verify_model, cfg)
+        # use critical path estimate to set rtlsim liveness threshold
+        # (very conservative)
+        verify_model = verify_model.transform(AnnotateCycles())
+        estimate_network_performance = verify_model.analysis(dataflow_performance)
+        prev_liveness = pyverilate_get_liveness_threshold_cycles()
+        os.environ["LIVENESS_THRESHOLD"] = str(
+            int(estimate_network_performance["critical_path_cycles"])
+        )
+        if cfg.verify_save_rtlsim_waveforms:
+            report_dir = cfg.output_dir + "/report"
+            os.makedirs(report_dir, exist_ok=True)
+            verify_model.set_metadata_prop(
+                "rtlsim_trace", "%s/verify_rtlsim.vcd" % (report_dir)
+            )
         verify_step(verify_model, cfg, "stitched_ip_rtlsim", need_parent=True)
+        os.environ["LIVENESS_THRESHOLD"] = str(prev_liveness)
     return model
 
 
@@ -411,30 +540,30 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi
         assert (
             DataflowOutputType.STITCHED_IP in cfg.generate_outputs
         ), "rtlsim_perf needs stitched IP"
+        report_dir = cfg.output_dir + "/report"
+        os.makedirs(report_dir, exist_ok=True)
         # prepare ip-stitched rtlsim
         rtlsim_model = deepcopy(model)
-        # rtlsim only supports impl_style=rtl for StreamingFIFO, ensure that
-        for fifo_layer in rtlsim_model.get_nodes_by_op_type("StreamingFIFO"):
-            getCustomOp(fifo_layer).set_nodeattr("impl_style", "rtl")
-        # similarly for StreamingDataWidthConverter with impl_style=hls
-        for dwc_layer in rtlsim_model.get_nodes_by_op_type(
-            "StreamingDataWidthConverter_Batch"
-        ):
-            getCustomOp(dwc_layer).set_nodeattr("impl_style", "hls")
-        rtlsim_model = rtlsim_model.transform(PrepareRTLSim())
-        rtlsim_model.set_metadata_prop("exec_mode", "rtlsim")
+        rtlsim_model = prepare_for_stitched_ip_rtlsim(rtlsim_model, cfg)
         # run with single input to get latency
-        rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, 1)
-        rtlsim_latency = rtlsim_perf_dict["cycles"]
-        # run with num inputs equal to layers to fill the whole pipeline
-        # to get the steady-state throughput
-        rtlsim_bs = len(rtlsim_model.graph.node)
+        orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
+        rtlsim_bs = int(cfg.rtlsim_batch_size)
+        assert rtlsim_bs > 0, "rtlsim batch size must be >0"
+        if cfg.verify_save_rtlsim_waveforms:
+            # set depth to 3 for layer-by-layer visibility
+            os.environ["RTLSIM_TRACE_DEPTH"] = "3"
+            rtlsim_model.set_metadata_prop(
+                "rtlsim_trace", "%s/rtlsim_perf_batch_%d.vcd" % (report_dir, rtlsim_bs)
+            )
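+        # build the verilated model with -O3 to speed up the rtlsim run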
+        rtlsim_model.set_metadata_prop("extra_verilator_args", str(["-CFLAGS", "-O3"]))
         rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs)
+        rtlsim_latency = rtlsim_perf_dict["cycles"]
         rtlsim_perf_dict["latency_cycles"] = rtlsim_latency
-        report_dir = cfg.output_dir + "/report"
-        os.makedirs(report_dir, exist_ok=True)
         with open(report_dir + "/rtlsim_performance.json", "w") as f:
             json.dump(rtlsim_perf_dict, f, indent=2)
+        if cfg.verify_save_rtlsim_waveforms:
+            # restore original trace depth
+            os.environ["RTLSIM_TRACE_DEPTH"] = str(orig_rtlsim_trace_depth)
 
     return model
 
@@ -446,7 +575,7 @@ def step_make_pynq_driver(model: ModelWrapper, cfg: DataflowBuildConfig):
     if DataflowOutputType.PYNQ_DRIVER in cfg.generate_outputs:
         driver_dir = cfg.output_dir + "/driver"
         model = model.transform(MakePYNQDriver(cfg._resolve_driver_platform()))
-        copytree(model.get_metadata_prop("pynq_driver_dir"), driver_dir)
+        copy_tree(model.get_metadata_prop("pynq_driver_dir"), driver_dir)
         print("PYNQ Python driver written into " + driver_dir)
     return model
 
@@ -487,9 +616,15 @@ def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig):
         os.makedirs(bitfile_dir, exist_ok=True)
         report_dir = cfg.output_dir + "/report"
         os.makedirs(report_dir, exist_ok=True)
+        partition_model_dir = cfg.output_dir + "/intermediate_models/kernel_partitions"
         if cfg.shell_flow_type == ShellFlowType.VIVADO_ZYNQ:
             model = model.transform(
-                ZynqBuild(cfg.board, cfg.synth_clk_period_ns, cfg.enable_hw_debug)
+                ZynqBuild(
+                    cfg.board,
+                    cfg.synth_clk_period_ns,
+                    cfg.enable_hw_debug,
+                    partition_model_dir=partition_model_dir,
+                )
             )
             copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.bit")
             copy(model.get_metadata_prop("hw_handoff"), bitfile_dir + "/finn-accel.hwh")
@@ -513,6 +648,7 @@ def step_synthesize_bitfile(model: ModelWrapper, cfg: DataflowBuildConfig):
                     strategy=cfg._resolve_vitis_opt_strategy(),
                     enable_debug=cfg.enable_hw_debug,
                     floorplan_file=cfg.vitis_floorplan_file,
+                    partition_model_dir=partition_model_dir,
                 )
             )
             copy(model.get_metadata_prop("bitfile"), bitfile_dir + "/finn-accel.xclbin")
@@ -535,13 +671,14 @@ def step_deployment_package(model: ModelWrapper, cfg: DataflowBuildConfig):
         bitfile_dir = cfg.output_dir + "/bitfile"
         driver_dir = cfg.output_dir + "/driver"
         os.makedirs(deploy_dir, exist_ok=True)
-        copytree(bitfile_dir, deploy_dir + "/bitfile")
-        copytree(driver_dir, deploy_dir + "/driver")
+        copy_tree(bitfile_dir, deploy_dir + "/bitfile")
+        copy_tree(driver_dir, deploy_dir + "/driver")
     return model
 
 
 #: map step name strings to step functions
 build_dataflow_step_lookup = {
+    "step_qonnx_to_finn": step_qonnx_to_finn,
     "step_tidy_up": step_tidy_up,
     "step_streamline": step_streamline,
     "step_convert_to_hls": step_convert_to_hls,
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 320b947d0dd99b564da5775dfc8624993af57de2..417a505898fb1aba751e4b44db336b8cf313cb6a 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -40,6 +40,7 @@ from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch
 from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch
 from finn.custom_op.fpgadataflow.iodma import IODMA
 from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
+from finn.custom_op.fpgadataflow.lookup import Lookup
 from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch
 from finn.custom_op.fpgadataflow.streamingdataflowpartition import (
     StreamingDataflowPartition,
@@ -52,6 +53,7 @@ from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO
 from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
 from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
 from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
+from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour_Batch
 from finn.custom_op.fpgadataflow.vector_vector_activate_batch import (
     Vector_Vector_Activate_Batch,
 )
@@ -79,3 +81,5 @@ custom_op["Vector_Vector_Activate_Batch"] = Vector_Vector_Activate_Batch
 custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch
 custom_op["IODMA"] = IODMA
 custom_op["StreamingDataflowPartition"] = StreamingDataflowPartition
+custom_op["UpsampleNearestNeighbour_Batch"] = UpsampleNearestNeighbour_Batch
+custom_op["Lookup"] = Lookup
diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
index 2558394076a24694f153c4cae19eb3368a02a869..ec355a8ee7a5e2e2107e737ed5408d508063433f 100644
--- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
@@ -29,7 +29,6 @@
 import numpy as np
 import os
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -84,19 +83,7 @@ class AddStreams_Batch(HLSCustomOp):
         assert ishape == exp_ishape, "Unexpected input1 shape."
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[1]))
         assert ishape == exp_ishape, "Unexpected input2 shape."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index b1dc02131e45b0a04acb25723e09847ee858ebdc..4961f6148231252d255c1830ced418308032ce41 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -30,7 +30,6 @@ import numpy as np
 import os
 import warnings
 from math import ceil
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -56,10 +55,10 @@ def get_smallest_possible(vals):
     for v in vals:
         assert int(v) == v, "Error float value"
 
-    for k in DataType.__members__:
+    for k in DataType.get_accumulator_dt_cands():
         dt = DataType[k]
 
-        if dt in [DataType.BIPOLAR, DataType.TERNARY, DataType.FLOAT32]:
+        if dt in [DataType["BIPOLAR"], DataType["TERNARY"], DataType["FLOAT32"]]:
             # not currently supported
             continue
 
@@ -75,9 +74,9 @@ def get_smallest_possible(vals):
     )
 
     if (0 <= vals).all():
-        return DataType.UINT64
+        return DataType["UINT64"]
     else:
-        return DataType.INT64
+        return DataType["INT64"]
 
 
 class ChannelwiseOp_Batch(HLSCustomOp):
@@ -125,18 +124,7 @@ class ChannelwiseOp_Batch(HLSCustomOp):
     def make_shape_compatible_op(self, model):
         oshape = self.get_normal_output_shape()
         # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -347,8 +335,8 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         )
         # get input data type
         export_idt = self.get_input_datatype()
-        if self.get_input_datatype() == DataType.BIPOLAR:
-            export_idt = DataType.BINARY
+        if self.get_input_datatype() == DataType["BIPOLAR"]:
+            export_idt = DataType["BINARY"]
         idt_hls = export_idt.get_hls_datatype_str()
 
         # write parameters into params.h
@@ -356,8 +344,8 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         pdt_hls = pdt.get_hls_datatype_str()
         # use binary to export bipolar activations
         export_odt = self.get_output_datatype()
-        if self.get_output_datatype() == DataType.BIPOLAR:
-            export_odt = DataType.BINARY
+        if self.get_output_datatype() == DataType["BIPOLAR"]:
+            export_odt = DataType["BINARY"]
         odt_hls = export_odt.get_hls_datatype_str()
         # get desired function
         func = self.get_nodeattr("Func")
@@ -438,7 +426,7 @@ class ChannelwiseOp_Batch(HLSCustomOp):
             # load output npy file
             super().npy_to_dynamic_output(context)
             # reinterpret binary output as bipolar where needed
-            if self.get_output_datatype() == DataType.BIPOLAR:
+            if self.get_output_datatype() == DataType["BIPOLAR"]:
                 out = context[node.output[0]]
                 out = 2 * out - 1
                 context[node.output[0]] = out
@@ -526,15 +514,18 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         # should ImgDim be defined or just filled in here like we do now?
         ishape = self.get_folded_input_shape()
         if len(ishape) == 3:
-            imgdim = 1
+            imgdim_h = 1
+            imgdim_w = 1
         elif len(ishape) == 5:
-            imgdim = ishape[1]
+            imgdim_h = ishape[1]
+            imgdim_w = ishape[2]
         else:
             raise Exception("""Unexpeted input shape""")
         self.code_gen_dict["$DOCOMPUTE$"] = [
-            """Thresholding_Batch<{}, NumChannels1, PE1, {}, {}>
+            """Thresholding_Batch<{}, {}, NumChannels1, PE1, {}, {}>
             (in0, out, threshs, numReps);""".format(
-                imgdim,
+                imgdim_h,
+                imgdim_w,
                 tmpl_args["TSrcI"],
                 tmpl_args["TDstI"],
             )
@@ -543,9 +534,9 @@ class ChannelwiseOp_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 9ec7bc662d95b1c94ca17bc3c9a1a7b6199cc18a..a4018836846257c15ad203b1cef54c03cd081e45 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -29,7 +29,6 @@
 import math
 import numpy as np
 import os
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -148,18 +147,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpect input shape for ConvInpGen."
         # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -319,10 +307,10 @@ class ConvolutionInputGenerator(HLSCustomOp):
             inp.shape == exp_ishape
         ), """Input shape doesn't
         match expected shape (1, ifm_dim_h, ifm_dim_w, ifm_ch)."""
-        if self.get_input_datatype() == DataType.BIPOLAR:
+        if self.get_input_datatype() == DataType["BIPOLAR"]:
             # store bipolar activations as binary
             inp = (inp + 1) / 2
-            export_idt = DataType.BINARY
+            export_idt = DataType["BINARY"]
         else:
             export_idt = self.get_input_datatype()
         # reshape input into folded form
@@ -370,7 +358,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
                 )
             )
         # binary -> bipolar if needed
-        if self.get_output_datatype() == DataType.BIPOLAR:
+        if self.get_output_datatype() == DataType["BIPOLAR"]:
             out = context[node.output[0]]
             out = 2 * out - 1
             context[node.output[0]] = out
@@ -404,9 +392,9 @@ class ConvolutionInputGenerator(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -465,9 +453,9 @@ class ConvolutionInputGenerator(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
index b428210acfd70186f68e7f1b35cfcd945d0a77d9..e43d73b1cd3ec7902fc743bfdf4d2fcad1c01dfe 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
@@ -29,7 +29,6 @@
 import math
 import numpy as np
 import os
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -128,8 +127,12 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
         ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
-        wf = int((k_h * k_w * ifm_ch) // simd)
-        folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
+        if self.use_parallel_window_output():
+            wf = int((ifm_ch) // simd)
+            folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, k_h * k_w * simd)
+        else:
+            wf = int((k_h * k_w * ifm_ch) // simd)
+            folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
         return folded_oshape
 
     def make_shape_compatible_op(self, model):
@@ -137,19 +140,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpect input shape for ConvInpGen."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -169,8 +160,6 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         return DataType[self.get_nodeattr("outputDataType")]
 
     def get_instream_width(self):
-        """Returns stream width, input and output stream width are equal for
-        the sliding window function"""
         ibits = self.get_input_datatype().bitwidth()
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
@@ -179,10 +168,13 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         return in_width
 
     def get_outstream_width(self):
-        """Returns stream width, input and output stream width are equal for
-        the sliding window function, so the function to determine the input
-        stream width can be reused."""
-        return self.get_instream_width()
+        if self.use_parallel_window_output():
+            # feed all window pixels in parallel
+            k_h, k_w = self.get_nodeattr("ConvKernelDim")
+            return self.get_instream_width() * k_h * k_w
+        else:
+            # if parallel variant not in use: same width for output and input stream
+            return self.get_instream_width()
 
     def get_number_output_values(self):
         folded_oshape = self.get_folded_output_shape()
@@ -218,6 +210,22 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
 
         return (ifm_ch, ifm_dim, ofm_dim, k, stride, dilation)
 
+    def use_parallel_window_output(self):
+        # Check whether the "ConvolutionInputGenerator_1D_parallel" variant can be
+        # used to feed the window in parallel to the following layer, enabling full
+        # SIMD unfolding.
+        stride = self.get_nodeattr("Stride")
+        dilation = self.get_nodeattr("Dilation")
+        stride_h, stride_w = stride
+        dilation_h, dilation_w = dilation
+
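+        # The parallel variant requires full SIMD unfolding (SIMD == IFMChannels),
+        # a non-depthwise layer, and unit stride and dilation in both dimensions.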
+        if self.get_nodeattr("SIMD") == self.get_nodeattr("IFMChannels"):
+            if self.get_nodeattr("depthwise") == 0:
+                if stride_h == 1 and stride_w == 1:
+                    if dilation_h == 1 and dilation_w == 1:
+                        return True
+
+        return False
+
     def get_exp_cycles(self):
         simd = self.get_nodeattr("SIMD")
         (
@@ -237,12 +245,15 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         # since mmv != 1 is not supported yet, we set mmv for now to 1
         mmv = 1
         # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
-        cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
-        cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
-        max_cycles = max(cycles_write_block, cycles_read_block)
-        exp_cycles = (
-            ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
-        )
+        if self.use_parallel_window_output():
+            exp_cycles = k_w + ofm_dim_w
+        else:
+            cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
+            cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
+            max_cycles = max(cycles_write_block, cycles_read_block)
+            exp_cycles = (
+                ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
+            )
 
         return int(exp_cycles)
 
@@ -345,10 +356,10 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             inp.shape == exp_ishape
         ), """Input shape doesn't
         match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
-        if self.get_input_datatype() == DataType.BIPOLAR:
+        if self.get_input_datatype() == DataType["BIPOLAR"]:
             # store bipolar activations as binary
             inp = (inp + 1) / 2
-            export_idt = DataType.BINARY
+            export_idt = DataType["BINARY"]
         else:
             export_idt = self.get_input_datatype()
         # reshape input into folded form
@@ -396,7 +407,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
                 )
             )
         # binary -> bipolar if needed
-        if self.get_output_datatype() == DataType.BIPOLAR:
+        if self.get_output_datatype() == DataType["BIPOLAR"]:
             out = context[node.output[0]]
             out = 2 * out - 1
             context[node.output[0]] = out
@@ -502,9 +513,9 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -535,46 +546,56 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
             "ultra": "ap_resource_uram()",
         }
         hls_ram_style = map_to_hls_ram_style[ram_style]
-        hls_call = "ConvolutionInputGenerator"
-        # check which ConvolutionInputGenerator is needed
-        dilation_h, dilation_w = self.get_nodeattr("Dilation")
 
-        hls_call += "_NonSquare"
-        if dilation_h > 1 or dilation_w > 1:
-            hls_call += "_Dilated"
-            if self.get_nodeattr("depthwise") == 1:
-                hls_call += "_dws"
-            self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
-                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y,
-                Dilation1_x, Dilation1_y> (in0, out, numReps, {});""".format(
-                    hls_call, hls_ram_style
-                )
-            ]
-        elif self.get_nodeattr("depthwise") == 1:
-            hls_call += "_dws"
+        # check which ConvolutionInputGenerator is needed
+        if self.use_parallel_window_output():
+            hls_call = "ConvolutionInputGenerator_1D_parallel"
             self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
-                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y>
+                """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1,
+                IFMDim1_x, OFMDim1_x, SIMD1, Stride1_x>
                 (in0, out, numReps, {});""".format(
                     hls_call, hls_ram_style
                 )
             ]
         else:
-            self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
-                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y>
-                (in0, out, numReps, {});""".format(
-                    hls_call, hls_ram_style
-                )
-            ]
+            hls_call = "ConvolutionInputGenerator_NonSquare"
+            dilation_h, dilation_w = self.get_nodeattr("Dilation")
+            if dilation_h > 1 or dilation_w > 1:
+                hls_call += "_Dilated"
+                if self.get_nodeattr("depthwise") == 1:
+                    hls_call += "_dws"
+                self.code_gen_dict["$DOCOMPUTE$"] = [
+                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
+                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
+                    SIMD1, Stride1_x, Stride1_y, Dilation1_x, Dilation1_y>
+                    (in0, out, numReps, {});""".format(
+                        hls_call, hls_ram_style
+                    )
+                ]
+            elif self.get_nodeattr("depthwise") == 1:
+                hls_call += "_dws"
+                self.code_gen_dict["$DOCOMPUTE$"] = [
+                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
+                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
+                    SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format(
+                        hls_call, hls_ram_style
+                    )
+                ]
+            else:
+                self.code_gen_dict["$DOCOMPUTE$"] = [
+                    """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1,
+                    Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y,
+                    SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format(
+                        hls_call, hls_ram_style
+                    )
+                ]
 
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -583,9 +604,16 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         npy_out = "%s/output.npy" % code_gen_dir
         oshape = self.get_folded_output_shape()
         oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+        if self.use_parallel_window_output():
+            # pass the number of pixels in the folded output to apintstream2npy,
+            # needed to unpack the output correctly and reverse only the inner
+            # SIMD dimension
+            k_h, k_w = self.get_nodeattr("ConvKernelDim")
+            multi_pixel_out = k_h * k_w
+        else:
+            multi_pixel_out = 1
 
         self.code_gen_dict["$DATAOUTSTREAM$"] = [
-            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", true, 1, %d);'
             % (
                 packed_hls_type,
                 elem_hls_type,
@@ -593,6 +621,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
                 npy_type,
                 oshape_cpp_str,
                 npy_out,
+                multi_pixel_out,
             )
         ]
 
@@ -600,12 +629,21 @@ class ConvolutionInputGenerator1D(HLSCustomOp):
         self.code_gen_dict["$SAVEASCNPY$"] = []
 
     def blackboxfunction(self):
-        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
-            """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
-                hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format(
-                self.onnx_node.name
-            )
-        ]
+        if self.use_parallel_window_output():
+            self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+                """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
+                    hls::stream<ap_uint<ConvKernelDim1_x*SIMD1*Input_precision1>>
+                    &out)""".format(
+                    self.onnx_node.name
+                )
+            ]
+        else:
+            self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+                """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
+                    hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format(
+                    self.onnx_node.name
+                )
+            ]
 
     def pragmas(self):
         self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py
index 2313ab28b41668b93a55298aa2b589dac999070e..124b3e4645caa63a2590d91c58f430f8d56bb6a0 100644
--- a/src/finn/custom_op/fpgadataflow/downsampler.py
+++ b/src/finn/custom_op/fpgadataflow/downsampler.py
@@ -1,7 +1,34 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import numpy as np
 import os
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -83,19 +110,7 @@ class DownSampler(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpect input shape for DownSampler."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -164,9 +179,9 @@ class DownSampler(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -197,9 +212,9 @@ class DownSampler(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
index ca0b2f12ab6e84bab0b87e5a34917619c2ba289d..8ac30524ebee6f503e34f6d92408f3f137a59c72 100644
--- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
@@ -1,7 +1,34 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import numpy as np
 import os
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -99,19 +126,7 @@ class FMPadding_Batch(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpect input shape for SameResize."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -210,9 +225,9 @@ class FMPadding_Batch(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -261,9 +276,9 @@ class FMPadding_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
index eabdcf599d23d35ed13069cb81afa3ec4999e8e7..6d4a55ee5c86b68776f4c7c2e58930034bb0be02 100644
--- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
@@ -29,7 +29,6 @@
 import numpy as np
 import os
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -95,19 +94,7 @@ class GlobalAccPool_Batch(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpected input shape."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten(),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py
index 9dbc3dbd0fe8145afbc828ee53b377dfca397428..e2a894dd9d8747cbb065f1a48472258c5b57b583 100644
--- a/src/finn/custom_op/fpgadataflow/hlscustomop.py
+++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py
@@ -301,6 +301,7 @@ class HLSCustomOp(CustomOp):
         self.code_gen_dict["$HWSRCDIR$"] = [code_gen_dir]
         self.code_gen_dict["$FPGAPART$"] = [fpgapart]
         self.code_gen_dict["$FINNHLSLIBDIR$"] = ["/workspace/finn-hlslib"]
+        self.code_gen_dict["$FINNHLSCUSTOMDIR$"] = ["/workspace/finn/custom_hls"]
         self.code_gen_dict["$TOPFXN$"] = [node.name]
         self.code_gen_dict["$CLKPERIOD$"] = [str(clk)]
         self.code_gen_dict["$DEFAULT_DIRECTIVES$"] = self.ipgen_default_directives()
@@ -406,6 +407,7 @@ class HLSCustomOp(CustomOp):
         builder.append_includes("-I/workspace/finn/src/finn/qnn-data/cpp")
         builder.append_includes("-I/workspace/cnpy/")
         builder.append_includes("-I/workspace/finn-hlslib")
+        builder.append_includes("-I/workspace/finn/custom_hls")
         builder.append_includes("-I{}/include".format(os.environ["VIVADO_PATH"]))
         builder.append_includes("--std=c++11")
         builder.append_includes("-O3")
diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py
index cc1f6722ddb717ad518ca16e71a39f0b9747db62..cb28c47be6deaf59833e8fba7f33156a48ccfaae 100644
--- a/src/finn/custom_op/fpgadataflow/iodma.py
+++ b/src/finn/custom_op/fpgadataflow/iodma.py
@@ -29,7 +29,6 @@
 import math
 import numpy as np
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -149,19 +148,7 @@ class IODMA(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpected input shape."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
index d70d0f6a9b0cacb491ce748b84c8c7c474605170..1eb5962fdbc54092eaeb4796806b3a623c65aea8 100644
--- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py
+++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
@@ -102,18 +102,14 @@ class LabelSelect_Batch(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpected input shape."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.int64)
         return helper.make_node(
-            "Constant",
+            "RandomNormal",
             inputs=[],
             outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.INT64,
-                dims=values.shape,
-                vals=values.flatten(),
-            ),
+            mean=0.0,
+            scale=1.0,
+            dtype=TensorProto.INT64,
+            shape=list(oshape),
         )
 
     def infer_node_datatype(self, model):
diff --git a/src/finn/custom_op/fpgadataflow/lookup.py b/src/finn/custom_op/fpgadataflow/lookup.py
new file mode 100644
index 0000000000000000000000000000000000000000..27be06bdfa3ce3d980a139ec91385c7fe85afab3
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/lookup.py
@@ -0,0 +1,338 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import os
+import warnings
+from math import ceil
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.util.data_packing import (
+    npy_to_rtlsim_input,
+    numpy_to_hls_code,
+    rtlsim_output_to_npy,
+)
+
+
+class Lookup(HLSCustomOp):
+    "Streaming elementwise HLS lookup, mapping indices to values."
+
+    def __init__(self, onnx_node):
+        super().__init__(onnx_node)
+
+    def get_nodeattr_types(self):
+        my_attrs = {
+            # Number of embeddings ("memory depth")
+            "NumEmbeddings": ("i", True, 0),
+            # Dimensionality of each embedding (part of "memory width")
+            "EmbeddingDim": ("i", True, 0),
+            # Datatype for embeddings (part of "memory width")
+            "EmbeddingType": ("s", True, ""),
+            # Datatype for inputs
+            "InputType": ("s", True, ""),
+            # Input shape
+            "InputShape": ("ints", False, [1]),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
+
+    def get_exp_cycles(self):
+        n_inputs = np.prod(self.get_nodeattr("InputShape"))
+        exp_cycles = int(n_inputs)
+        return exp_cycles
+
+    def get_normal_input_shape(self):
+        return self.get_nodeattr("InputShape")
+
+    def get_normal_output_shape(self):
+        ishape = self.get_normal_input_shape()
+        oshape = list(ishape) + [self.get_nodeattr("EmbeddingDim")]
+        return tuple(oshape)
+
+    def get_folded_input_shape(self):
+        ishape = self.get_normal_input_shape()
+        folded_ishape = list(ishape) + [1]
+        return tuple(folded_ishape)
+
+    def get_folded_output_shape(self):
+        return self.get_normal_output_shape()
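+
+    # Shape example (hypothetical attribute values): with InputShape=[1, 10] and
+    # EmbeddingDim=32, get_normal_output_shape() returns (1, 10, 32) and
+    # get_folded_input_shape() returns (1, 10, 1).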
+
+    def make_shape_compatible_op(self, model):
+        exp_ishape = tuple(self.get_normal_input_shape())
+        oshape = tuple(self.get_normal_output_shape())
+        ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
+        assert (
+            ishape == exp_ishape
+        ), "Unexpected input shape for Lookup: expected %s, got %s" % (
+            str(exp_ishape),
+            str(ishape),
+        )
+        return super().make_const_shape_op(oshape)
+
+    def infer_node_datatype(self, model):
+        node = self.onnx_node
+        idt = model.get_tensor_datatype(node.input[0])
+        if idt != self.get_input_datatype():
+            warn_str = "InputType changing for %s: %s -> %s " % (
+                node.name,
+                str(self.get_input_datatype()),
+                str(idt),
+            )
+            warnings.warn(warn_str)
+        self.set_nodeattr("InputType", idt.name)
+        odt = DataType[self.get_nodeattr("EmbeddingType")]
+        model.set_tensor_datatype(node.output[0], odt)
+
+    def verify_node(self):
+        pass
+
+    def get_input_datatype(self):
+        ret = DataType[self.get_nodeattr("InputType")]
+        return ret
+
+    def get_output_datatype(self):
+        ret = DataType[self.get_nodeattr("EmbeddingType")]
+        return ret
+
+    def get_instream_width(self):
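+        # a single index is transferred per stream beat, so the input stream
+        # width is simply the bitwidth of the input datatype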
+        ibits = self.get_input_datatype().bitwidth()
+        return ibits
+
+    def get_outstream_width(self):
+        obits = self.get_output_datatype().bitwidth()
+        ofm_ch = self.get_nodeattr("EmbeddingDim")
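+        # one full embedding vector (EmbeddingDim elements) is emitted per beat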
+        return obits * ofm_ch
+
+    def get_number_output_values(self):
+        folded_oshape = self.get_folded_output_shape()
+        return np.prod(folded_oshape[:-1])
+
+    def global_includes(self):
+        global_incls = ['#include "lookup.hpp"']
+        global_incls.append('#include "embeddings.hpp"')
+        self.code_gen_dict["$GLOBALS$"] = global_incls
+
+    def defines(self, var):
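+        # these #defines provide the template arguments for the StreamingLookup
+        # call emitted in docompute() (declared in lookup.hpp)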
+        n_inputs = np.prod(self.get_folded_input_shape()[:-1])
+        dtype = self.get_input_datatype()
+        elem_hls_type = dtype.get_hls_datatype_str()
+        emb_type = DataType[self.get_nodeattr("EmbeddingType")]
+        emb_hls_type = emb_type.get_hls_datatype_str()
+        my_defines = []
+        my_defines.append(
+            "#define NumEmbeddings %d" % self.get_nodeattr("NumEmbeddings")
+        )
+        my_defines.append("#define EmbeddingDim %d" % self.get_nodeattr("EmbeddingDim"))
+        my_defines.append("#define NumInputs %d" % n_inputs)
+        my_defines.append("#define InputType %s" % elem_hls_type)
+        my_defines.append("#define EmbeddingType %s" % emb_hls_type)
+        self.code_gen_dict["$DEFINES$"] = my_defines
+
+    def read_npy_data(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_input_datatype()
+        if dtype == DataType["BIPOLAR"]:
+            # use binary for bipolar storage
+            dtype = DataType["BINARY"]
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_instream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "int64_t"
+        npy_in = "%s/input_0.npy" % code_gen_dir
+        self.code_gen_dict["$READNPYDATA$"] = []
+        self.code_gen_dict["$READNPYDATA$"].append(
+            'npy2apintstream<%s, %s, %d, %s>("%s", in0);'
+            % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+        )
+
+    def dataoutstrm(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_output_datatype()
+        if dtype == DataType["BIPOLAR"]:
+            # use binary for bipolar storage
+            dtype = DataType["BINARY"]
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_outstream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_out = "%s/output.npy" % code_gen_dir
+        oshape = self.get_folded_output_shape()
+        oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+
+        self.code_gen_dict["$DATAOUTSTREAM$"] = [
+            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+            % (
+                packed_hls_type,
+                elem_hls_type,
+                elem_bits,
+                npy_type,
+                oshape_cpp_str,
+                npy_out,
+            )
+        ]
+
+    def save_as_npy(self):
+        self.code_gen_dict["$SAVEASCNPY$"] = []
+
+    def strm_decl(self):
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
+        )
+
+    def docompute(self):
+        self.code_gen_dict["$DOCOMPUTE$"] = [
+            """StreamingLookup<NumEmbeddings,  EmbeddingDim, NumInputs,
+            InputType, EmbeddingType >(in0, out, embeddings);"""
+        ]
+
+    def blackboxfunction(self):
+        ibits = self.get_instream_width()
+        packed_input_hls_type = "ap_uint<%d>" % ibits
+        obits = self.get_outstream_width()
+        packed_output_hls_type = "ap_uint<%d>" % obits
+        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+            "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)"
+            % (self.onnx_node.name, packed_input_hls_type, packed_output_hls_type)
+        ]
+
+    def pragmas(self):
+        my_pragmas = ["#pragma HLS INTERFACE axis port=in0"]
+        my_pragmas.append("#pragma HLS INTERFACE axis port=out")
+        my_pragmas.append("#pragma HLS INTERFACE ap_ctrl_none port=return")
+        self.code_gen_dict["$PRAGMAS$"] = my_pragmas
+
+    def generate_params(self, model, path):
+        code_gen_dir = path
+        embeddings = model.get_initializer(self.onnx_node.input[1])
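+        # the second ONNX input holds the embedding table, expected to be of
+        # shape (NumEmbeddings, EmbeddingDim) with values representable in
+        # EmbeddingType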
+        weight_filename = "{}/embeddings.hpp".format(code_gen_dir)
+        edt = DataType[self.get_nodeattr("EmbeddingType")]
+        # obits = self.get_outstream_width()
+        # packed_output_hls_type = "ap_uint<%d>" % obits
+        assert np.vectorize(edt.allowed)(
+            embeddings
+        ).all(), "Embeddings can't be expressed with type %s" % str(edt)
+        embeddings_hls_code = numpy_to_hls_code(
+            embeddings, edt, "embeddings", True, False
+        )
+        with open(weight_filename, "w") as f_embed:
+            f_embed.write(embeddings_hls_code)
+
+    def execute_node(self, context, graph):
+        mode = self.get_nodeattr("exec_mode")
+        node = self.onnx_node
+        exp_ishape = tuple(self.get_normal_input_shape())
+        exp_oshape = tuple(self.get_normal_output_shape())
+        folded_ishape = tuple(self.get_folded_input_shape())
+        folded_oshape = tuple(self.get_folded_output_shape())
+
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        elif mode == "rtlsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+            has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+        inp = context[node.input[0]]
+        assert inp.dtype == np.int64, "Input must be an int64 ndarray"
+        assert inp.shape == exp_ishape, """Input shape doesn't match expected shape."""
+        export_idt = self.get_input_datatype()
+        odt = self.get_output_datatype()
+
+        reshaped_input = inp.reshape(folded_ishape)
+        np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
+
+        if mode == "cppsim":
+            # execute the precompiled model
+            super().exec_precompiled_singlenode_model()
+            # load output npy file
+            super().npy_to_dynamic_output(context)
+            assert (
+                context[node.output[0]].shape == folded_oshape
+            ), "cppsim did not produce expected folded output shape"
+            context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
+        elif mode == "rtlsim":
+            sim = self.get_rtlsim()
+            nbits = self.get_instream_width()
+            rtlsim_inp = npy_to_rtlsim_input(
+                "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+            )
+            super().reset_rtlsim(sim)
+            super().toggle_clk(sim)
+            rtlsim_output = self.rtlsim(sim, rtlsim_inp)
+            target_bits = odt.bitwidth()
+            packed_bits = self.get_outstream_width()
+            out_npy_path = "{}/output.npy".format(code_gen_dir)
+            out_shape = self.get_folded_output_shape()
+            rtlsim_output_to_npy(
+                rtlsim_output,
+                out_npy_path,
+                odt,
+                out_shape,
+                packed_bits,
+                target_bits,
+                reverse_inner=False,
+            )
+            # load and reshape output
+            output = np.load(out_npy_path)
+            output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
+            context[node.output[0]] = output
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+            has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+        assert (
+            context[node.output[0]].shape == exp_oshape
+        ), """Output shape doesn't match expected shape."""
+
+    def bram_estimation(self):
+        # current calculation assumes embeddings always stored in BRAM_18Ks
+        width_factor = ceil(self.get_outstream_width() / 16)
+        depth_factor = ceil(self.get_nodeattr("NumEmbeddings") / 1024)
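+        # rough illustrative example: 256 embeddings of dimension 64 at INT8 give
+        # an output stream width of 512 bits, so width_factor = ceil(512/16) = 32
+        # and depth_factor = ceil(256/1024) = 1, i.e. about 32 BRAM_18Ks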
+        return width_factor * depth_factor
+
+    def bram_efficiency_estimation(self):
+        bram16_est = self.bram_estimation()
+        if bram16_est == 0:
+            return 1
+        ebits = self.get_outstream_width() * self.get_nodeattr("NumEmbeddings")
+        bram16_est_capacity = bram16_est * 18 * 1024
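+        # efficiency = bits actually needed for the embedding table divided by
+        # the bits provided by the estimated number of BRAM_18Ks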
+        return ebits / bram16_est_capacity
diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py
index cef964acd5192ad254e1086dacead590b51e7ec1..ba8a446f2cf7541c0bd2e1dff731afe2397942ef 100644
--- a/src/finn/custom_op/fpgadataflow/pool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/pool_batch.py
@@ -28,7 +28,6 @@
 
 import numpy as np
 import os
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -163,19 +162,7 @@ class Pool_Batch(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpected input shape for Pool_Batch."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -235,9 +222,9 @@ class Pool_Batch(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -296,9 +283,9 @@ class Pool_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py b/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
index 53446ff1f2aba30e69bf188c1673c738440567fb..cf065cf156abed591e579b3f257e8f442eb3a976 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
@@ -47,6 +47,7 @@ class StreamingDataflowPartition(CustomOp):
             "partition_id": ("i", False, 0),
             "device_id": ("i", False, 0),
             "mem_port": ("s", False, ""),
+            "instance_name": ("s", False, ""),
         }
 
     def make_shape_compatible_op(self, model):
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index 3caececce8ad3452a959a8516d1d9704fc0241fa..c890bc4c1840176c087ebdb3595e43be0a591540 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -30,7 +30,6 @@ import math
 import numpy as np
 import os
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -156,19 +155,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == tuple(exp_ishape), "Unexpect input shape for StreamingDWC."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -219,9 +206,9 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -253,9 +240,9 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -325,10 +312,10 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
             exp_shape
         ), "Input shape does not match expected shape."
 
-        if self.get_input_datatype() == DataType.BIPOLAR:
+        if self.get_input_datatype() == DataType["BIPOLAR"]:
             # store bipolar activations as binary
             inp = (inp + 1) / 2
-            export_idt = DataType.BINARY
+            export_idt = DataType["BINARY"]
         else:
             export_idt = self.get_input_datatype()
         # reshape input into folded shape
@@ -371,7 +358,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
                 )
             )
         # binary -> bipolar if needed
-        if self.get_output_datatype() == DataType.BIPOLAR:
+        if self.get_output_datatype() == DataType["BIPOLAR"]:
             out = context[node.output[0]]
             out = 2 * out - 1
             context[node.output[0]] = out
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index cf72585392e0a73a5460fb146fee514d37e087e0..6b31aac0eb83de392707a9f020a8e1f949ac582c 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -31,7 +31,6 @@ import numpy as np
 import os
 import textwrap
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -105,6 +104,16 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 "auto",
                 {"auto", "block", "distributed", "ultra"},
             ),
+            # FPGA resource type for threshold memories (if noActivation is False)
+            # auto -- let Vivado decide
+            # block -- use BRAM
+            # distributed -- use LUTRAM
+            "ram_style_thresholds": (
+                "s",
+                False,
+                "auto",
+                {"auto", "block", "distributed"},
+            ),
             # (mem_mode = decoupled only) whether weights will be writable through
             # an AXI-lite interface during runtime
             # 1 for enabled, 0 for disabled.
@@ -141,19 +150,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
 
     def make_shape_compatible_op(self, model):
         oshape = self.get_normal_output_shape()
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -499,15 +496,15 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         ret = dict()
         inp_hls_str = self.get_input_datatype().get_hls_datatype_str()
         out_hls_str = self.get_output_datatype().get_hls_datatype_str()
-        inp_is_binary = self.get_input_datatype() == DataType.BINARY
-        # out_is_binary = self.get_output_datatype() == DataType.BINARY
-        wt_is_binary = self.get_weight_datatype() == DataType.BINARY
+        inp_is_binary = self.get_input_datatype() == DataType["BINARY"]
+        # out_is_binary = self.get_output_datatype() == DataType["BINARY"]
+        wt_is_binary = self.get_weight_datatype() == DataType["BINARY"]
         bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
         if (inp_is_binary or wt_is_binary) and (not bin_xnor_mode):
             raise Exception("True binary (non-bipolar) inputs not yet supported")
-        inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
-        # out_is_bipolar = self.get_output_datatype() == DataType.BIPOLAR
-        wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+        inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+        # out_is_bipolar = self.get_output_datatype() == DataType["BIPOLAR"]
+        wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
         # reinterpret inp/wt as bipolar if bin_xnor_mode is iset
         inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
         wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
@@ -557,7 +554,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         # ONNX uses (in_features, out_features) and matmul(x, W)
         # finn-hlslib uses (out_features, in_features) and matmul(W, x)
         ret = orig_weight_matrix.T
-        if self.get_weight_datatype() == DataType.BIPOLAR:
+        if self.get_weight_datatype() == DataType["BIPOLAR"]:
             # convert bipolar to binary
             ret = (ret + 1) / 2
         # interleave rows between PEs and reshape
@@ -604,7 +601,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 if abs(tdt_min) > tdt_max:
                     tdt = DataType.get_smallest_possible(tdt_min)
                 else:
-                    tdt = DataType.get_smallest_possible(0 - tdt_max)
+                    tdt = DataType.get_smallest_possible(-tdt_max - 1)
             else:
                 tdt = DataType.get_smallest_possible(tdt_max)
             assert np.vectorize(tdt.allowed)(
@@ -619,7 +616,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 if abs(acc_min) > acc_max:
                     adt = DataType.get_smallest_possible(acc_min)
                 else:
-                    adt = DataType.get_smallest_possible(0 - acc_max)
+                    adt = DataType.get_smallest_possible(-acc_max - 1)
             else:
                 adt = DataType.get_smallest_possible(acc_max)
             # ensure a datatype divisible by 8-bits in case this is the last node
@@ -648,11 +645,11 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         ), """Threshold matrix dimension is
         not as expected (2)."""
         n_thres_steps = orig_thres_matrix.shape[1]
-        inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
-        wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+        inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+        wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
         # reinterpret inp/wt as bipolar if bin_xnor_mode is iset
-        inp_is_binary = self.get_input_datatype() == DataType.BINARY
-        wt_is_binary = self.get_weight_datatype() == DataType.BINARY
+        inp_is_binary = self.get_input_datatype() == DataType["BINARY"]
+        wt_is_binary = self.get_weight_datatype() == DataType["BINARY"]
         bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
         inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
         wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
@@ -663,7 +660,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             assert (orig_thres_matrix.astype(np.int32) == orig_thres_matrix).all()
         ret = orig_thres_matrix
         # workaround for vivado_hls threshold bug
-        if ret[0][0] == 0:
+        if ret[0][0] == 0 and n_thres_steps == 1:
             ret = np.copy(ret)
             ret[0][0] = 1
             warnings.warn(
@@ -707,8 +704,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         export_wdt = self.get_weight_datatype()
         # we have converted bipolar weights to binary for export,
         # so use it as such for weight generation
-        if self.get_weight_datatype() == DataType.BIPOLAR:
-            export_wdt = DataType.BINARY
+        if self.get_weight_datatype() == DataType["BIPOLAR"]:
+            export_wdt = DataType["BINARY"]
         if weight_file_mode == "hls_header":
             weight_hls_code = numpy_to_hls_code(
                 weight_tensor, export_wdt, "weights", True, True
@@ -837,11 +834,11 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             if thresholds is not None:
                 threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
                 # use UINT32 threshold export for bipolar times bipolar
-                inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
-                wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+                inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+                wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
                 # reinterpret inp/wt as bipolar if bin_xnor_mode is iset
-                inp_is_binary = self.get_input_datatype() == DataType.BINARY
-                wt_is_binary = self.get_weight_datatype() == DataType.BINARY
+                inp_is_binary = self.get_input_datatype() == DataType["BINARY"]
+                wt_is_binary = self.get_weight_datatype() == DataType["BINARY"]
                 bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
                 inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
                 wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
@@ -862,8 +859,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 tdt_hls = tdt.get_hls_datatype_str()
                 # use binary to export bipolar activations
                 export_odt = self.get_output_datatype()
-                if self.get_output_datatype() == DataType.BIPOLAR:
-                    export_odt = DataType.BINARY
+                if self.get_output_datatype() == DataType["BIPOLAR"]:
+                    export_odt = DataType["BINARY"]
                 odt_hls = export_odt.get_hls_datatype_str()
                 f_thresh.write(
                     "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
@@ -911,10 +908,10 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 not float32 as expected."""
                 expected_inp_shape = self.get_folded_input_shape()
                 reshaped_input = context[inputs].reshape(expected_inp_shape)
-                if self.get_input_datatype() == DataType.BIPOLAR:
+                if self.get_input_datatype() == DataType["BIPOLAR"]:
                     # store bipolar activations as binary
                     reshaped_input = (reshaped_input + 1) / 2
-                    export_idt = DataType.BINARY
+                    export_idt = DataType["BINARY"]
                 else:
                     export_idt = self.get_input_datatype()
                 # make copy before saving the array
@@ -933,7 +930,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             # load output npy file
             super().npy_to_dynamic_output(context)
             # reinterpret binary output as bipolar where needed
-            if self.get_output_datatype() == DataType.BIPOLAR:
+            if self.get_output_datatype() == DataType["BIPOLAR"]:
                 out = context[node.output[0]]
                 out = 2 * out - 1
                 context[node.output[0]] = out
@@ -956,8 +953,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 export_wdt = self.get_weight_datatype()
                 # we have converted bipolar weights to binary for export,
                 # so use it as such for weight generation
-                if self.get_weight_datatype() == DataType.BIPOLAR:
-                    export_wdt = DataType.BINARY
+                if self.get_weight_datatype() == DataType["BIPOLAR"]:
+                    export_wdt = DataType["BINARY"]
                 wei = npy_to_rtlsim_input(
                     "{}/weights.npy".format(code_gen_dir), export_wdt, wnbits
                 )
@@ -1016,10 +1013,10 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         if var == "ipgen":
             SIMD = self.get_nodeattr("SIMD")
             MW = self.get_nodeattr("MW")
-            condition = SIMD > (MW / 1024)
+            condition = SIMD >= (MW / 1024)
             msg = (
                 f"HLS synthesis of StreamingFCLayer_Batch requires: "
-                f"SIMD > MW / 1024. This is not fulfilled with: SIMD={SIMD} "
+                f"SIMD >= MW / 1024. This is not fulfilled with: SIMD={SIMD} "
                 f"and MW={MW} for node: {self.onnx_node.name}."
             )
             assert condition, msg
@@ -1048,9 +1045,9 @@ class StreamingFCLayer_Batch(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -1124,8 +1121,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             ]
         elif mem_mode == "decoupled" or mem_mode == "external":
             wdt = self.get_weight_datatype()
-            if wdt == DataType.BIPOLAR:
-                export_wdt = DataType.BINARY
+            if wdt == DataType["BIPOLAR"]:
+                export_wdt = DataType["BINARY"]
             else:
                 export_wdt = wdt
             wdtype_hls_str = export_wdt.get_hls_datatype_str()
@@ -1150,9 +1147,9 @@ class StreamingFCLayer_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -1212,6 +1209,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
 
     def pragmas(self):
         mem_mode = self.get_nodeattr("mem_mode")
+        ram_style_thresholds = self.get_nodeattr("ram_style_thresholds")
         self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
         self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
         in_fifo_depth = self.get_nodeattr("inFIFODepth")
@@ -1270,6 +1268,28 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                     "complete dim=3"
                 )
             )
+            # add resource pragma for thresholds if set
+            if ram_style_thresholds == "distributed":
+                self.code_gen_dict["$PRAGMAS$"].append(
+                    (
+                        "#pragma HLS RESOURCE variable=threshs.m_thresholds "
+                        "core=ROM_2P_LUTRAM"
+                    )
+                )
+            elif ram_style_thresholds == "block":
+                self.code_gen_dict["$PRAGMAS$"].append(
+                    (
+                        "#pragma HLS RESOURCE variable=threshs.m_thresholds "
+                        "core=ROM_2P_BRAM"
+                    )
+                )
+            elif ram_style_thresholds == "auto":
+                # no pragma needed
+                pass
+            else:
+                raise Exception(
+                    "Unrecognized ram_style_thresholds value:" + ram_style_thresholds
+                )
 
     def code_generation_ipi(self):
         cmd = []
diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py
index 71be9e7b8dda0793f967736b5f4df3bd678b50cf..923081ecdeb65e829a59c4c9bfdc67fc03a82ccc 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfifo.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py
@@ -30,7 +30,6 @@ import numpy as np
 import os
 import subprocess
 import warnings
-from onnx import TensorProto, helper
 from shutil import copy
 
 from finn.core.datatype import DataType
@@ -78,19 +77,7 @@ class StreamingFIFO(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == tuple(exp_ishape), "Unexpect input shape for StreamingFIFO."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -263,10 +250,10 @@ class StreamingFIFO(HLSCustomOp):
                 not float32 as expected."""
             expected_inp_shape = self.get_folded_input_shape()
             reshaped_input = inp.reshape(expected_inp_shape)
-            if DataType[self.get_nodeattr("dataType")] == DataType.BIPOLAR:
+            if DataType[self.get_nodeattr("dataType")] == DataType["BIPOLAR"]:
                 # store bipolar activations as binary
                 reshaped_input = (reshaped_input + 1) / 2
-                export_idt = DataType.BINARY
+                export_idt = DataType["BINARY"]
             else:
                 export_idt = DataType[self.get_nodeattr("dataType")]
             # make copy before saving the array
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index ab27b2c63698f1782ff5569b7c9ce4c8e6b61c02..6012cc7cd1b8c90d6a97dd7db11d2c47fb879ccd 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -29,7 +29,6 @@
 import numpy as np
 import os
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -42,8 +41,8 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
     def get_nodeattr_types(self):
         my_attrs = {
-            "ImgDim": ("i", True, 0),
-            "PoolDim": ("i", True, 0),
+            "ImgDim": ("ints", True, []),  # [H, W] = [Y, X]
+            "PoolDim": ("ints", True, []),  # [H, W] = [Y, X]
             "NumChannels": ("i", True, 0),
             # FINN DataTypes for inputs/outputs
             "dataType": ("s", True, ""),
@@ -59,10 +58,27 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         """Returns FINN DataType of output."""
         return DataType[self.get_nodeattr("dataType")]
 
-    def get_normal_input_shape(self):
+    def get_1d_attrs_normalized(self):
+        # support both (1, D) and (D, 1) cases transparently:
+        # assume the dummy ('1') dimension is the Y-dimension, i.e.
+        # images and kernels (and their attributes) of dimension
+        # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D]
         ifm_dim = self.get_nodeattr("ImgDim")
+        k = self.get_nodeattr("PoolDim")
         ifm_ch = self.get_nodeattr("NumChannels")
-        ishape = (1, ifm_dim, ifm_dim, ifm_ch)
+        if ifm_dim[1] == 1:
+            ifm_dim = ifm_dim[::-1]
+            k = k[::-1]
+        return (ifm_dim, k, ifm_ch)
+
+    def is_1d(self):
+        ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+        return (ifm_dim[0] == 1) and (k[0] == 1)
+
+    def get_normal_input_shape(self):
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim")
+        ifm_ch = self.get_nodeattr("NumChannels")
+        ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch)
         return ishape
 
     def get_folded_input_shape(self):
@@ -74,14 +90,17 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         return tuple(ret)
 
     def get_normal_output_shape(self):
-        k = self.get_nodeattr("PoolDim")
-        ifm_dim = self.get_nodeattr("ImgDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("ImgDim")
+        k_h, k_w = tuple(self.get_nodeattr("PoolDim"))
         ifm_ch = self.get_nodeattr("NumChannels")
-        stride = k
+        stride_h = k_h
+        stride_w = k_w
         pad = 0
-        assert ifm_dim % k == 0, "StreamingMaxPool needs ImgDim % PoolDim == 0"
-        ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
-        oshape = (1, ofm_dim, ofm_dim, ifm_ch)
+        assert ifm_dim_h % k_h == 0, "StreamingMaxPool needs ImgDim_h % PoolDim_h == 0"
+        assert ifm_dim_w % k_w == 0, "StreamingMaxPool needs ImgDim_w % PoolDim_w == 0"
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad)
+        oshape = (1, ofm_dim_h, ofm_dim_w, ifm_ch)
         return oshape
 
     def get_folded_output_shape(self):
@@ -98,9 +117,12 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
     def get_exp_cycles(self):
         # derived from StreamingMaxPool_Batch loop nest
-        k = self.get_nodeattr("PoolDim")
-        ifm_dim = self.get_nodeattr("ImgDim")
-        return int(ifm_dim * (ifm_dim + (ifm_dim / k)))
+        ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+        if self.is_1d():
+            return int(ifm_dim[1] + k[1])
+        else:
+            # TODO: adjust inaccurate formula
+            return int(ifm_dim[1] * (ifm_dim[1] + (ifm_dim[1] / k[1])))
 
     def get_instream_width(self):
         dt_bits = self.get_input_datatype().bitwidth()
@@ -117,19 +139,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         oshape = self.get_normal_output_shape()
         ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
         assert ishape == exp_ishape, "Unexpect input shape for StreamingMaxPool."
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -167,11 +177,13 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
     def defines(self, var):
         numReps = 2
+        ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized()
+
         self.code_gen_dict["$DEFINES$"] = [
             """#define ImgDim {}\n #define PoolDim {}\n
             #define NumChannels {}\n #define numReps {}""".format(
-                self.get_nodeattr("ImgDim"),
-                self.get_nodeattr("PoolDim"),
+                ifm_dim[1],
+                k[1],
                 self.get_nodeattr("NumChannels"),
                 numReps,
             )
@@ -180,9 +192,9 @@ class StreamingMaxPool_Batch(HLSCustomOp):
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_instream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -207,12 +219,19 @@ class StreamingMaxPool_Batch(HLSCustomOp):
     def docompute(self):
         dtype = self.get_input_datatype()
         if dtype.bitwidth() == 1:
-            op = "StreamingMaxPool_Batch"
+            if self.is_1d():
+                raise Exception("Binary 1d MaxPool not implemented on HLS backend")
+            else:
+                op = "StreamingMaxPool_Batch"
             self.code_gen_dict["$DOCOMPUTE$"] = [
                 "%s<ImgDim, PoolDim, NumChannels>(in0, out, numReps);" % (op)
             ]
         else:
-            op = "StreamingMaxPool_Precision"
+            if self.is_1d():
+                # FIXME handle this for vitis_hls hlslib branch
+                op = "StreamingMaxPool_Precision_Batch_1d"
+            else:
+                op = "StreamingMaxPool_Precision"
             dtype = self.get_input_datatype()
             dtype_hls = dtype.get_hls_datatype_str()
             minval_str = str(int(dtype.min()))
@@ -224,9 +243,9 @@ class StreamingMaxPool_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
@@ -292,10 +311,10 @@ class StreamingMaxPool_Batch(HLSCustomOp):
             inp.shape == exp_ishape
         ), """Input shape doesn't
         match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
-        if self.get_input_datatype() == DataType.BIPOLAR:
+        if self.get_input_datatype() == DataType["BIPOLAR"]:
             # store bipolar activations as binary
             inp = (inp + 1) / 2
-            export_idt = DataType.BINARY
+            export_idt = DataType["BINARY"]
         else:
             export_idt = self.get_input_datatype()
         # no reshaping for input since assuming no folding on input
@@ -342,7 +361,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
                 )
             )
         # binary -> bipolar if needed
-        if self.get_output_datatype() == DataType.BIPOLAR:
+        if self.get_output_datatype() == DataType["BIPOLAR"]:
             out = context[node.output[0]]
             out = 2 * out - 1
             context[node.output[0]] = out
diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py
index 2f2900c8b98c55d208d3303f0bea7e37609d8ca5..0c101a3fa39470b7ec54b2a9eb7f1a7a597ac07a 100644
--- a/src/finn/custom_op/fpgadataflow/templates.py
+++ b/src/finn/custom_op/fpgadataflow/templates.py
@@ -88,12 +88,13 @@ puts "HW source dir: $config_hwsrcdir"
 set config_proj_part "$FPGAPART$"
 
 set config_bnnlibdir "$FINNHLSLIBDIR$"
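+# directory with FINN's custom HLS sources (e.g. custom_hls/lookup.hpp),
+# added to the compiler include path below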
+set config_customhlsdir "$FINNHLSCUSTOMDIR$"
 
 set config_toplevelfxn "$TOPFXN$"
 set config_clkperiod $CLKPERIOD$
 
 open_project $config_proj_name
-add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++0x -I$config_bnnlibdir"
+add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++0x -I$config_bnnlibdir -I$config_customhlsdir"
 
 set_top $config_toplevelfxn
 open_solution sol1
@@ -353,56 +354,3 @@ $LAYER_NAME$
 
 endmodule
 """
-
-decoupled_thresholding_template = """
-template <
-    unsigned ImgDim, unsigned NumChannels, unsigned PE,
-    typename TSrcI = Identity, typename TDstI = Identity,
-    int ActVal=0, typename TT, unsigned int NumSteps,
-    typename TI, typename TO>
-void Thresholding_Stream_Batch(hls::stream<TI> &in,
-                        hls::stream<TO> &out,
-                        hls::stream<ap_uint<PE*NumSteps*TT::width>> &weight,
-                        int const reps)
-{
-
-  // how many different rows each neuron will compute
-  // alternatively: number of vertical matrix chunks
-  unsigned const NF = NumChannels / PE;
-
-  ThresholdsActivation<1, PE, NumSteps, TT, TO, ActVal, comp::less_equal<TT>> internal_thr;
-  #pragma HLS ARRAY_PARTITION variable=internal_thr.m_thresholds complete dim=0
-
-  // everything merged into a common iteration space (one "big" loop instead
-  // of smaller nested loops) to get the pipelinening the way we want
-  for (unsigned i = 0; i < reps * ImgDim * ImgDim * NF; i++)
-  {
-    #pragma HLS PIPELINE II=1
-
-    ap_uint<PE*NumSteps*TT::width> packed_thr;
-    packed_thr = weight.read();
-    // slicer to get 1 PE's worth of thresholds
-    auto const pe_slicer = Slice<ap_uint<NumSteps*TT::width>>()(packed_thr);
-
-    TI inElem;
-    inElem = in.read();
-    auto outElem = TDstI().template operator()<TO>();
-
-    for (unsigned pe = 0; pe < PE; pe++)
-    {
-#pragma HLS UNROLL
-      // slicer to get individual thresholds
-      auto const thr_slicer = Slice<TT>()(pe_slicer(pe, 0));
-      for (unsigned nt = 0; nt < NumSteps; nt++)
-      {
-      #pragma HLS UNROLL
-        internal_thr.m_thresholds[pe][0][nt] = thr_slicer(nt, 0);
-      }
-
-      auto const act = TSrcI()(inElem);
-      outElem(pe,0,1) = internal_thr.activate(0, pe, act(pe,0));
-    }
-    out.write(outElem);
-  }
-}
-"""
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 3c82ea4439427be55441c50496bc4b4c7b62cfbc..97d4d0d80553ab530ffdc4a18a8956aae0c3c4e7 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -31,7 +31,6 @@ import os
 import textwrap
 import warnings
 from math import ceil, log2
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -112,19 +111,7 @@ class Thresholding_Batch(HLSCustomOp):
 
     def make_shape_compatible_op(self, model):
         oshape = self.get_normal_output_shape()
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -132,8 +119,8 @@ class Thresholding_Batch(HLSCustomOp):
         if idt != self.get_input_datatype():
             warn_str = "inputDataType changing for %s: %s -> %s " % (
                 node.name,
-                str(self.get_input_datatype()),
-                str(idt),
+                str(self.get_input_datatype().name),
+                str(idt.name),
             )
             warnings.warn(warn_str)
         self.set_nodeattr("inputDataType", idt.name)
@@ -225,7 +212,7 @@ class Thresholding_Batch(HLSCustomOp):
             if abs(tdt_min) > tdt_max:
                 tdt = DataType.get_smallest_possible(tdt_min)
             else:
-                tdt = DataType.get_smallest_possible(0 - tdt_max - 1)
+                tdt = DataType.get_smallest_possible(-tdt_max - 1)
         else:
             tdt = DataType.get_smallest_possible(tdt_max)
         assert np.vectorize(tdt.allowed)(
@@ -336,7 +323,7 @@ class Thresholding_Batch(HLSCustomOp):
         ).all(), "Need int threshold tensor"
         ret = orig_thres_matrix
         # workaround for vivado_hls threshold bug
-        if ret[0][0] == 0:
+        if ret[0][0] == 0 and n_thres_steps == 1:
             ret = np.copy(ret)
             ret[0][0] = 1
             warnings.warn(
@@ -390,8 +377,8 @@ class Thresholding_Batch(HLSCustomOp):
             tdt_hls = tdt.get_hls_datatype_str()
             # use binary to export bipolar activations
             export_odt = self.get_output_datatype()
-            if self.get_output_datatype() == DataType.BIPOLAR:
-                export_odt = DataType.BINARY
+            if self.get_output_datatype() == DataType["BIPOLAR"]:
+                export_odt = DataType["BINARY"]
             odt_hls = export_odt.get_hls_datatype_str()
             f_thresh.write(
                 "static ThresholdsActivation<{},{},{},{},{},{},{}> threshs \
@@ -515,10 +502,10 @@ class Thresholding_Batch(HLSCustomOp):
                 not float32 as expected."""
                 expected_inp_shape = self.get_folded_input_shape()
                 reshaped_input = context[inputs].reshape(expected_inp_shape)
-                if self.get_input_datatype() == DataType.BIPOLAR:
+                if self.get_input_datatype() == DataType["BIPOLAR"]:
                     # store bipolar activations as binary
                     reshaped_input = (reshaped_input + 1) / 2
-                    export_idt = DataType.BINARY
+                    export_idt = DataType["BINARY"]
                 else:
                     export_idt = self.get_input_datatype()
                 # make copy before saving the array
@@ -537,7 +524,7 @@ class Thresholding_Batch(HLSCustomOp):
             # load output npy file
             super().npy_to_dynamic_output(context)
             # reinterpret binary output as bipolar where needed
-            if self.get_output_datatype() == DataType.BIPOLAR:
+            if self.get_output_datatype() == DataType["BIPOLAR"]:
                 out = context[node.output[0]]
                 out = 2 * out - 1
                 context[node.output[0]] = out
@@ -621,10 +608,6 @@ class Thresholding_Batch(HLSCustomOp):
             self.code_gen_dict["$DEFINES$"].append(
                 "#define NumSteps1 %d" % self.get_nodeattr("numSteps")
             )
-            # TODO remove once Thresholding_Stream_Batch is in hlslib:
-            self.code_gen_dict["$DEFINES$"].append(
-                templates.decoupled_thresholding_template
-            )
 
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -679,28 +662,32 @@ class Thresholding_Batch(HLSCustomOp):
         node = self.onnx_node
         ishape = self.get_folded_input_shape()
         if len(ishape) == 3:
-            imgdim = 1
+            imgdimh = 1
+            imgdimw = 1
         elif len(ishape) == 5:
-            imgdim = ishape[1]
+            imgdimh = ishape[1]
+            imgdimw = ishape[2]
         else:
-            raise Exception("""Unexpeted input shape""")
+            raise Exception("""Unexpected input shape""")
         mem_mode = self.get_nodeattr("mem_mode")
         if mem_mode == "const":
             self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<{}, NumChannels1, PE1, {}, {}>
+                """{}<{}, {}, NumChannels1, PE1, {}, {}>
                 (in0, out, threshs, numReps);""".format(
                     node.op_type,
-                    imgdim,
+                    imgdimh,
+                    imgdimw,
                     tmpl_args["TSrcI"],
                     tmpl_args["TDstI"],
                 )
             ]
         elif mem_mode == "decoupled":
             self.code_gen_dict["$DOCOMPUTE$"] = [
-                """{}<{}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1>
+                """{}<{}, {}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1>
                 (in0, out, weights, numReps);""".format(
                     "Thresholding_Stream_Batch",
-                    imgdim,
+                    imgdimh,
+                    imgdimw,
                     tmpl_args["TSrcI"],
                     tmpl_args["TDstI"],
                 )
@@ -711,9 +698,9 @@ class Thresholding_Batch(HLSCustomOp):
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
-        if dtype == DataType.BIPOLAR:
+        if dtype == DataType["BIPOLAR"]:
             # use binary for bipolar storage
-            dtype = DataType.BINARY
+            dtype = DataType["BINARY"]
         elem_bits = dtype.bitwidth()
         packed_bits = self.get_outstream_width()
         packed_hls_type = "ap_uint<%d>" % packed_bits
diff --git a/src/finn/custom_op/fpgadataflow/upsampler.py b/src/finn/custom_op/fpgadataflow/upsampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..7114cd83ed08b53eab2cfe38d98d84944d537168
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/upsampler.py
@@ -0,0 +1,311 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import os
+import warnings
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
+
+
+class UpsampleNearestNeighbour_Batch(HLSCustomOp):
+    """
+    Corresponds to the finn-hlslib UpsampleNearestNeighbour_Batch function.
+    Upsampling is done with the Nearest Neighbour algorithm.
+    The layer expects square feature maps for the input and output.
+    """
+
+    def __init__(self, onnx_node):
+        super().__init__(onnx_node)
+
+    def get_nodeattr_types(self):
+        my_attrs = {
+            # Size of the output feature map
+            "OFMDim": ("i", True, 0),
+            # Size of the input feature map
+            "IFMDim": ("i", True, 0),
+            # Number of channels in the input feature map
+            "NumChannels": ("i", True, 0),
+            # FINN input datatype
+            "inputDataType": ("s", True, ""),
+            # Batch size
+            "numInputVectors": ("i", False, 1),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
+
+    def get_exp_cycles(self):
+        OFMDim = self.get_nodeattr("OFMDim")
+        batch_size = self.get_nodeattr("numInputVectors")
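+        # one output pixel is produced per cycle for each image in the batch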
+        exp_cycles = OFMDim * OFMDim * batch_size
+        return int(exp_cycles)
+
+    def get_normal_input_shape(self):
+        IFMDim = self.get_nodeattr("IFMDim")
+        num_ch = self.get_nodeattr("NumChannels")
+        batch = self.get_nodeattr("numInputVectors")
+        ishape = (batch, IFMDim, IFMDim, num_ch)
+        return ishape
+
+    def get_normal_output_shape(self):
+        OFMDim = self.get_nodeattr("OFMDim")
+        num_ch = self.get_nodeattr("NumChannels")
+        batch = self.get_nodeattr("numInputVectors")
+        oshape = (batch, OFMDim, OFMDim, num_ch)
+        return oshape
+
+    def get_folded_input_shape(self):
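+        # no folding is performed, so the folded shape equals the normal shape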
+        normal_ishape = list(self.get_normal_input_shape())
+        return tuple(normal_ishape)
+
+    def get_folded_output_shape(self):
+        normal_oshape = list(self.get_normal_output_shape())
+        return tuple(normal_oshape)
+
+    def make_shape_compatible_op(self, model):
+        exp_ishape = self.get_normal_input_shape()
+        oshape = self.get_normal_output_shape()
+        ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
+        assert (
+            ishape == exp_ishape
+        ), "Unexpect input shape for UpsampleNearestNeighbour_Batch."
+        return super().make_const_shape_op(oshape)
+
+    def infer_node_datatype(self, model):
+        node = self.onnx_node
+        # data type stays the same
+        idt = model.get_tensor_datatype(node.input[0])
+        if idt != self.get_input_datatype():
+            warn_str = "inputDataType changing for %s: %s -> %s " % (
+                node.name,
+                str(self.get_input_datatype()),
+                str(idt),
+            )
+            warnings.warn(warn_str)
+        self.set_nodeattr("inputDataType", idt.name)
+        model.set_tensor_datatype(node.output[0], idt)
+
+    def verify_node(self):
+        pass
+
+    def get_input_datatype(self):
+        """Returns FINN DataType of input."""
+        ret = DataType[self.get_nodeattr("inputDataType")]
+        return ret
+
+    def get_output_datatype(self):
+        """Returns FINN DataType of output. (Same as input datatype)"""
+        return self.get_input_datatype()
+
+    def get_instream_width(self):
+        ibits = self.get_input_datatype().bitwidth()
+        ifm_ch = self.get_nodeattr("NumChannels")
+        return ibits * ifm_ch
+
+    def get_outstream_width(self):
+        obits = self.get_output_datatype().bitwidth()
+        ifm_ch = self.get_nodeattr("NumChannels")
+        return obits * ifm_ch
+
+    def get_number_output_values(self):
+        folded_oshape = self.get_folded_output_shape()
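+        # one stream word per output pixel: batch * OFMDim * OFMDim words in total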
+        return np.prod(folded_oshape[:-1])
+
+    def global_includes(self):
+        self.code_gen_dict["$GLOBALS$"] = ['#include "upsample.hpp"']
+
+    def defines(self, var):
+        self.code_gen_dict["$DEFINES$"] = []
+
+        ifm_ch = self.get_nodeattr("NumChannels")
+        self.code_gen_dict["$DEFINES$"] += ["#define IFMChannels {}".format(ifm_ch)]
+
+        ibits = self.get_input_datatype().bitwidth()
+        self.code_gen_dict["$DEFINES$"] += ["#define Input_precision {}".format(ibits)]
+
+        idim = self.get_nodeattr("IFMDim")
+        self.code_gen_dict["$DEFINES$"] += ["#define IFMDim {}".format(idim)]
+
+        odim = self.get_nodeattr("OFMDim")
+        self.code_gen_dict["$DEFINES$"] += ["#define OFMDim {}".format(odim)]
+
+        batch_size = self.get_nodeattr("numInputVectors")
+        self.code_gen_dict["$DEFINES$"] += ["#define numReps {}".format(batch_size)]
+
+    def read_npy_data(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_input_datatype()
+        if dtype == DataType["BIPOLAR"]:
+            # use binary for bipolar storage
+            dtype = DataType["BINARY"]
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_instream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_in = "%s/input_0.npy" % code_gen_dir
+        self.code_gen_dict["$READNPYDATA$"] = []
+        self.code_gen_dict["$READNPYDATA$"].append(
+            'npy2apintstream<%s, %s, %d, %s>("%s", in0);'
+            % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+        )
+
+    def strm_decl(self):
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
+        )
+
+    def docompute(self):
+        self.code_gen_dict["$DOCOMPUTE$"] = [
+            """UpsampleNearestNeighbour_Batch<OFMDim, IFMDim, IFMChannels,
+            ap_uint<Input_precision> > (in0, out, numReps);"""
+        ]
+
+    def dataoutstrm(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_output_datatype()
+        if dtype == DataType["BIPOLAR"]:
+            # use binary for bipolar storage
+            dtype = DataType["BINARY"]
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_outstream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_out = "%s/output.npy" % code_gen_dir
+        oshape = self.get_folded_output_shape()
+        oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+
+        self.code_gen_dict["$DATAOUTSTREAM$"] = [
+            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+            % (
+                packed_hls_type,
+                elem_hls_type,
+                elem_bits,
+                npy_type,
+                oshape_cpp_str,
+                npy_out,
+            )
+        ]
+
+    def save_as_npy(self):
+        self.code_gen_dict["$SAVEASCNPY$"] = []
+
+    def blackboxfunction(self):
+        packed_bits = self.get_instream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+            "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)"
+            % (self.onnx_node.name, packed_hls_type, packed_hls_type)
+        ]
+
+    def pragmas(self):
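+        # expose in0/out as AXI streams and disable the block-level control handshake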
+        self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
+        self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
+        self.code_gen_dict["$PRAGMAS$"].append(
+            "#pragma HLS INTERFACE ap_ctrl_none port=return"
+        )
+
+    def execute_node(self, context, graph):
+        mode = self.get_nodeattr("exec_mode")
+        node = self.onnx_node
+        exp_ishape = self.get_normal_input_shape()
+        exp_oshape = self.get_normal_output_shape()
+        folded_ishape = self.get_folded_input_shape()
+        folded_oshape = self.get_folded_output_shape()
+
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        elif mode == "rtlsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+        inp = context[node.input[0]]
+        assert str(inp.dtype) == "float32", "Input datatype is not float32"
+        assert (
+            inp.shape == exp_ishape
+        ), """Input shape doesn't
+        match expected shape (numInputVectors, IFMDim, IFMDim, NumChannels)."""
+        export_idt = self.get_input_datatype()
+
+        reshaped_input = inp.reshape(folded_ishape)
+        np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
+
+        if mode == "cppsim":
+            # execute the precompiled model
+            super().exec_precompiled_singlenode_model()
+            # load output npy file
+            super().npy_to_dynamic_output(context)
+            assert (
+                context[node.output[0]].shape == folded_oshape
+            ), "cppsim did not produce expected folded output shape"
+            context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
+        elif mode == "rtlsim":
+            sim = self.get_rtlsim()
+            nbits = self.get_instream_width()
+            rtlsim_inp = npy_to_rtlsim_input(
+                "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+            )
+            super().reset_rtlsim(sim)
+            super().toggle_clk(sim)
+            rtlsim_output = self.rtlsim(sim, rtlsim_inp)
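+            # upsampling does not change the datatype, so unpack with the input datatype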
+            odt = export_idt
+            target_bits = odt.bitwidth()
+            packed_bits = self.get_outstream_width()
+            out_npy_path = "{}/output.npy".format(code_gen_dir)
+            out_shape = self.get_folded_output_shape()
+            rtlsim_output_to_npy(
+                rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits
+            )
+            # load and reshape output
+            output = np.load(out_npy_path)
+            output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
+            context[node.output[0]] = output
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+        assert (
+            context[node.output[0]].shape == exp_oshape
+        ), """Output shape doesn't match expected shape
+            (1, OutputDim, OutputDim, NumChannels)."""
diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
index 921be6fdfa8839239bb6e746112ed30477b8f529..e0f789a8883aad83ed8c8b37a16392308bc720cc 100644
--- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
+++ b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
@@ -1,8 +1,35 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import math
 import numpy as np
 import os
 import warnings
-from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
@@ -83,7 +110,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
                 if abs(tdt_min) > tdt_max:
                     tdt = DataType.get_smallest_possible(tdt_min)
                 else:
-                    tdt = DataType.get_smallest_possible(0 - tdt_max)
+                    tdt = DataType.get_smallest_possible(-tdt_max - 1)
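+                    # requesting -tdt_max - 1 ensures the positive range of the
+                    # chosen signed type still covers tdt_max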
             else:
                 tdt = DataType.get_smallest_possible(tdt_max)
             assert np.vectorize(tdt.allowed)(
@@ -98,7 +125,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
                 if abs(acc_min) > acc_max:
                     adt = DataType.get_smallest_possible(acc_min)
                 else:
-                    adt = DataType.get_smallest_possible(0 - acc_max)
+                    adt = DataType.get_smallest_possible(-acc_max - 1)
             else:
                 adt = DataType.get_smallest_possible(acc_max)
             # ensure a datatype divisible by 8-bits in case this is the last node
@@ -129,19 +156,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
 
     def make_shape_compatible_op(self, model):
         oshape = self.get_normal_output_shape()
-        # implement tensor with correct shape
-        values = np.random.randn(*oshape).astype(np.float32)
-        return helper.make_node(
-            "Constant",
-            inputs=[],
-            outputs=[self.onnx_node.output[0]],
-            value=helper.make_tensor(
-                name="const_tensor",
-                data_type=TensorProto.FLOAT,
-                dims=values.shape,
-                vals=values.flatten().astype(float),
-            ),
-        )
+        return super().make_const_shape_op(oshape)
 
     def infer_node_datatype(self, model):
         node = self.onnx_node
@@ -236,8 +251,8 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         ret = dict()
         inp_hls_str = self.get_input_datatype().get_hls_datatype_str()
         out_hls_str = self.get_output_datatype().get_hls_datatype_str()
-        inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
-        wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
+        inp_is_bipolar = self.get_input_datatype() == DataType["BIPOLAR"]
+        wt_is_bipolar = self.get_weight_datatype() == DataType["BIPOLAR"]
         # fill in TSrcI and TWeightI
         # TODO handle bipolar inputs
         if inp_is_bipolar or wt_is_bipolar:
diff --git a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json
index 5e4cf2d3028fb48cbc768b744bb0144d4f0d5fda..27ec38f6a4eb55c99dc4805f91d6e388e735308c 100644
--- a/src/finn/qnn-data/build_dataflow/dataflow_build_config.json
+++ b/src/finn/qnn-data/build_dataflow/dataflow_build_config.json
@@ -6,6 +6,7 @@
   "board": "Pynq-Z1",
   "standalone_thresholds": true,
   "shell_flow_type": "vivado_zynq",
+  "verify_save_rtlsim_waveforms": true,
   "verify_steps": [
     "initial_python",
     "streamlined_python",
diff --git a/src/finn/qnn-data/cpp/npy2apintstream.hpp b/src/finn/qnn-data/cpp/npy2apintstream.hpp
index f3afbc5bfb16e2423184e334e78b96a8cdeef45c..6aade3a2bbe2ba9914728802a8a6a448ef2d9fb2 100644
--- a/src/finn/qnn-data/cpp/npy2apintstream.hpp
+++ b/src/finn/qnn-data/cpp/npy2apintstream.hpp
@@ -3,6 +3,7 @@
 #include "hls_stream.h"
 #include "ap_int.h"
 #include <vector>
+#include <stdio.h>
 
 #ifdef DEBUG
 #define DEBUG_NPY2APINTSTREAM(x) std::cout << "[npy2apintstream] " << x << std::endl;
@@ -34,7 +35,7 @@ void npy2apintstream(const char * npy_path, hls::stream<PackedT> & out_stream, b
         NpyT loaded_elem_npyt = *loaded_data;
         ElemT loaded_elem = (ElemT) loaded_elem_npyt;
         DEBUG_NPY2APINTSTREAM("NpyT " << loaded_elem_npyt << " elem " << loaded_elem)
-        packed_elem((i+1)*ElemBits-1, i*ElemBits) = loaded_elem;
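+        // pack the raw bits of the element (rather than a value conversion) into the word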
+        packed_elem((i+1)*ElemBits-1, i*ElemBits) = *reinterpret_cast<ap_uint<ElemBits>*>(&loaded_elem);
         loaded_data++;
       }
       DEBUG_NPY2APINTSTREAM("packed hls elem " << std::hex << packed_elem << std::dec)
@@ -44,25 +45,34 @@ void npy2apintstream(const char * npy_path, hls::stream<PackedT> & out_stream, b
 }
 
 template <typename PackedT, typename ElemT, int ElemBits, typename NpyT>
-void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1) {
+void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1, size_t multi_pixel_out = 1) {
   for(size_t rep = 0; rep < numReps; rep++) {
     std::vector<NpyT> data_to_save;
     size_t outer_dim_elems = 1;
     for(size_t dim = 0; dim < shape.size()-1; dim++) {
       outer_dim_elems *= shape[dim];
     }
-    size_t inner_dim_elems = shape[shape.size()-1];
-    DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems)
+    size_t inner_dim_elems = shape[shape.size()-1] / multi_pixel_out;
+    DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems << " n_multi_pixel_out " << multi_pixel_out)
     for(size_t outer_elem = 0; outer_elem < outer_dim_elems; outer_elem++) {
       PackedT packed_elem;
       in_stream >> packed_elem;
       DEBUG_APINTSTREAM2NPY("packed hls elem " << std::hex << packed_elem << std::dec)
-      for(size_t ii = 0; ii < inner_dim_elems; ii++) {
-        size_t i = reverse_inner ? inner_dim_elems-ii-1 : ii;
-        ElemT elem = packed_elem((i+1)*ElemBits-1, i*ElemBits);
-        NpyT npyt = (NpyT) elem;
-        DEBUG_APINTSTREAM2NPY("elem " << elem << " NpyT " << npyt)
-        data_to_save.push_back(npyt);
+      for(size_t ii_multi_pixel_out = 0; ii_multi_pixel_out < multi_pixel_out; ii_multi_pixel_out++) {
+        // loop over multi_pixel_out blocks of inner_dim_elems separately,
+        // so that reverse_inner is not applied across multiple pixels
+        for(size_t ii = 0; ii < inner_dim_elems; ii++) {
+          size_t i = ii_multi_pixel_out*inner_dim_elems;
+          i += reverse_inner ? inner_dim_elems-ii-1 : ii;
+          ap_uint<ElemBits> tmp_elem = packed_elem((i+1)*ElemBits-1, i*ElemBits);
+          // important: don't initialize elem via reinterpret_cast directly here,
+          // as this produces wrong values in the subsequent conversion to NpyT
+          ElemT elem;
+          elem = reinterpret_cast<ElemT&>(tmp_elem);
+          NpyT npyt = (NpyT) elem;
+          DEBUG_APINTSTREAM2NPY("elem " << elem << " NpyT " << npyt)
+          data_to_save.push_back(npyt);
+        }
       }
     }
     cnpy::npy_save(npy_path, &data_to_save[0], shape, "w");
diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py
index 4dd5a080e10e4a0ab5bd14381186e19144f6edb3..b6dd8350809a33ab5dad3e21b0f52f41cbe872ec 100644
--- a/src/finn/qnn-data/templates/driver/driver_base.py
+++ b/src/finn/qnn-data/templates/driver/driver_base.py
@@ -85,24 +85,27 @@ class FINNExampleOverlay(Overlay):
         self.platform = platform
         self.batch_size = batch_size
         self.fclk_mhz = fclk_mhz
-        if self.platform == "alveo":
-            if "input_dma_name" in io_shape_dict.keys():
-                self.idma = getattr(self, io_shape_dict["input_dma_name"])
-            else:
-                self.idma = self.idma0
-            self.odma = self.odma0
-            self.odma_handle = None
-        elif self.platform == "zynq-iodma":
-            if "input_dma_name" in io_shape_dict.keys():
-                self.idma = getattr(self, io_shape_dict["input_dma_name"])
-            else:
-                self.idma = self.idma0
-            self.odma = self.odma0
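+        # input/output DMAs (and Alveo output DMA handles) are kept in lists,
+        # one entry per accelerator input/output stream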
+        self.idma = []
+        self.odma = []
+        self.odma_handle = []
+        if "input_dma_name" in io_shape_dict.keys():
+            for idma_name in io_shape_dict["input_dma_name"]:
+                self.idma.append(getattr(self, idma_name))
+        else:
+            self.idma = [self.idma0]
+        if "output_dma_name" in io_shape_dict.keys():
+            for odma_name in io_shape_dict["output_dma_name"]:
+                self.odma.append(getattr(self, odma_name))
+                if self.platform == "alveo":
+                    self.odma_handle.append(None)
+        else:
+            self.odma = [self.odma0]
+            if self.platform == "alveo":
+                self.odma_handle.append(None)
+        if self.platform == "zynq-iodma":
             # set the clock frequency as specified by user during transformations
             if self.fclk_mhz > 0:
                 Clocks.fclk0_mhz = self.fclk_mhz
-        else:
-            raise ValueError("Supported platforms are zynq-iodma alveo")
         # load any external + runtime weights
         self.load_external_weights()
         self.load_runtime_weights()
@@ -204,50 +207,50 @@ class FINNExampleOverlay(Overlay):
             # run accelerator to flush any stale weights from weight streamer FIFOs
             self.execute_on_buffers()
 
-    @property
-    def idt(self):
-        return self._io_shape_dict["idt"]
+    def idt(self, ind=0):
+        return self._io_shape_dict["idt"][ind]
 
-    @property
-    def odt(self):
-        return self._io_shape_dict["odt"]
+    def odt(self, ind=0):
+        return self._io_shape_dict["odt"][ind]
 
-    @property
-    def ishape_normal(self):
-        ret = list(self._io_shape_dict["ishape_normal"])
+    def ishape_normal(self, ind=0):
+        ret = list(self._io_shape_dict["ishape_normal"][ind])
         ret[0] = self.batch_size
         return tuple(ret)
 
-    @property
-    def oshape_normal(self):
-        ret = list(self._io_shape_dict["oshape_normal"])
+    def oshape_normal(self, ind=0):
+        ret = list(self._io_shape_dict["oshape_normal"][ind])
         ret[0] = self.batch_size
         return tuple(ret)
 
-    @property
-    def ishape_folded(self):
-        ret = list(self._io_shape_dict["ishape_folded"])
+    def ishape_folded(self, ind=0):
+        ret = list(self._io_shape_dict["ishape_folded"][ind])
         ret[0] = self.batch_size
         return tuple(ret)
 
-    @property
-    def oshape_folded(self):
-        ret = list(self._io_shape_dict["oshape_folded"])
+    def oshape_folded(self, ind=0):
+        ret = list(self._io_shape_dict["oshape_folded"][ind])
         ret[0] = self.batch_size
         return tuple(ret)
 
-    @property
-    def ishape_packed(self):
-        ret = list(self._io_shape_dict["ishape_packed"])
+    def ishape_packed(self, ind=0):
+        ret = list(self._io_shape_dict["ishape_packed"][ind])
         ret[0] = self.batch_size
         return tuple(ret)
 
-    @property
-    def oshape_packed(self):
-        ret = list(self._io_shape_dict["oshape_packed"])
+    def oshape_packed(self, ind=0):
+        ret = list(self._io_shape_dict["oshape_packed"][ind])
         ret[0] = self.batch_size
         return tuple(ret)
 
+    @property
+    def num_inputs(self):
+        return self._io_shape_dict["num_inputs"]
+
+    @property
+    def num_outputs(self):
+        return self._io_shape_dict["num_outputs"]
+
     @property
     def batch_size(self):
         return self._batch_size
@@ -261,68 +264,72 @@ class FINNExampleOverlay(Overlay):
             self.ibuf_packed_device = None
         if self.obuf_packed_device is not None:
             self.obuf_packed_device = None
-        if self.platform == "alveo":
-            self.ibuf_packed_device = allocate(shape=self.ishape_packed, dtype=np.uint8)
-            self.obuf_packed_device = allocate(shape=self.oshape_packed, dtype=np.uint8)
-        else:
-            self.ibuf_packed_device = allocate(
-                shape=self.ishape_packed, dtype=np.uint8, cacheable=True
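+        # buffer cacheability depends on the platform: cacheable on Zynq, non-cacheable on Alveo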
+        cacheable = {"alveo": False, "zynq-iodma": True}[self.platform]
+        self.ibuf_packed_device = []
+        self.obuf_packed_device = []
+        self.obuf_packed = []
+        for i in range(self.num_inputs):
+            new_packed_ibuf = allocate(
+                shape=self.ishape_packed(i), dtype=np.uint8, cacheable=cacheable
             )
-            self.obuf_packed_device = allocate(
-                shape=self.oshape_packed, dtype=np.uint8, cacheable=True
+            self.ibuf_packed_device.append(new_packed_ibuf)
+        for o in range(self.num_outputs):
+            new_packed_obuf = allocate(
+                shape=self.oshape_packed(o), dtype=np.uint8, cacheable=cacheable
             )
-        self.obuf_packed = np.empty_like(self.obuf_packed_device)
+            self.obuf_packed_device.append(new_packed_obuf)
+            self.obuf_packed.append(np.empty_like(new_packed_obuf))
 
-    def fold_input(self, ibuf_normal):
+    def fold_input(self, ibuf_normal, ind=0):
         """Reshapes input in desired shape.
         Gets input data (ibuf_normal), checks if data is in expected normal shape.
         Returns folded input."""
         # ensure that shape is as expected
-        assert ibuf_normal.shape == self.ishape_normal
+        assert ibuf_normal.shape == self.ishape_normal(ind)
         # convert to folded form
-        ibuf_folded = ibuf_normal.reshape(self.ishape_folded)
+        ibuf_folded = ibuf_normal.reshape(self.ishape_folded(ind))
         return ibuf_folded
 
-    def pack_input(self, ibuf_folded):
+    def pack_input(self, ibuf_folded, ind=0):
         """Packs folded input and reverses both SIMD dim and endianness.
         Gets input data in folded shape and returns packed input data."""
         ibuf_packed = finnpy_to_packed_bytearray(
             ibuf_folded,
-            self.idt,
+            self.idt(ind),
             reverse_endian=True,
             reverse_inner=True,
             fast_mode=True,
         )
         return ibuf_packed
 
-    def unpack_output(self, obuf_packed):
+    def unpack_output(self, obuf_packed, ind=0):
         """Unpacks the packed output buffer from accelerator.
         Gets packed output and returns output data in folded shape."""
         obuf_folded = packed_bytearray_to_finnpy(
             obuf_packed,
-            self.odt,
-            self.oshape_folded,
+            self.odt(ind),
+            self.oshape_folded(ind),
             reverse_endian=True,
             reverse_inner=True,
             fast_mode=True,
         )
         return obuf_folded
 
-    def unfold_output(self, obuf_folded):
+    def unfold_output(self, obuf_folded, ind=0):
         """Unfolds output data to normal shape.
         Gets folded output data and returns output data in normal shape."""
-        obuf_normal = obuf_folded.reshape(self.oshape_normal)
+        obuf_normal = obuf_folded.reshape(self.oshape_normal(ind))
         return obuf_normal
 
-    def copy_input_data_to_device(self, data):
+    def copy_input_data_to_device(self, data, ind=0):
         """Copies given input data to PYNQ buffer."""
-        np.copyto(self.ibuf_packed_device, data)
-        self.ibuf_packed_device.flush()
+        np.copyto(self.ibuf_packed_device[ind], data)
+        self.ibuf_packed_device[ind].flush()
 
-    def copy_output_data_from_device(self, data):
+    def copy_output_data_from_device(self, data, ind=0):
         """Copies PYNQ output buffer from device."""
-        self.obuf_packed_device.invalidate()
-        np.copyto(data, self.obuf_packed_device)
+        self.obuf_packed_device[ind].invalidate()
+        np.copyto(data, self.obuf_packed_device[ind])
 
     def execute_on_buffers(self, asynch=False, batch_size=None):
         """Executes accelerator by setting up the DMA(s) on pre-allocated buffers.
@@ -338,24 +345,36 @@ class FINNExampleOverlay(Overlay):
             batch_size = self.batch_size
         assert batch_size <= self.batch_size, "Specified batch_size is too large."
         if self.platform == "zynq-iodma":
-            assert self.odma.read(0x00) & 0x4 != 0, "Output DMA is not idle"
+            for o in range(self.num_outputs):
+                assert (
+                    self.odma[o].read(0x00) & 0x4 != 0
+                ), "Output DMA %d is not idle" % (o)
             # manually launch IODMAs since signatures are missing
             for iwdma, iwbuf, iwdma_name in self.external_weights:
                 iwdma.write(0x10, iwbuf.device_address)
                 iwdma.write(0x1C, batch_size)
                 iwdma.write(0x00, 1)
-            self.idma.write(0x10, self.ibuf_packed_device.device_address)
-            self.idma.write(0x1C, batch_size)
-            self.odma.write(0x10, self.obuf_packed_device.device_address)
-            self.odma.write(0x1C, batch_size)
-            self.idma.write(0x00, 1)
-            self.odma.write(0x00, 1)
+            for o in range(self.num_outputs):
+                self.odma[o].write(0x10, self.obuf_packed_device[o].device_address)
+                self.odma[o].write(0x1C, batch_size)
+                self.odma[o].write(0x00, 1)
+            for i in range(self.num_inputs):
+                self.idma[i].write(0x10, self.ibuf_packed_device[i].device_address)
+                self.idma[i].write(0x1C, batch_size)
+                self.idma[i].write(0x00, 1)
         elif self.platform == "alveo":
-            assert self.odma_handle is None, "Output DMA is already running"
-            self.idma.start(self.ibuf_packed_device, batch_size)
+            for o in range(self.num_outputs):
+                assert self.odma_handle[o] is None, (
+                    "Output DMA %d is already running" % o
+                )
+            for i in range(self.num_inputs):
+                self.idma[i].start(self.ibuf_packed_device[i], batch_size)
             for iwdma, iwbuf, iwdma_name in self.external_weights:
                 iwdma.start(iwbuf, batch_size)
-            self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size)
+            for o in range(self.num_outputs):
+                self.odma_handle[o] = self.odma[o].start(
+                    self.obuf_packed_device[o], batch_size
+                )
         else:
             raise Exception("Unrecognized platform: %s" % self.platform)
         # blocking behavior depends on asynch parameter
@@ -363,31 +382,48 @@ class FINNExampleOverlay(Overlay):
             self.wait_until_finished()
 
     def wait_until_finished(self):
-        "Block until the output DMA has finished writing."
+        "Block until all output DMAs have finished writing."
         if self.platform == "zynq-iodma":
             # check if output IODMA is finished via register reads
-            status = self.odma.read(0x00)
-            while status & 0x2 == 0:
-                status = self.odma.read(0x00)
+            for o in range(self.num_outputs):
+                status = self.odma[o].read(0x00)
+                while status & 0x2 == 0:
+                    status = self.odma[o].read(0x00)
         elif self.platform == "alveo":
-            assert self.odma_handle is not None, "No odma_handle to wait on"
-            self.odma_handle.wait()
-            self.odma_handle = None
+            assert all(
+                [x is not None for x in self.odma_handle]
+            ), "No odma_handle to wait on"
+            for o in range(self.num_outputs):
+                self.odma_handle[o].wait()
+                self.odma_handle[o] = None
         else:
             raise Exception("Unrecognized platform: %s" % self.platform)
 
     def execute(self, input_npy):
-        """Given input numpy array, first perform necessary packing and copying
-        to device buffers, execute on accelerator, then unpack output and return
-        output numpy array from accelerator."""
-        ibuf_folded = self.fold_input(input_npy)
-        ibuf_packed = self.pack_input(ibuf_folded)
-        self.copy_input_data_to_device(ibuf_packed)
+        """Given a single or a list of input numpy array, first perform necessary
+        packing and copying to device buffers, execute on accelerator, then unpack
+        output and return output numpy array from accelerator."""
+        # if single input, convert to list to normalize how we process the input
+        if not isinstance(input_npy, list):
+            input_npy = [input_npy]
+        assert self.num_inputs == len(
+            input_npy
+        ), "Not all accelerator inputs are specified."
+        for i in range(self.num_inputs):
+            ibuf_folded = self.fold_input(input_npy[i], ind=i)
+            ibuf_packed = self.pack_input(ibuf_folded, ind=i)
+            self.copy_input_data_to_device(ibuf_packed, ind=i)
         self.execute_on_buffers()
-        self.copy_output_data_from_device(self.obuf_packed)
-        obuf_folded = self.unpack_output(self.obuf_packed)
-        obuf_normal = self.unfold_output(obuf_folded)
-        return obuf_normal
+        outputs = []
+        for o in range(self.num_outputs):
+            self.copy_output_data_from_device(self.obuf_packed[o], ind=o)
+            obuf_folded = self.unpack_output(self.obuf_packed[o], ind=o)
+            obuf_normal = self.unfold_output(obuf_folded, ind=o)
+            outputs.append(obuf_normal)
+        if self.num_outputs == 1:
+            return outputs[0]
+        else:
+            return outputs
 
     def throughput_test(self):
         """Run accelerator with empty inputs to measure throughput and other metrics.
@@ -400,12 +436,14 @@ class FINNExampleOverlay(Overlay):
         runtime = end - start
         res["runtime[ms]"] = runtime * 1000
         res["throughput[images/s]"] = self.batch_size / runtime
-        res["DRAM_in_bandwidth[Mb/s]"] = (
-            np.prod(self.ishape_packed) * 0.000001 / runtime
-        )
-        res["DRAM_out_bandwidth[Mb/s]"] = (
-            np.prod(self.oshape_packed) * 0.000001 / runtime
-        )
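+        # aggregate packed sizes over all input/output streams for the DRAM bandwidth metrics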
+        total_in = 0
+        for i in range(self.num_inputs):
+            total_in += np.prod(self.ishape_packed(i))
+        res["DRAM_in_bandwidth[Mb/s]"] = total_in * 0.000001 / runtime
+        total_out = 0
+        for o in range(self.num_outputs):
+            total_out += np.prod(self.oshape_packed(o))
+        res["DRAM_out_bandwidth[Mb/s]"] = total_out * 0.000001 / runtime
         for iwdma, iwbuf, iwdma_name in self.external_weights:
             res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = (
                 self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime
@@ -416,11 +454,11 @@ class FINNExampleOverlay(Overlay):
             res["fclk[mhz]"] = self.clock_dict["clock0"]["frequency"]
         res["batch_size"] = self.batch_size
         # also benchmark driver-related overheads
-        input_npy = gen_finn_dt_tensor(self.idt, self.ishape_normal)
+        input_npy = gen_finn_dt_tensor(self.idt(), self.ishape_normal())
         # provide as int8/uint8 to support fast packing path where possible
-        if self.idt == DataType.UINT8:
+        if self.idt() == DataType["UINT8"]:
             input_npy = input_npy.astype(np.uint8)
-        elif self.idt == DataType.INT8:
+        elif self.idt() == DataType["INT8"]:
             input_npy = input_npy.astype(np.int8)
         start = time.time()
         ibuf_folded = self.fold_input(input_npy)
@@ -441,13 +479,13 @@ class FINNExampleOverlay(Overlay):
         res["copy_input_data_to_device[ms]"] = runtime * 1000
 
         start = time.time()
-        self.copy_output_data_from_device(self.obuf_packed)
+        self.copy_output_data_from_device(self.obuf_packed[0])
         end = time.time()
         runtime = end - start
         res["copy_output_data_from_device[ms]"] = runtime * 1000
 
         start = time.time()
-        obuf_folded = self.unpack_output(self.obuf_packed)
+        obuf_folded = self.unpack_output(self.obuf_packed[0])
         end = time.time()
         runtime = end - start
         res["unpack_output[ms]"] = runtime * 1000
diff --git a/src/finn/qnn-data/templates/driver/validate.py b/src/finn/qnn-data/templates/driver/validate.py
index 001744cba2b59f6d1a0a67fca3e2ad9668a519c0..1b29d4342c830ae896e580f602e810ee25ed234d 100644
--- a/src/finn/qnn-data/templates/driver/validate.py
+++ b/src/finn/qnn-data/templates/driver/validate.py
@@ -94,11 +94,11 @@ if __name__ == "__main__":
     test_labels = test_labels.reshape(n_batches, bsize)
 
     for i in range(n_batches):
-        ibuf_normal = test_imgs[i].reshape(driver.ibuf_packed_device.shape)
+        ibuf_normal = test_imgs[i].reshape(driver.ibuf_packed_device[0].shape)
         exp = test_labels[i]
         driver.copy_input_data_to_device(ibuf_normal)
         driver.execute_on_buffers()
-        obuf_normal = np.empty_like(driver.obuf_packed_device)
+        obuf_normal = np.empty_like(driver.obuf_packed_device[0])
         driver.copy_output_data_from_device(obuf_normal)
         ret = np.bincount(obuf_normal.flatten() == exp.flatten())
         nok += ret[0]
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 03d7b73a567ef8e87890d4ecfdc697ab3c6120fd..113ccb93b839d6a3bd67e3bf8f23e477e86822c6 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -61,7 +61,9 @@ class InferConvInpGen(Transformation):
                 i2c_out_shape = model.get_tensor_shape(i2c_output)
                 dt = model.get_tensor_datatype(i2c_input)
                 if not dt.is_integer():
-                    warnings.warn("Input is not int. Can't infer ConvInpGen")
+                    warnings.warn(
+                        "%s : Input is not int. Can't infer ConvInpGen." % n.name
+                    )
                     continue
                 i2c_inst = getCustomOp(n)
                 stride_h, stride_w = i2c_inst.get_nodeattr("stride")
@@ -89,9 +91,10 @@ class InferConvInpGen(Transformation):
                     # if padding enabled, ensure pad_val supported by DataType
                     # assert dt.allowed(pad_val),"""FMPadding_Batch DataType
                     # must support pad_val"""
-                    assert (
-                        pad_val == 0
-                    ), "FMPadding_Batch doesn't currently support pad_val!= 0"
+                    assert pad_val == 0, (
+                        "%s : FMPadding_Batch doesn't currently support pad_val!= 0"
+                        % n.name
+                    )
 
                     odim_padding_h = ifm_dim_h + pad_h
                     odim_padding_w = ifm_dim_w + pad_w
@@ -121,6 +124,7 @@ class InferConvInpGen(Transformation):
                         NumChannels=ifm_ch,
                         inputDataType=dt.name,
                         SIMD=ifm_ch,
+                        name="FMPadding_Batch_" + n.name,
                     )
                     graph.node.insert(node_ind, padding_node)
 
@@ -134,11 +138,15 @@ class InferConvInpGen(Transformation):
                 )
 
                 if (stride_h > 1 or stride_w > 1) and is_kernel_pointwise:
-                    assert (
-                        is_square_image
-                    ), "DownSampler currently only supports square input images."
-                    assert is_equal_stride, """DownSampler currently only supports equal stride value
+                    assert is_square_image, (
+                        "%s : DownSampler currently only supports square input images."
+                        % n.name
+                    )
+                    assert is_equal_stride, (
+                        """%s : DownSampler currently only supports equal stride value
                         along different axes."""
+                        % n.name
+                    )
                     ConvInpGen_idim = ConvInpGen_idim_h
                     stride = stride_h
                     # create DownSampler node
@@ -153,6 +161,7 @@ class InferConvInpGen(Transformation):
                         SIMD=ifm_ch,
                         Stride=stride,
                         inputDataType=dt.name,
+                        name="DownSampler_" + n.name,
                     )
                     graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
                 else:
@@ -160,12 +169,16 @@ class InferConvInpGen(Transformation):
                     if (
                         is_square_image and is_square_kernel
                     ):  # square images and square kernels
-                        assert is_equal_stride, """Non-equal strides along different axes is not supported
+                        assert is_equal_stride, (
+                            """%s: Non-equal strides along different axes is not supported
                             for (non-)square convolutions"""
-                        assert (
-                            dilation_h == 1 and dilation_w == 1
-                        ), """Dilation value != 1 is not supported
+                            % n.name
+                        )
+                        assert dilation_h == 1 and dilation_w == 1, (
+                            """%s: Dilation value != 1 is not supported
                             for square convolutions"""
+                            % n.name
+                        )
                         ConvInpGen_node = helper.make_node(
                             "ConvolutionInputGenerator",
                             [ConvInpGen_input],
@@ -182,16 +195,19 @@ class InferConvInpGen(Transformation):
                             inputDataType=dt.name,
                             outputDataType=dt.name,
                             depthwise=depthwise,
+                            name="ConvolutionInputGenerator_" + n.name,
                         )
                     else:  # non-square images and/or kernels
-                        assert (
-                            is_1d_convolution
-                        ), "ConvultionInputGenerator1D works only for 1D convolutions"
+                        assert is_1d_convolution, (
+                            "%s: ConvolutionInputGenerator1D works only for 1D convs"
+                            % n.name
+                        )
                         if dilation_h > 1 or dilation_w > 1:
-                            assert (
-                                stride_h == 1 and stride_w == 1
-                            ), """Stride value of greater than 1 is not supported for convolutions
+                            assert stride_h == 1 and stride_w == 1, (
+                                """%s: Stride value of greater than 1 is not supported for convolutions
                                 with dilation value greater than 1"""
+                                % n.name
+                            )
                         ConvInpGen_node = helper.make_node(
                             "ConvolutionInputGenerator1D",
                             [ConvInpGen_input],
@@ -208,6 +224,7 @@ class InferConvInpGen(Transformation):
                             inputDataType=dt.name,
                             outputDataType=dt.name,
                             depthwise=depthwise,
+                            name="ConvolutionInputGenerator1D_" + n.name,
                         )
                     graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
                 # remove old nodes
@@ -219,6 +236,102 @@ class InferConvInpGen(Transformation):
         return (model, graph_modified)
 
 
+class InferUpsample(Transformation):
+    """
+    Convert Upsample and Resize nodes to UpsampleNearestNeighbour_Batch nodes.
+    """
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "Upsample" or n.op_type == "Resize":
+                # Extract mode and scales and input shape
+                mode = get_by_name(n.attribute, "mode").s.decode("ascii")
+                if n.op_type == "Upsample":
+                    scales = model.get_initializer(n.input[1])
+                else:
+                    scales = model.get_initializer(n.input[2])
+                in_shape = model.get_tensor_shape(n.input[0])
+
+                dt = model.get_tensor_datatype(n.input[0])
+                if not dt.is_integer():
+                    warnings.warn(
+                        "%s: Input not int. Can't infer UpsampleNearestNeighbour."
+                        % n.name
+                    )
+                    continue
+
+                if model.get_tensor_layout(n.input[0]) != DataLayout.NHWC:
+                    warnings.warn(
+                        "%s: Input not NHWC. Can't infer UpsampleNearestNeighbour."
+                        % n.name
+                    )
+                    continue
+
+                # Check that the parameters are okay
+                assert mode == "nearest", (
+                    "%s: Upsampling is only supported for the mode nearest." % n.name
+                )
+                assert len(in_shape) == 4, (
+                    "%s: Upsampling is only supported for 4D inputs." % n.name
+                )
+                assert scales.shape == (4,), (
+                    "%s: Upsampling is only supported for 4D scales." % n.name
+                )
+                assert (scales >= 1).all(), (
+                    n.name + ": Upsampling is only supported for scales "
+                    "which are larger or equal 1 in all dimensions."
+                )
+
+                # Assumes nhwc layout for scales and input
+                assert scales[1] == scales[2], (
+                    "%s: Upsampling is only supported for quadratic scales." % n.name
+                )
+                assert scales[0] == scales[3] == 1, (
+                    n.name + ": Upsampling is only supported for scales with "
+                    "the first and last dimensions being 1."
+                )
+                spatial_scale = scales[1]
+                assert spatial_scale == int(spatial_scale), (
+                    "%s: Upsampling is only supported for integer scales." % n.name
+                )
+
+                assert in_shape[1] == in_shape[2], (
+                    "%s: Upsampling is only supported for quadratic input shapes."
+                    % n.name
+                )
+
+                # Extract information for HLS node
+                IFMDim = in_shape[1]
+                OFMDim = int(round(in_shape[1] * spatial_scale))
+                NumChannels = in_shape[-1]
+                numInputVectors = in_shape[0]
+                inputDataType = dt.name
+
+                # Insert the HLSCustomOp node
+                Upsample_HLS_node = helper.make_node(
+                    "UpsampleNearestNeighbour_Batch",
+                    [n.input[0]],
+                    [n.output[0]],
+                    domain="finn.custom_op.fpgadataflow",
+                    backend="fpgadataflow",
+                    OFMDim=OFMDim,
+                    IFMDim=IFMDim,
+                    NumChannels=NumChannels,
+                    inputDataType=inputDataType,
+                    numInputVectors=numInputVectors,
+                    name="UpsampleNearestNeighbour_Batch_" + n.name,
+                )
+
+                # Remove the old node
+                graph.node.insert(node_ind, Upsample_HLS_node)
+                # remove old nodes
+                graph.node.remove(n)
+                graph_modified = True
+        return (model, graph_modified)
+
+
 class InferStreamingMaxPool(Transformation):
     """Convert MaxPoolNHWC layers to StreamingMaxPool layers."""
 
@@ -235,25 +348,23 @@ class InferStreamingMaxPool(Transformation):
                 # mp_out_shape = model.get_tensor_shape(mp_output)
                 dt = model.get_tensor_datatype(mp_input)
                 mp_inst = getCustomOp(n)
-                # stride = mp_inst.get_nodeattr("strides")[0]
-                k = mp_inst.get_nodeattr("kernel_shape")[0]
-                # pad = mp_inst.get_nodeattr("pads")[0]
+                k_h, k_w = mp_inst.get_nodeattr("kernel_shape")
                 ifm_ch = mp_in_shape[-1]
-                ifm_dim = mp_in_shape[1]
-                # ofm_dim = mp_out_shape[1]
-                if ifm_dim % k == 0:
+                ifm_dim_h = mp_in_shape[1]
+                ifm_dim_w = mp_in_shape[2]
+                if ifm_dim_h % k_h == 0 and ifm_dim_w % k_w == 0:
                     # create equivalent StreamingMaxPool_Batch node
-                    # TODO support non-k strides
                     new_node = helper.make_node(
                         "StreamingMaxPool_Batch",
                         [mp_input],
                         [mp_output],
                         domain="finn.custom_op.fpgadataflow",
                         backend="fpgadataflow",
-                        PoolDim=k,
+                        PoolDim=(k_h, k_w),
                         NumChannels=ifm_ch,
-                        ImgDim=ifm_dim,
+                        ImgDim=(ifm_dim_h, ifm_dim_w),
                         dataType=dt.name,
+                        name="StreamingMaxPool_Batch_" + n.name,
                     )
                     graph.node.insert(node_ind, new_node)
                     # remove old nodes
@@ -276,7 +387,7 @@ class InferPool_Batch(Transformation):
         graph_modified = False
         for n in graph.node:
             node_ind += 1
-            if n.op_type in ["MaxPool", "QuantAvgPool2d"]:
+            if n.op_type in ["MaxPool", "QuantAvgPool2d", "MaxPoolNHWC"]:
                 # extract pool parameters
 
                 if n.op_type == "MaxPool":
@@ -289,6 +400,15 @@ class InferPool_Batch(Transformation):
                     k = inst.get_nodeattr("kernel")
                     stride = inst.get_nodeattr("stride")
                     dlayout = inst.get_nodeattr("data_layout")
+                elif n.op_type == "MaxPoolNHWC":
+                    inst = getCustomOp(n)
+                    k_shape = inst.get_nodeattr("kernel_shape")
+                    strides = inst.get_nodeattr("strides")
+                    assert k_shape[0] == k_shape[1]
+                    assert strides[0] == strides[1]
+                    k = k_shape[0]
+                    stride = strides[0]
+                    dlayout = "NHWC"
                 try:
                     pad = get_by_name(n.attribute, "pads").ints[-1]
                 except AttributeError:
@@ -305,7 +425,8 @@ class InferPool_Batch(Transformation):
                     continue
                 elif k == stride:
                     warnings.warn(
-                        """Inferring Pool_Batch node for k == stride.
+                        n.name
+                        + """: Inferring Pool_Batch node for k == stride.
                         This case can be optimized.
                         For example, for MaxPool run InferStreamingMaxPool before
                         InferPool_Batch """
@@ -366,7 +487,7 @@ class InferPool_Batch(Transformation):
                 accum_bits = 0
                 pool_size_param = k
                 pad_value = 0
-                if n.op_type == "MaxPool":
+                if n.op_type in ["MaxPool", "MaxPoolNHWC"]:
                     pool_fxn = "MaxPool"
                     odt = idt
                     pad_value = idt.min()
@@ -396,6 +517,7 @@ class InferPool_Batch(Transformation):
                     pad_value=pad_value,
                     depthwise=1,
                     input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch),
+                    name="Im2Col_" + n.name,
                 )
 
                 # Warning PE has to be equal to ifm_ch until Im2Col is replaced by
@@ -418,6 +540,7 @@ class InferPool_Batch(Transformation):
                     AccumBits=accum_bits,
                     Size=pool_size_param,
                     BatchSize=1,
+                    name="Pool_Batch_" + n.name,
                 )
 
                 if dlayout == "NCHW":
@@ -466,16 +589,18 @@ class InferBinaryStreamingFCLayer(Transformation):
                 mm_output = n.output[0]
                 mm_in_shape = model.get_tensor_shape(mm_input)
                 mm_out_shape = model.get_tensor_shape(mm_output)
-                assert (
-                    model.get_tensor_datatype(mm_input) == DataType.BINARY
-                ), """First
+                assert model.get_tensor_datatype(mm_input) == DataType["BINARY"], (
+                    n.name
+                    + """: First
                 input for xnorpopcount is not set to FINN DataType BINARY."""
-                assert (
-                    model.get_tensor_datatype(mm_weight) == DataType.BINARY
-                ), """Second
+                )
+                assert model.get_tensor_datatype(mm_weight) == DataType["BINARY"], (
+                    n.name
+                    + """: Second
                 input (weights) for xnorpopcount is not set to FINN DataType BINARY."""
-                idt = DataType.BINARY
-                wdt = DataType.BINARY
+                )
+                idt = DataType["BINARY"]
+                wdt = DataType["BINARY"]
                 mm_output = n.output[0]
                 W = model.get_initializer(mm_weight)
                 # extract weight shape, note that ONNX and finn-hlslib
@@ -487,13 +612,12 @@ class InferBinaryStreamingFCLayer(Transformation):
                 # create node with no parallelization first
                 pe = 1
                 simd = 1
-                assert mh % pe == 0, "Requirement MH divisable by PE is violated."
-                assert mw % simd == 0, "Requirement MW divisable by SIMD is violated."
                 wmem = mw * mh // (pe * simd)
-                assert (
-                    mw * mh == wmem * pe * simd
-                ), """Requirement (MW * MH) divisiable by
+                assert mw * mh == wmem * pe * simd, (
+                    n.name
+                    + """: Requirement (MW * MH) divisiable by
                 (WMEM * PE * SIMD) is violated."""
+                )
                 # see if we have any following thresholds
                 consumer = model.find_consumer(mm_output)
                 if consumer is not None and consumer.op_type == "MultiThreshold":
@@ -503,10 +627,11 @@ class InferBinaryStreamingFCLayer(Transformation):
                     mt_out_shape = model.get_tensor_shape(mt_output)
                     mt_thres = consumer.input[1]
                     T = model.get_initializer(mt_thres)
-                    assert (
-                        T.shape[0] == 1 or T.shape[0] == mh
-                    ), """First dimension of
+                    assert T.shape[0] == 1 or T.shape[0] == mh, (
+                        consumer.name
+                        + """: First dimension of
                     thresholds neither 1 nor MH."""
+                    )
                     odt = model.get_tensor_datatype(mt_output)
                     if odt.bitwidth() == 1:
                         # covers both bipolar and binary
@@ -534,6 +659,7 @@ class InferBinaryStreamingFCLayer(Transformation):
                         noActivation=0,
                         numInputVectors=list(mm_in_shape[:-1]),
                         mem_mode=self.mem_mode,
+                        name=n.name,
                     )
                     graph.node.insert(node_ind, new_node)
                     # remove old nodes
@@ -564,6 +690,7 @@ class InferBinaryStreamingFCLayer(Transformation):
                         noActivation=1,
                         numInputVectors=list(mm_in_shape[:-1]),
                         mem_mode=self.mem_mode,
+                        name=n.name,
                     )
                     graph.node.insert(node_ind, new_node)
                     # remove old node
@@ -611,15 +738,12 @@ class InferQuantizedStreamingFCLayer(Transformation):
                     # create node with no parallelization first
                     pe = 1
                     simd = 1
-                    assert mh % pe == 0, "Requirement MH divisable by PE is violated."
-                    assert (
-                        mw % simd == 0
-                    ), "Requirement MW divisable by SIMD is violated."
                     wmem = mw * mh // (pe * simd)
-                    assert (
-                        mw * mh == wmem * pe * simd
-                    ), """Requirement (MW * MH) divisible by
+                    assert mw * mh == wmem * pe * simd, (
+                        n.name
+                        + """: Requirement (MW * MH) divisible by
                     (WMEM * PE * SIMD) is violated."""
+                    )
                     # see if we have any following thresholds
                     consumer = model.find_consumer(mm_output)
                     if consumer is not None and consumer.op_type == "MultiThreshold":
@@ -629,27 +753,30 @@ class InferQuantizedStreamingFCLayer(Transformation):
                         mt_out_shape = model.get_tensor_shape(mt_output)
                         mt_thres = consumer.input[1]
                         T = model.get_initializer(mt_thres)
-                        assert (
-                            T.shape[0] == 1 or T.shape[0] == mh
-                        ), """First dimension of
+                        assert T.shape[0] == 1 or T.shape[0] == mh, (
+                            consumer.name
+                            + """: First dimension of
                         thresholds neither 1 nor MH."""
+                        )
                         odt = model.get_tensor_datatype(mt_output)
                         scale = getCustomOp(consumer).get_nodeattr("out_scale")
                         actval = getCustomOp(consumer).get_nodeattr("out_bias")
-                        assert (
-                            int(actval) == actval
-                        ), "out_bias must be integer for HLS conversion."
+                        assert int(actval) == actval, (
+                            consumer.name
+                            + ": out_bias must be integer for HLS conversion."
+                        )
                         actval = int(actval)
-                        odt_is_bipolar = odt == DataType.BIPOLAR
+                        odt_is_bipolar = odt == DataType["BIPOLAR"]
                         bipolar_ok = (
                             odt_is_bipolar and (scale == 2.0) and (actval == -1)
                         )
-                        assert (
-                            scale == 1.0 or bipolar_ok
-                        ), "out_scale = 1.0 or bipolar output needed for conversion."
-                        assert (not odt.signed()) or (
-                            actval < 0
-                        ), "Signed output requres actval < 0"
+                        assert scale == 1.0 or bipolar_ok, (
+                            consumer.name
+                            + ": out_scale=1 or bipolar output needed for conversion."
+                        )
+                        assert (not odt.signed()) or (actval < 0), (
+                            consumer.name + ": Signed output requires actval < 0"
+                        )
                         model.set_tensor_shape(mm_input, mm_in_shape)
                         model.set_tensor_shape(mt_output, mt_out_shape)
                         if bipolar_ok:
@@ -675,6 +802,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
                             noActivation=0,
                             numInputVectors=list(mm_in_shape[:-1]),
                             mem_mode=self.mem_mode,
+                            name="StreamingFCLayer_Batch_" + n.name,
                         )
                         graph.node.insert(node_ind, new_node)
                         # remove old nodes
@@ -705,6 +833,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
                             noActivation=1,
                             numInputVectors=list(mm_in_shape[:-1]),
                             mem_mode=self.mem_mode,
+                            name="StreamingFCLayer_Batch_" + n.name,
                         )
                         graph.node.insert(node_ind, new_node)
                         # remove old node
@@ -739,7 +868,8 @@ class InferVVAU(Transformation):
                     k_h, k_w = sparsity["dw"]["kernel_shape"]
                 except KeyError:
                     raise Exception(
-                        """Sparsity doesn't indicate that MatMul
+                        n.name
+                        + """: sparsity annotation doesn't indicate that MatMul
                         belongs to a depthwise convolution."""
                     )
 
@@ -775,9 +905,6 @@ class InferVVAU(Transformation):
                     model.set_tensor_shape(mm_weight, (channels, 1, k_h, k_w))
                     # create node with pe=channels as default
                     pe = channels
-                    assert (
-                        channels % pe == 0
-                    ), "Requirement Channels divisable by PE is violated."
                     # see if we have any following thresholds
                     consumer = model.find_consumer(mm_output)
                     if consumer is not None and consumer.op_type == "MultiThreshold":
@@ -786,23 +913,26 @@ class InferVVAU(Transformation):
                         mt_out_shape = model.get_tensor_shape(mt_output)
                         mt_thres = consumer.input[1]
                         T = model.get_initializer(mt_thres)
-                        assert (
-                            T.shape[0] == 1 or T.shape[0] == channels
-                        ), """First dimension of
+                        assert T.shape[0] == 1 or T.shape[0] == channels, (
+                            consumer.name
+                            + """: First dimension of
                         thresholds neither 1 nor Channels."""
+                        )
                         odt = model.get_tensor_datatype(mt_output)
                         scale = getCustomOp(consumer).get_nodeattr("out_scale")
-                        assert (
-                            scale == 1.0
-                        ), "out_scale must be equal to 1.0 for HLS conversion."
+                        assert scale == 1.0, (
+                            consumer.name
+                            + ": out_scale must be equal to 1.0 for HLS conversion."
+                        )
                         actval = getCustomOp(consumer).get_nodeattr("out_bias")
-                        assert (
-                            int(actval) == actval
-                        ), "out_bias must be integer for HLS conversion."
+                        assert int(actval) == actval, (
+                            consumer.name
+                            + ": out_bias must be integer for HLS conversion."
+                        )
                         actval = int(actval)
-                        assert (not odt.signed()) or (
-                            actval < 0
-                        ), "Signed output requres actval < 0"
+                        assert (not odt.signed()) or (actval < 0), (
+                            consumer.name + ": Signed output requires actval < 0"
+                        )
                         model.set_tensor_shape(mm_input, mm_in_shape)
                         model.set_tensor_shape(mt_output, mt_out_shape)
                         # create and insert new Vector_Vector_Activate_Batch node
@@ -822,6 +952,7 @@ class InferVVAU(Transformation):
                             outputDataType=odt.name,
                             ActVal=actval,
                             noActivation=0,
+                            name="Vector_Vector_Activate_Batch_" + n.name,
                         )
                         graph.node.insert(node_ind, new_node)
                         # remove old nodes
@@ -850,6 +981,7 @@ class InferVVAU(Transformation):
                             outputDataType=odt.name,
                             ActVal=0,
                             noActivation=1,
+                            name="Vector_Vector_Activate_Batch_" + n.name,
                         )
                         graph.node.insert(node_ind, new_node)
                         # remove old node
@@ -907,21 +1039,22 @@ class InferThresholdingLayer(Transformation):
                 ifc = int(thl_in_shape[-1])
                 # create node with no parallelization first
                 pe = 1
-                assert ifc % pe == 0, "Requirement IFC divisable by PE is violated."
 
                 odt = model.get_tensor_datatype(thl_output)
                 scale = getCustomOp(node).get_nodeattr("out_scale")
-                assert (
-                    scale == 1.0
-                ), "MultiThreshold out_scale must be equal to 1.0 for HLS conversion."
+                assert scale == 1.0, (
+                    node.name
+                    + ": MultiThreshold out_scale must be 1 for HLS conversion."
+                )
                 actval = getCustomOp(node).get_nodeattr("out_bias")
-                assert (
-                    int(actval) == actval
-                ), "MultiThreshold out_bias must be integer for HLS conversion."
+                assert int(actval) == actval, (
+                    node.name
+                    + ": MultiThreshold out_bias must be integer for HLS conversion."
+                )
                 actval = int(actval)
-                assert (not odt.signed()) or (
-                    actval < 0
-                ), "Signed output requres actval < 0"
+                assert (not odt.signed()) or (actval < 0), (
+                    node.name + ": Signed output requires actval < 0"
+                )
                 # create and insert new Thresholding_Batch node
                 new_node = helper.make_node(
                     "Thresholding_Batch",
@@ -938,6 +1071,7 @@ class InferThresholdingLayer(Transformation):
                     numInputVectors=list(thl_in_shape[:-1]),
                     ActVal=actval,
                     mem_mode=self.mem_mode,
+                    name="Thresholding_Batch_" + node.name,
                 )
                 graph.node.insert(insert_point, new_node)
                 # remove old node
@@ -1011,9 +1145,6 @@ class InferAddStreamsLayer(Transformation):
                 num_channels = int(in0_shape[-1])
                 # create node with no parallelization first
                 pe = 1
-                assert (
-                    num_channels % pe == 0
-                ), "Requirement Channels divisable by PE is violated."
 
                 # create and insert new StreamingFCLayer node
                 new_node = helper.make_node(
@@ -1026,6 +1157,7 @@ class InferAddStreamsLayer(Transformation):
                     PE=pe,
                     inputDataType=idt.name,
                     numInputVectors=in0_shape[:-1],
+                    name="AddStreams_Batch_" + node.name,
                 )
                 graph.node.insert(insert_point, new_node)
                 # remove old node
@@ -1072,9 +1204,6 @@ class InferDuplicateStreamsLayer(Transformation):
 
                 # create node with no parallelization first
                 pe = 1
-                assert (
-                    num_ch % pe == 0
-                ), "Requirement channels divisable by PE is violated."
 
                 dup_node = helper.make_node(
                     "DuplicateStreams_Batch",
@@ -1086,6 +1215,7 @@ class InferDuplicateStreamsLayer(Transformation):
                     PE=pe,
                     inputDataType=dt.name,
                     numInputVectors=vecs,
+                    name="DuplicateStreams_Batch_" + node.name,
                 )
 
                 graph.node.insert(node_ind, dup_node)
@@ -1121,10 +1251,10 @@ class InferChannelwiseLinearLayer(Transformation):
         for v in vals:
             assert int(v) == v, "Error float value"
 
-        for k in DataType.__members__:
+        for k in DataType.get_accumulator_dt_cands():
             dt = DataType[k]
 
-            if dt in [DataType.BIPOLAR, DataType.TERNARY, DataType.FLOAT32]:
+            if dt in [DataType["BIPOLAR"], DataType["TERNARY"], DataType["FLOAT32"]]:
                 # not currently supported
                 continue
 
@@ -1140,9 +1270,9 @@ class InferChannelwiseLinearLayer(Transformation):
         )
 
         if (0 <= vals).all():
-            return DataType.UINT64
+            return DataType["UINT64"]
         else:
-            return DataType.INT64
+            return DataType["INT64"]
 
     def apply(self, model):
         graph = model.graph
@@ -1254,6 +1384,7 @@ class InferChannelwiseLinearLayer(Transformation):
                     paramDataType=pdt.name,
                     outputDataType=odt.name,
                     numInputVectors=list(ll_in_shape[:-1]),
+                    name="ChannelwiseOp_Batch_" + node.name,
                 )
                 graph.node.insert(insert_point, new_node)
                 # remove old node
@@ -1296,9 +1427,6 @@ class InferLabelSelectLayer(Transformation):
                 num_inp_vecs = list(fc_in_shape[:-1])
                 # create node with no parallelization first
                 pe = 1
-                assert (
-                    num_labels % pe == 0
-                ), "Requirement Labels divisable by PE is violated."
 
                 k = model.get_initializer(k_input)[0]
 
@@ -1314,6 +1442,7 @@ class InferLabelSelectLayer(Transformation):
                     K=k,
                     inputDataType=idt.name,
                     numInputVectors=num_inp_vecs,
+                    name="LabelSelect_Batch_" + node.name,
                 )
                 graph.node.insert(node_ind, new_node)
                 # remove old node
@@ -1367,9 +1496,6 @@ class InferGlobalAccPoolLayer(Transformation):
                 vecs = in0_shape[:-1]
                 # create node with no parallelization first
                 pe = 1
-                assert (
-                    num_ch % pe == 0
-                ), "Requirement Labels divisable by PE is violated."
 
                 # create an additional tensor of the same shape and layout as result
                 out_shape = model.get_tensor_shape(result)
@@ -1390,6 +1516,7 @@ class InferGlobalAccPoolLayer(Transformation):
                     PE=pe,
                     inputDataType=idt.name,
                     numInputVectors=vecs,
+                    name="GlobalAccPool_Batch_" + node.name,
                 )
 
                 mul_value = helper.make_tensor_value_info(
@@ -1413,3 +1540,56 @@ class InferGlobalAccPoolLayer(Transformation):
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
+
+
+class InferLookupLayer(Transformation):
+    """Convert Gather nodes with a constant first input into Lookup HLS layers."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for node in graph.node:
+            node_ind += 1
+            if node.op_type == "Gather":
+                emb_name = node.input[0]
+                embs = model.get_initializer(emb_name)
+                axis = get_by_name(node.attribute, "axis")
+                # skip conversion if input0 is not constant
+                if embs is None:
+                    continue
+                # skip conversion if axis != 0
+                if axis is not None and axis.i != 0:
+                    continue
+                ind_name = node.input[1]
+                ind_dtype = model.get_tensor_datatype(ind_name)
+                emb_dtype = model.get_tensor_datatype(emb_name)
+                # skip conversion if the index input is not an unsigned integer
+                if (not ind_dtype.is_integer()) or ind_dtype.signed():
+                    continue
+                num_embs, emb_dim = embs.shape
+                out_name = node.output[0]
+                ishape = model.get_tensor_shape(node.input[1])
+                # create and insert new Lookup node
+                new_node = helper.make_node(
+                    "Lookup",
+                    [ind_name, emb_name],
+                    [out_name],
+                    domain="finn.custom_op.fpgadataflow",
+                    backend="fpgadataflow",
+                    name="Lookup_" + node.name,
+                    NumEmbeddings=num_embs,
+                    EmbeddingDim=emb_dim,
+                    EmbeddingType=emb_dtype.name,
+                    InputType=ind_dtype.name,
+                    InputShape=list(ishape),
+                )
+                graph.node.insert(node_ind, new_node)
+                # remove old node
+                graph.node.remove(node)
+                graph_modified = True
+
+        if graph_modified:
+            model = model.transform(InferShapes())
+            model = model.transform(InferDataTypes())
+        return (model, graph_modified)
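
The new InferLookupLayer transformation can be exercised on a minimal standalone graph. The sketch below is illustrative only and not part of the patch; tensor names, shapes and datatypes are made up. It builds a Gather node whose first input is a constant embedding table and whose indices are unsigned integers, then converts it into a Lookup HLS layer.

import numpy as np
from onnx import TensorProto, helper

from finn.core.datatype import DataType
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.convert_to_hls_layers import InferLookupLayer

num_embs, emb_dim, num_inds = 128, 16, 10
ind = helper.make_tensor_value_info("ind", TensorProto.FLOAT, [1, num_inds])
out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, num_inds, emb_dim])
gather = helper.make_node("Gather", ["embs", "ind"], ["out"], axis=0)
graph = helper.make_graph([gather], "lookup_graph", [ind], [out])
model = ModelWrapper(helper.make_model(graph))
# first input must be constant (the embedding table), indices must be unsigned
model.set_initializer("embs", np.random.rand(num_embs, emb_dim).astype(np.float32))
model.set_tensor_datatype("embs", DataType["INT8"])
model.set_tensor_datatype("ind", DataType["UINT8"])
model = model.transform(InferLookupLayer())
assert model.graph.node[0].op_type == "Lookup"
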
diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
index 0aba60f9b6f08210c40f305694495b77f517f323..9b2577bc2b863e1075fc3252412ff1001b955cda 100644
--- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
+++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
@@ -26,11 +26,11 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import copy
-from onnx import helper
-
+from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
+from finn.transformation.create_generic_partitions import PartitionFromLambda
+from finn.transformation.fpgadataflow.externalize_params import ExternalizeParams
 from finn.util.basic import get_by_name, make_build_dir
 
 
@@ -41,120 +41,76 @@ class CreateDataflowPartition(Transformation):
     that indicates the filename for the second graph that only contains
     dataflow nodes. No action is taken if there are no dataflow nodes."""
 
-    def __init__(self):
+    def __init__(self, partition_model_dir=None):
         super().__init__()
+        if partition_model_dir is None:
+            self.partition_model_dir = make_build_dir("dataflow_partition_")
+        else:
+            self.partition_model_dir = partition_model_dir
 
     def apply(self, model):
-        target_partition_id = 0
-        # we currently assume that all dataflow nodes belonging to the same partition
-        # are connected to each other and there is a single input/output to/from each.
-        # NOTE: all dataflow nodes with no partition_id set are moved to partition 0
-        # TODO: check the assumption and/or improve this.
-        while True:
-            all_nodes = list(model.graph.node)
-            df_nodes = filter(
-                lambda x: get_by_name(x.attribute, "backend") is not None, all_nodes
-            )
-            df_nodes = filter(
-                lambda x: get_by_name(x.attribute, "backend").s.decode("UTF-8")
-                == "fpgadataflow"
-                and (
-                    get_by_name(x.attribute, "partition_id") is None
-                    or get_by_name(x.attribute, "partition_id").i == target_partition_id
-                )
-                and x.op_type != "StreamingDataflowPartition",
-                df_nodes,
-            )
-            df_nodes = list(df_nodes)
-            non_df_nodes = filter(lambda x: x not in df_nodes, all_nodes)
-            non_df_nodes = list(non_df_nodes)
-
-            if len(df_nodes) == 0:
-                # no changes if no dataflow nodes are present
-                break
-            else:
-                # partition the model into two models
-                df_model = copy.deepcopy(model)
-                non_df_model = model
-                # remove all non-dataflow nodes from the dataflow model
-                for node_to_remove in non_df_nodes:
-                    df_model.graph.node.remove(node_to_remove)
-                # identify the entry and exit points for the dataflow part
-                df_in = df_model.graph.node[0].input[0]
-                df_out = df_model.graph.node[-1].output[0]
-                df_in_vi = df_model.get_tensor_valueinfo(df_in)
-                df_out_vi = df_model.get_tensor_valueinfo(df_out)
-                # set df graph in/out to be df_in/df_out
-                df_model.graph.input.remove(df_model.graph.input[0])
-                df_model.graph.input.insert(0, df_in_vi)
-                df_model.graph.output.remove(df_model.graph.output[0])
-                df_model.graph.output.insert(0, df_out_vi)
-                # parse StreamingFCLayers looking for external weight memories
-                fc_extw_nodes = filter(
-                    lambda x: x.op_type == "StreamingFCLayer_Batch"
-                    and get_by_name(x.attribute, "mem_mode") is not None
-                    and get_by_name(x.attribute, "mem_mode").s.decode("UTF-8")
-                    == "external",
-                    df_nodes,
-                )
-                fc_extw_nodes = list(fc_extw_nodes)
-                extra_df_inputs = []
+        def filter_fc_extw(x):
+            if x.op_type == "IODMA":
+                burst_mode = get_by_name(x.attribute, "burstMode")
+                if burst_mode is not None:
+                    burst_mode = burst_mode.s.decode("UTF-8")
+                    return burst_mode == "wrap"
 
-                for i in range(len(fc_extw_nodes)):
-                    fc_weight_vi = df_model.get_tensor_valueinfo(
-                        fc_extw_nodes[i].input[1]
-                    )
-                    df_model.graph.input.insert(i + 1, fc_weight_vi)
-                    extra_df_inputs.append(fc_extw_nodes[i].input[1])
+        extw_dma_nodes = list(filter(filter_fc_extw, model.graph.node))
+        if len(extw_dma_nodes) > 0:
+            model = model.transform(ExternalizeParams())
 
-                # save model
-                df_model_dir = make_build_dir(
-                    "dataflow_partition" + str(target_partition_id) + "_"
-                )
-                df_model_filename = df_model_dir + "/df_model.onnx"
-                df_model.cleanup()
-                df_model.save(df_model_filename)
-                # remove all dataflow nodes from the non-dataflow model
-                # keep track of where the dataflow part starts
-                df_start_ind = all_nodes.index(df_nodes[0])
-
-                # get and check floorplan
-                inst = getCustomOp(df_nodes[0])
-                slr = inst.get_nodeattr("slr")
-                for node in df_nodes[1:]:
-                    inst = getCustomOp(node)
-                    assert slr == inst.get_nodeattr(
-                        "slr"
-                    ), """all nodes with
-                same partition_id must have the same slr id"""
-
-                # check that there is only one non-null mem_port per partition
-                nmemports = 0
-                mem_port = ""
-                for node in df_nodes:
-                    inst = getCustomOp(node)
-                    port = inst.get_nodeattr("mem_port")
-                    if port is not None and port != "":
-                        nmemports += 1
-                        mem_port = port
-                assert nmemports <= 1, """too many memory ports per partition"""
+        def assign_partition_id(node):
+            if node.op_type in ["GenericPartition", "StreamingDataflowPartition"]:
+                return -1
+            else:
+                backend = get_by_name(node.attribute, "backend")
+                if backend is not None and backend.s.decode("UTF-8") == "fpgadataflow":
+                    assigned_partition = get_by_name(node.attribute, "partition_id")
+                    if assigned_partition is not None:
+                        return assigned_partition.i
+                    else:
+                        return 0
+                else:
+                    return -1
 
-                for node_to_remove in df_nodes:
-                    non_df_model.graph.node.remove(node_to_remove)
-                # create StreamingDataflow node with df_in/df_out io
-                df_node = helper.make_node(
-                    "StreamingDataflowPartition",
-                    [df_in] + extra_df_inputs,
-                    [df_out],
-                    # use the model attribute to mark the df model
-                    model=df_model_filename,
-                    domain="finn.custom_op.fpgadataflow",
-                    partition_id=target_partition_id,
-                    slr=slr,
-                    mem_port=mem_port,
-                )
-                non_df_model.graph.node.insert(df_start_ind, df_node)
-                model = non_df_model
-                target_partition_id += 1
+        # first, use the generic partitioning functionality to split up the graph
+        parent_model = model.transform(
+            PartitionFromLambda(
+                partitioning=assign_partition_id, partition_dir=self.partition_model_dir
+            )
+        )
+        # change node types to StreamingDataflowPartition
+        p_nodes = parent_model.get_nodes_by_op_type("GenericPartition")
+        for partition_ind, p_node in enumerate(p_nodes):
+            # go into partition to extract some info
+            p_node_inst = getCustomOp(p_node)
+            node_model_filename = p_node_inst.get_nodeattr("model")
+            p_model = ModelWrapper(node_model_filename)
+            # check floorplan (SLR assignment per node)
+            inst = getCustomOp(p_model.graph.node[0])
+            slr = inst.get_nodeattr("slr")
+            for node in p_model.graph.node:
+                inst = getCustomOp(node)
+                assert slr == inst.get_nodeattr(
+                    "slr"
+                ), """all nodes with same partition_id must have the same slr id"""
+            # check that there is only one non-null mem_port per partition
+            nmemports = 0
+            mem_port = ""
+            for node in p_model.graph.node:
+                inst = getCustomOp(node)
+                port = inst.get_nodeattr("mem_port")
+                if port is not None and port != "":
+                    nmemports += 1
+                    mem_port = port
+            assert nmemports <= 1, """Too many memory ports per partition"""
+            # done, change node type and add info in parent graph
+            p_node.op_type = "StreamingDataflowPartition"
+            p_node.domain = "finn.custom_op.fpgadataflow"
+            new_p_node_inst = getCustomOp(p_node)
+            new_p_node_inst.set_nodeattr("partition_id", partition_ind)
+            new_p_node_inst.set_nodeattr("slr", slr)
+            new_p_node_inst.set_nodeattr("mem_port", mem_port)
 
-        return (model, False)
+        return (parent_model, False)
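
With the rework above, CreateDataflowPartition delegates the actual graph splitting to PartitionFromLambda and then retypes the resulting GenericPartition nodes as StreamingDataflowPartition. A minimal usage sketch for inspecting the resulting partitions might look as follows; it is illustrative only, and the input model filename is hypothetical.

from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)

model = ModelWrapper("dataflow_model.onnx")  # graph already converted to HLS layers
parent_model = model.transform(CreateDataflowPartition())
for sdp_node in parent_model.get_nodes_by_op_type("StreamingDataflowPartition"):
    sdp_inst = getCustomOp(sdp_node)
    # each partition is saved as its own .onnx file under partition_model_dir
    child_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
    print(sdp_inst.get_nodeattr("partition_id"), len(child_model.graph.node))
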
diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
index 7ae3c9fee7a16e744c8419abb0256b51a1922c52..19688d3a570fd503edf96d8329b9f2765988ccfb 100644
--- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -86,11 +86,6 @@ class CreateStitchedIP(Transformation):
         self.clk_ns = clk_ns
         self.ip_name = ip_name
         self.vitis = vitis
-        if float(clk_ns) not in [5.0, 10.0, 20.0]:
-            warnings.warn(
-                """The chosen frequency may lead to failure due to clock divider
-                constraints."""
-            )
         self.has_aximm = False
         self.has_m_axis = False
         self.m_axis_idx = 0
@@ -221,6 +216,13 @@ class CreateStitchedIP(Transformation):
         ip_dirs = ["list"]
         # add RTL streamer IP
         ip_dirs.append("/workspace/finn/finn-rtllib/memstream")
+        if model.graph.node[0].op_type not in ["StreamingFIFO", "IODMA"]:
+            warnings.warn(
+                """First node is not StreamingFIFO or IODMA.
+                You may experience incorrect stitched-IP rtlsim or hardware
+                behavior. It is strongly recommended to insert FIFOs prior to
+                calling CreateStitchedIP."""
+            )
         # ensure that all nodes are fpgadataflow, and that IPs are generated
         for node in model.graph.node:
             assert is_fpgadataflow_node(
@@ -330,12 +332,13 @@ class CreateStitchedIP(Transformation):
         )
         tcl.append("set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv)
         tcl.append("ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv)
+        # mark bus interface params as user-resolvable to avoid FREQ_MHZ mismatches
+        tcl.append(
+            "set_property value_resolve_type user [ipx::get_bus_parameters "
+            "-of [ipx::get_bus_interfaces -of [ipx::current_core ]]]"
+        )
         # if targeting Vitis, add some properties to the IP
         if self.vitis:
-            tcl.append(
-                "set_property value_resolve_type user [ipx::get_bus_parameters "
-                "-of [ipx::get_bus_interfaces -of [ipx::current_core ]]]"
-            )
             # replace source code with dcp
             tcl.append(
                 "set_property sdx_kernel true [ipx::find_open_core %s]" % block_vlnv
diff --git a/src/finn/transformation/fpgadataflow/externalize_params.py b/src/finn/transformation/fpgadataflow/externalize_params.py
new file mode 100644
index 0000000000000000000000000000000000000000..dcb66a8538fdff46214c23491f48a59459625082
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/externalize_params.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from finn.transformation.base import Transformation
+from finn.util.basic import get_by_name
+
+
+class ExternalizeParams(Transformation):
+    """Create top-level graph inputs for IODMAs serving layers where weights are
+    marked as external using mem_mode="external"."""
+
+    def __init__(self):
+        super().__init__()
+
+    def apply(self, model):
+        graph_modified = False
+
+        def filter_fc_extw(x):
+            if x.op_type == "IODMA":
+                burst_mode = get_by_name(x.attribute, "burstMode")
+                if burst_mode is not None:
+                    burst_mode = burst_mode.s.decode("UTF-8")
+                    return burst_mode == "wrap"
+
+        dma_extw_nodes = list(filter(filter_fc_extw, model.graph.node))
+
+        for dma_extw in dma_extw_nodes:
+            extw_tensor_name = dma_extw.input[0]
+            extw_tensor_name_out = dma_extw.output[0]
+            if extw_tensor_name in [x.name for x in model.graph.input]:
+                continue
+            else:
+                extw_vi = model.get_tensor_valueinfo(extw_tensor_name)
+                assert extw_vi is not None
+                model.graph.value_info.remove(extw_vi)
+                model.graph.input.append(extw_vi)
+                iodma_init = model.get_initializer(extw_vi.name)
+                assert iodma_init is not None
+                # remove output-side initializer to get correct dataflow partitioning
+                model.graph.initializer.remove(
+                    [
+                        x
+                        for x in model.graph.initializer
+                        if x.name == extw_tensor_name_out
+                    ][0]
+                )
+                graph_modified = True
+
+        return (model, graph_modified)
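
ExternalizeParams is invoked automatically by the reworked CreateDataflowPartition when weight IODMAs (burstMode "wrap") are present, but it can also be applied directly after InsertIODMA. A minimal sketch, illustrative only; the filename is hypothetical and the model is assumed to contain StreamingFCLayers with mem_mode="external".

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.externalize_params import ExternalizeParams
from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA

model = ModelWrapper("folded_model.onnx")
model = model.transform(InsertIODMA())  # creates weight IODMAs with burstMode="wrap"
model = model.transform(ExternalizeParams())
# the externalized weight tensors now appear as top-level graph inputs
print([x.name for x in model.graph.input])
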
diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py
index ef56db6376703ce1eb0134c173de61a562bca6e6..c8bb716922823876f5f16ffe62f17c425d49aa74 100644
--- a/src/finn/transformation/fpgadataflow/insert_fifo.py
+++ b/src/finn/transformation/fpgadataflow/insert_fifo.py
@@ -180,49 +180,50 @@ class InsertFIFO(Transformation):
                 n.input[0] = fifo_output_tensor.name
 
             # insert FIFO as last node, except when last node is DMA
-            if (
-                graph.node[-1].op_type != "StreamingFIFO"
-                and graph.node[-1].op_type != "IODMA"
-            ):
-                n = graph.node[-1]
-                assert (
-                    n.op_type != "TLastMarker"
-                ), """Insert tlast marker should be done
-                    after inserting the FIFOs"""
-                graph_out_name = graph.output[0].name
-                n0 = getCustomOp(n)
-                # determine fifo node attributes
-                fld_shape = n0.get_folded_output_shape()
-                dtype = n0.get_output_datatype()
-                fifo_depth = n0.get_nodeattr("outFIFODepth")
-
-                if fifo_depth <= 2:
-                    warnings.warn("Overriding output FIFO depth to 32")
-                    fifo_depth = 32
-
-                # create fifo node
-                fifo_input_tensor = oh.make_tensor_value_info(
-                    model.make_new_valueinfo_name(),
-                    TensorProto.FLOAT,
-                    n0.get_normal_output_shape(),
-                )
-                graph.value_info.append(fifo_input_tensor)
-                model.set_tensor_datatype(fifo_input_tensor.name, dtype)
-
-                fifo_node = oh.make_node(
-                    "StreamingFIFO",
-                    [fifo_input_tensor.name],
-                    [graph_out_name],
-                    domain="finn.custom_op.fpgadataflow",
-                    backend="fpgadataflow",
-                    depth=fifo_depth,
-                    folded_shape=fld_shape,
-                    dataType=str(dtype.name),
-                )
-                # insert fifo
-                graph.node.append(fifo_node)
-
-                # set fifo output tensor as new input tensor of second node
-                n.output[0] = fifo_input_tensor.name
+            graph_out_names = [x.name for x in model.graph.output]
+            for graph_out_name in graph_out_names:
+                final_node = model.find_producer(graph_out_name)
+                if (
+                    final_node.op_type != "StreamingFIFO"
+                    and final_node.op_type != "IODMA"
+                ):
+                    assert (
+                        final_node.op_type != "TLastMarker"
+                    ), """Insert tlast marker should be done
+                        after inserting the FIFOs"""
+                    n0 = getCustomOp(final_node)
+                    # determine fifo node attributes
+                    fld_shape = n0.get_folded_output_shape()
+                    dtype = n0.get_output_datatype()
+                    fifo_depth = n0.get_nodeattr("outFIFODepth")
+
+                    if fifo_depth <= 2:
+                        warnings.warn("Overriding output FIFO depth to 32")
+                        fifo_depth = 32
+
+                    # create fifo node
+                    fifo_input_tensor = oh.make_tensor_value_info(
+                        model.make_new_valueinfo_name(),
+                        TensorProto.FLOAT,
+                        n0.get_normal_output_shape(),
+                    )
+                    graph.value_info.append(fifo_input_tensor)
+                    model.set_tensor_datatype(fifo_input_tensor.name, dtype)
+
+                    fifo_node = oh.make_node(
+                        "StreamingFIFO",
+                        [fifo_input_tensor.name],
+                        [graph_out_name],
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                        depth=fifo_depth,
+                        folded_shape=fld_shape,
+                        dataType=str(dtype.name),
+                    )
+                    # insert fifo
+                    graph.node.append(fifo_node)
+
+                    # set fifo input tensor as new output tensor of the final node
+                    final_node.output[0] = fifo_input_tensor.name
 
         return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py
index d4b2a1032aeb305c85ffb535ac821692ce747c18..d0ef270816c362af730a75b59be71d0457e0b8e2 100644
--- a/src/finn/transformation/fpgadataflow/insert_iodma.py
+++ b/src/finn/transformation/fpgadataflow/insert_iodma.py
@@ -87,6 +87,7 @@ class InsertIODMA(Transformation):
         return reshaped_w
 
     def apply(self, model):
+        modified = False
         # only makes sense for a pure fpgadataflow graph -- so we check!
         all_nodes = list(model.graph.node)
         assert all(
@@ -102,59 +103,14 @@ class InsertIODMA(Transformation):
                 all_nodes,
             )
         )
-        graph_in_name = model.graph.input[0].name
-        first_node = model.find_consumer(graph_in_name)
-        graph_out_name = model.graph.output[0].name
-        final_node = model.find_producer(graph_out_name)
-        if (
-            final_node.op_type == "IODMA"
-            and first_node.op_type == "IODMA"
-            and len(fc_extw_nodes) == 0
-        ):
-            # TODO maybe check the correctness of properties
-            return (model, False)
-        else:
-            if final_node.op_type != "IODMA":
-                out_shape = model.get_tensor_shape(graph_out_name)
-                out_dtype = model.get_tensor_datatype(graph_out_name)
-                final_node_inst = getCustomOp(final_node)
-                out_folded_shape = final_node_inst.get_folded_output_shape()
-                # take advantage of AXI stream width padding for DMA alignment
-                # (AXI streams are always padded to 8 bits)
-                # this is the width of stream input to DMA
-                padded_outstream_width = final_node_inst.get_outstream_width_padded()
-                padded_outstream_bytes = padded_outstream_width // 8
-                # determine the feasible interface width
-                transfer_bits = padded_outstream_width * np.prod(out_folded_shape[:-1])
-                intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
-                assert (
-                    intfwidth % 8 == 0
-                ), "No feasible interface width for transfer size"
-                # make new buffer
-                final_node_out = oh.make_tensor_value_info(
-                    model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
-                )
-                model.graph.value_info.append(final_node_out)
-                model.set_tensor_datatype(final_node_out.name, out_dtype)
-                # reroute final node output to final_node_out_name
-                final_node.output[0] = final_node_out.name
-                # FIXME: currently always using 8-bit dtypes to work around the
-                # padding problems for i/o DMA
-                dma_node = oh.make_node(
-                    "IODMA",
-                    [final_node_out.name],
-                    [graph_out_name],
-                    numInputVectors=out_folded_shape[:-1],
-                    NumChannels=padded_outstream_bytes,
-                    dataType="UINT8",
-                    intfWidth=intfwidth,
-                    streamWidth=padded_outstream_width,
-                    direction="out",
-                    domain="finn.custom_op.fpgadataflow",
-                    backend="fpgadataflow",
-                )
-                model.graph.node.append(dma_node)
-            if first_node.op_type != "IODMA":
+        # insert IODMAs for graph inputs
+        graph_in_names = [x.name for x in model.graph.input]
+        for graph_in_name in graph_in_names:
+            first_node = model.find_consumer(graph_in_name)
+            if first_node.op_type == "IODMA":
+                # IODMA already inserted for this input
+                continue
+            else:
                 in_shape = model.get_tensor_shape(graph_in_name)
                 in_dtype = model.get_tensor_datatype(graph_in_name)
                 first_node_inst = getCustomOp(first_node)
@@ -194,47 +150,96 @@ class InsertIODMA(Transformation):
                     backend="fpgadataflow",
                 )
                 model.graph.node.insert(0, dma_node)
-            for fc_node in fc_extw_nodes:
-                fc_inst = getCustomOp(fc_node)
-                fc_w_name = fc_node.input[1]
-                w_shape = model.get_tensor_shape(fc_w_name)
-                w_dtype = model.get_tensor_datatype(fc_w_name)
+                modified = True
+        # insert IODMAs for graph outputs
+        graph_out_names = [x.name for x in model.graph.output]
+        for graph_out_name in graph_out_names:
+            final_node = model.find_producer(graph_out_name)
+            if final_node.op_type == "IODMA":
+                continue
+            else:
+                out_shape = model.get_tensor_shape(graph_out_name)
+                out_dtype = model.get_tensor_datatype(graph_out_name)
+                final_node_inst = getCustomOp(final_node)
+                out_folded_shape = final_node_inst.get_folded_output_shape()
+                # take advantage of AXI stream width padding for DMA alignment
+                # (AXI streams are always padded to 8 bits)
+                # this is the width of stream input to DMA
+                padded_outstream_width = final_node_inst.get_outstream_width_padded()
+                padded_outstream_bytes = padded_outstream_width // 8
                 # determine the feasible interface width
-                transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
+                transfer_bits = padded_outstream_width * np.prod(out_folded_shape[:-1])
                 intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
                 assert (
                     intfwidth % 8 == 0
                 ), "No feasible interface width for transfer size"
-                # calculate width of stream output from DMA
-                pe = get_by_name(fc_node.attribute, "PE").i
-                simd = get_by_name(fc_node.attribute, "SIMD").i
-                streamWidth = fc_inst.get_weightstream_width_padded()
                 # make new buffer
-                W = model.get_initializer(fc_w_name)
-                iodma_mem = self.get_mem_init(W, pe, simd)
-                model.set_initializer(fc_w_name, iodma_mem)
-
-                fc_node_in = oh.make_tensor_value_info(
-                    model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape
+                final_node_out = oh.make_tensor_value_info(
+                    model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
                 )
-                model.graph.value_info.append(fc_node_in)
-                model.set_tensor_datatype(fc_node_in.name, w_dtype)
-                model.set_initializer(fc_node_in.name, W)
+                model.graph.value_info.append(final_node_out)
+                model.set_tensor_datatype(final_node_out.name, out_dtype)
+                # reroute final node output to final_node_out_name
+                final_node.output[0] = final_node_out.name
+                # FIXME: currently always using 8-bit dtypes to work around the
+                # padding problems for i/o DMA
                 dma_node = oh.make_node(
                     "IODMA",
-                    [fc_w_name],
-                    [fc_node_in.name],
-                    numInputVectors=[iodma_mem.shape[0]],
-                    NumChannels=pe * simd,
-                    dataType=str(w_dtype.name),
+                    [final_node_out.name],
+                    [graph_out_name],
+                    numInputVectors=out_folded_shape[:-1],
+                    NumChannels=padded_outstream_bytes,
+                    dataType="UINT8",
                     intfWidth=intfwidth,
-                    streamWidth=streamWidth,
-                    direction="in",
-                    burstMode="wrap",
+                    streamWidth=padded_outstream_width,
+                    direction="out",
                     domain="finn.custom_op.fpgadataflow",
                     backend="fpgadataflow",
                 )
-                fc_node.input[1] = fc_node_in.name
-                model.graph.node.insert(0, dma_node)
+                model.graph.node.append(dma_node)
+                modified = True
+
+        for fc_node in fc_extw_nodes:
+            fc_inst = getCustomOp(fc_node)
+            fc_w_name = fc_node.input[1]
+            w_shape = model.get_tensor_shape(fc_w_name)
+            w_dtype = model.get_tensor_datatype(fc_w_name)
+            # determine the feasible interface width
+            transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
+            intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
+            assert intfwidth % 8 == 0, "No feasible interface width for transfer size"
+            # calculate width of stream output from DMA
+            pe = get_by_name(fc_node.attribute, "PE").i
+            simd = get_by_name(fc_node.attribute, "SIMD").i
+            streamWidth = fc_inst.get_weightstream_width_padded()
+            # make new buffer
+            W = model.get_initializer(fc_w_name)
+            iodma_mem = self.get_mem_init(W, pe, simd)
+            model.set_initializer(fc_w_name, iodma_mem)
+
+            fc_node_in = oh.make_tensor_value_info(
+                model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape
+            )
+            model.graph.value_info.append(fc_node_in)
+            model.set_tensor_datatype(fc_node_in.name, w_dtype)
+            model.set_initializer(fc_node_in.name, W)
+            dma_node = oh.make_node(
+                "IODMA",
+                [fc_w_name],
+                [fc_node_in.name],
+                numInputVectors=[iodma_mem.shape[0]],
+                NumChannels=pe * simd,
+                dataType=str(w_dtype.name),
+                intfWidth=intfwidth,
+                streamWidth=streamWidth,
+                direction="in",
+                burstMode="wrap",
+                domain="finn.custom_op.fpgadataflow",
+                backend="fpgadataflow",
+            )
+            fc_node.input[1] = fc_node_in.name
+            model.graph.node.insert(0, dma_node)
+            modified = True
+        if modified:
             model = model.transform(SortGraph())
-            return (model, True)
+        return (model, modified)
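
Both the input- and output-side IODMA insertions above size the memory-mapped interface with the same rule. A worked micro-example with made-up numbers: the interface width is the greatest common divisor of the total transfer size in bits and the maximum allowed width, and must be byte-aligned.

import math

max_intfwidth = 64
padded_outstream_width = 24  # stream width after padding to a byte multiple
num_folded_elems = 49  # product of the folded output shape without its last dim
transfer_bits = padded_outstream_width * num_folded_elems
intfwidth = math.gcd(transfer_bits, max_intfwidth)
assert intfwidth % 8 == 0, "No feasible interface width for transfer size"
print(intfwidth)  # -> 8 for these numbers
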
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index be2176a34763fdb5521a0acdfc3137fb4b4a766e..2c3bd7ee59e23566bbd0acf2241ca67ed2beb3ea 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -90,6 +90,7 @@ class MakePYNQDriver(Transformation):
         self.platform = platform
 
     def apply(self, model):
+
         # create a temporary folder for the generated driver
         pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
         model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)
@@ -100,59 +101,100 @@ class MakePYNQDriver(Transformation):
         )
         driver_base_py = pynq_driver_dir + "/driver_base.py"
         shutil.copy(driver_base_template, driver_base_py)
-
         # extract input-output shapes from the graph
         # TODO convert this to an analysis pass?
-        i_tensor_name = model.graph.input[0].name
-        o_tensor_name = model.graph.output[0].name
-        i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
-        o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
-        i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
-        o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
-
-        first_node = model.find_consumer(i_tensor_name)
-        last_node = model.find_producer(o_tensor_name)
-        if first_node.op_type == "StreamingDataflowPartition":
-            # IODMAs and dataflow partitions have already been created
-            # extract folded i/o shapes from IODMA consumer/producer
-            first_df_model = ModelWrapper(getCustomOp(first_node).get_nodeattr("model"))
+        idt = []
+        idma_names = []
+        ishape_normal = []
+        ishape_folded = []
+        ishape_packed = []
+        for idma_ind, graph_in in enumerate(model.graph.input):
+            i_tensor_name = graph_in.name
+            # get inp tensor properties
+            i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
+            i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
+            # go down into dataflow partition to get folded shape info etc
+            # TODO consider setting these as attributes during dataflow partitioning
+            i_consumer = model.find_consumer(i_tensor_name)
+            assert (
+                i_consumer.op_type == "StreamingDataflowPartition"
+            ), """
+                Ensure CreateDataflowPartition is called before driver creation."""
+            first_df_model = ModelWrapper(getCustomOp(i_consumer).get_nodeattr("model"))
             assert (
                 first_df_model.graph.node[0].op_type == "IODMA"
             ), "First partition must hold input IODMA"
-            successors = model.find_direct_successors(first_node)
+            successors = model.find_direct_successors(i_consumer)
+            successor_input_num = list(successors[0].input).index(i_consumer.output[0])
             successor_sdp = getCustomOp(successors[0])
             successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
             first_node = successor_df_model.find_consumer(
-                successor_df_model.graph.input[0].name
+                successor_df_model.graph.input[successor_input_num].name
             )
-
-            last_df_model = ModelWrapper(getCustomOp(last_node).get_nodeattr("model"))
+            i_tensor_shape_folded = tuple(
+                getCustomOp(first_node).get_folded_input_shape()
+            )
+            # generate dummy folded i/o tensors and their packed versions
+            i_tensor_dummy_folded = gen_finn_dt_tensor(
+                i_tensor_dt, i_tensor_shape_folded
+            )
+            i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
+                i_tensor_dummy_folded, i_tensor_dt
+            )
+            i_tensor_shape_packed = i_tensor_dummy_packed.shape
+            # append all input tensor info to relevant lists
+            idt.append("DataType['%s']" % i_tensor_dt.name)
+            ishape_normal.append(i_tensor_shape_normal)
+            ishape_folded.append(i_tensor_shape_folded)
+            ishape_packed.append(i_tensor_shape_packed)
+            idma_names.append(getCustomOp(i_consumer).get_nodeattr("instance_name"))
+
+        odt = []
+        odma_names = []
+        oshape_normal = []
+        oshape_folded = []
+        oshape_packed = []
+        for odma_ind, graph_out in enumerate(model.graph.output):
+            o_tensor_name = graph_out.name
+            # get inp tensor properties
+            # get output tensor properties
+            o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
+            # go down into IODMA partition to get folded shape info etc
+            # TODO consider setting these as attributes during dataflow partitioning
+            o_producer = model.find_producer(o_tensor_name)
+            assert (
+                o_producer.op_type == "StreamingDataflowPartition"
+            ), """
+                Ensure CreateDataflowPartition is called before driver creation."""
+            df_model = ModelWrapper(getCustomOp(o_producer).get_nodeattr("model"))
             assert (
-                last_df_model.graph.node[0].op_type == "IODMA"
-            ), "Last partition must hold output IODMA"
-            predecessors = model.find_direct_predecessors(last_node)
+                df_model.graph.node[-1].op_type == "IODMA"
+            ), "Partition must hold output IODMA"
+            predecessors = model.find_direct_predecessors(o_producer)
+            predecessor_output_num = list(predecessors[0].output).index(
+                o_producer.input[0]
+            )
             predecessor_sdp = getCustomOp(predecessors[0])
             predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model"))
             last_node = predecessor_df_model.find_producer(
-                predecessor_df_model.graph.output[0].name
+                predecessor_df_model.graph.output[predecessor_output_num].name
             )
-
-        # else: transformation called before IODMA/SDP creation (legacy flow)
-        # can access folded i/o shapes directly
-        i_tensor_shape_folded = tuple(getCustomOp(first_node).get_folded_input_shape())
-        o_tensor_shape_folded = tuple(getCustomOp(last_node).get_folded_output_shape())
-
-        # generate dummy folded i/o tensors and their packed versions
-        i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded)
-        o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt, o_tensor_shape_folded)
-        i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
-            i_tensor_dummy_folded, i_tensor_dt
-        )
-        o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
-            o_tensor_dummy_folded, o_tensor_dt
-        )
-        i_tensor_shape_packed = i_tensor_dummy_packed.shape
-        o_tensor_shape_packed = o_tensor_dummy_packed.shape
+            o_tensor_shape_folded = tuple(
+                getCustomOp(last_node).get_folded_output_shape()
+            )
+            o_tensor_dummy_folded = gen_finn_dt_tensor(
+                o_tensor_dt, o_tensor_shape_folded
+            )
+            o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
+                o_tensor_dummy_folded, o_tensor_dt
+            )
+            o_tensor_shape_packed = o_tensor_dummy_packed.shape
+            # append all output tensor info to relevant lists
+            odt.append("DataType['%s']" % o_tensor_dt.name)
+            oshape_normal.append(o_tensor_shape_normal)
+            oshape_folded.append(o_tensor_shape_folded)
+            oshape_packed.append(o_tensor_shape_packed)
+            odma_names.append(getCustomOp(o_producer).get_nodeattr("instance_name"))
 
         # generate external weights npy files
         weights_dir = pynq_driver_dir + "/runtime_weights"
@@ -166,47 +208,50 @@ class MakePYNQDriver(Transformation):
                 node.op_type == "StreamingDataflowPartition"
             ), "CreateDataflowPartition needs to be applied before driver generation"
 
-            producer = model.find_producer(node.input[0])
-            init_tensor = model.get_initializer(node.input[0])
+            if len(node.input) > 0:
+                producer = model.find_producer(node.input[0])
+                init_tensor = model.get_initializer(node.input[0])
+            else:
+                producer = None
+                init_tensor = None
 
             if producer is None:  # input dma?
-                idma_name = "idma" + str(idma_idx)
-                if init_tensor is not None:  # input weights dma?
+                sdp_inst = getCustomOp(node)
+                idma_name = sdp_inst.get_nodeattr("instance_name")
+                df_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
+                assert df_model.graph.node[0].op_type == "IODMA"
+                iodma_node = getCustomOp(df_model.graph.node[0])
+                if iodma_node.get_nodeattr("burstMode") == "wrap":  # input weights dma?
+                    init_tensor = df_model.get_initializer(
+                        iodma_node.onnx_node.input[0]
+                    )
                     ext_weight_dma_cnt += 1
-                    w_dtype = model.get_tensor_datatype(node.input[0])
+                    w_dtype = df_model.get_tensor_datatype(
+                        iodma_node.onnx_node.input[0]
+                    )
                     init_external_tensor = to_external_tensor(init_tensor, w_dtype)
                     np.save(
                         weights_dir + "/" + idma_name + ".npy", init_external_tensor
                     )
-                else:
-                    net_input_name = idma_name
-
                 idma_idx += 1
 
         # fill in the driver template
         driver_py = pynq_driver_dir + "/driver.py"
         driver = template_driver.pynq_driver_template
 
-        def mss(x, batch_var_name="1"):
-            # "make shape string"
-            # for a shape like (1, ...) emit a string (N, ...)
-            # where N is the default value for batch_var_name
-            # this lets the driver work with a batch of samples at once
-            ret = str(x)
-            ret = ret.replace("(1,", "(%s," % batch_var_name)
-            ret = ret.replace("[1,", "[%s," % batch_var_name)
-            return ret
-
         driver = driver.replace("$PLATFORM$", self.platform)
-        driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt))
-        driver = driver.replace("$INPUT_SHAPE_NORMAL$", mss(i_tensor_shape_normal))
-        driver = driver.replace("$INPUT_SHAPE_FOLDED$", mss(i_tensor_shape_folded))
-        driver = driver.replace("$INPUT_SHAPE_PACKED$", mss(i_tensor_shape_packed))
-        driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(o_tensor_dt))
-        driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal))
-        driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded))
-        driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed))
-        driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
+        driver = driver.replace("$INPUT_FINN_DATATYPE$", str(idt).replace('"', ""))
+        driver = driver.replace("$INPUT_SHAPE_NORMAL$", str(ishape_normal))
+        driver = driver.replace("$INPUT_SHAPE_FOLDED$", str(ishape_folded))
+        driver = driver.replace("$INPUT_SHAPE_PACKED$", str(ishape_packed))
+        driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(odt).replace('"', ""))
+        driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", str(oshape_normal))
+        driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", str(oshape_folded))
+        driver = driver.replace("$OUTPUT_SHAPE_PACKED$", str(oshape_packed))
+        driver = driver.replace("$INPUT_DMA_NAME$", "%s" % str(idma_names))
+        driver = driver.replace("$OUTPUT_DMA_NAME$", "%s" % str(odma_names))
+        driver = driver.replace("$NUM_INPUTS$", str(len(idma_names)))
+        driver = driver.replace("$NUM_OUTPUTS$", str(len(odma_names)))
         driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))
 
         with open(driver_py, "w") as f:
diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
index dbcca1a23051fc3f62f9b402e774c7de9dd0112b..80ce8f0163a23293423ac208451c901eb645643c 100644
--- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -42,11 +42,10 @@ from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
-from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.util.basic import get_by_name, make_build_dir, pynq_part_map
+from finn.util.basic import make_build_dir, pynq_part_map
 
 from . import templates
 
@@ -54,19 +53,22 @@ from . import templates
 def collect_ip_dirs(model, ipstitch_path):
     # collect list of all IP dirs
     ip_dirs = []
+    need_memstreamer = False
     for node in model.graph.node:
-        ip_dir_attribute = get_by_name(node.attribute, "ip_path")
-        assert (
-            ip_dir_attribute is not None
-        ), """Node attribute "ip_path" is
-        empty. Please run transformation HLSSynth_ipgen first."""
-        ip_dir_value = ip_dir_attribute.s.decode("UTF-8")
+        node_inst = getCustomOp(node)
+        ip_dir_value = node_inst.get_nodeattr("ip_path")
         assert os.path.isdir(
             ip_dir_value
         ), """The directory that should
         contain the generated ip blocks doesn't exist."""
         ip_dirs += [ip_dir_value]
+        if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
+            if node_inst.get_nodeattr("mem_mode") == "decoupled":
+                need_memstreamer = True
     ip_dirs += [ipstitch_path + "/ip"]
+    if need_memstreamer:
+        # add RTL streamer IP
+        ip_dirs.append("/workspace/finn/finn-rtllib/memstream")
     return ip_dirs
 
 
@@ -142,16 +144,21 @@ class MakeZYNQProject(Transformation):
             # assume only one connection from each ip to the next
             # all aximm allocated to DDR[0]
             # all kernels allocated to SLR0
-            producer = model.find_producer(node.input[0])
+            if len(node.input) == 0:
+                producer = None
+            else:
+                producer = model.find_producer(node.input[0])
             consumer = model.find_consumers(node.output[0])
             # define kernel instances
             # name kernels connected to graph inputs as idmaxx
-            # name kernels connected to graph inputs as odmaxx
+            # name kernels connected to graph outputs as odmaxx
             if producer is None or consumer is None:
                 if producer is None:
                     instance_names[node.name] = "idma" + str(idma_idx)
+                    idma_idx += 1
                 elif consumer is None:
                     instance_names[node.name] = "odma" + str(odma_idx)
+                    odma_idx += 1
                 config.append(
                     "create_bd_cell -type ip -vlnv %s %s"
                     % (vivado_stitch_vlnv, instance_names[node.name])
@@ -176,7 +183,7 @@ class MakeZYNQProject(Transformation):
                     "assign_axi_addr_proc %s/%s"
                     % (instance_names[node.name], axilite_intf_name)
                 )
-                idma_idx += 1
+
                 aximm_idx += 1
                 axilite_idx += 1
             else:
@@ -197,6 +204,7 @@ class MakeZYNQProject(Transformation):
                         % (instance_names[node.name], axilite_intf_name)
                     )
                     axilite_idx += 1
+            sdp_node.set_nodeattr("instance_name", instance_names[node.name])
 
             config.append(
                 "connect_bd_net [get_bd_pins %s/ap_clk] "
@@ -301,12 +309,19 @@ class ZynqBuild(Transformation):
 
     """
 
-    def __init__(self, platform, period_ns, enable_debug=False):
+    def __init__(
+        self,
+        platform,
+        period_ns,
+        enable_debug=False,
+        partition_model_dir=None,
+    ):
         super().__init__()
         self.fpga_part = pynq_part_map[platform]
         self.period_ns = period_ns
         self.platform = platform
         self.enable_debug = enable_debug
+        self.partition_model_dir = partition_model_dir
 
     def apply(self, model):
         # first infer layouts
@@ -316,7 +331,7 @@ class ZynqBuild(Transformation):
             InsertIODMA(64),
             InsertDWC(),
             Floorplan(),
-            CreateDataflowPartition(),
+            CreateDataflowPartition(partition_model_dir=self.partition_model_dir),
         ]
         for trn in prep_transforms:
             model = model.transform(trn)
@@ -338,7 +353,7 @@ class ZynqBuild(Transformation):
             kernel_model = kernel_model.transform(HLSSynthIP())
             kernel_model = kernel_model.transform(
                 CreateStitchedIP(
-                    self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True
+                    self.fpga_part, self.period_ns, sdp_node.onnx_node.name, False
                 )
             )
             kernel_model.set_metadata_prop("platform", "zynq-iodma")
@@ -351,6 +366,4 @@ class ZynqBuild(Transformation):
         # set platform attribute for correct remote execution
         model.set_metadata_prop("platform", "zynq-iodma")
 
-        # create driver
-        model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
         return (model, False)
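
Since driver generation is no longer part of ZynqBuild (or of VitisBuild further below), it now has to be invoked as a separate transformation after the build. A minimal sketch of the updated flow; the platform, clock period and file name are illustrative:

    from finn.core.modelwrapper import ModelWrapper
    from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
    from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild

    # build the bitfile first (platform, period and file name are placeholders) ...
    model = ModelWrapper("model_for_build.onnx")
    model = model.transform(ZynqBuild(platform="Pynq-Z2", period_ns=10))
    # ... then generate the PYNQ driver explicitly
    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
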
diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
index c06c34574aa22a23d1307232b0fd8e65224f1983..39eb049565475b462ea0df9d88b46e3598e6cdd9 100644
--- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py
+++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
@@ -87,9 +87,14 @@ class RemoveShallowFIFOs(Transformation):
     def apply(self, model):
         shallow_fifos = []
         for node in model.graph.node:
+            if len(node.input) > 0:
+                is_first_node = model.find_producer(node.input[0]) is None
+            else:
+                is_first_node = True
             if (
                 node.op_type == "StreamingFIFO"
                 and getCustomOp(node).get_nodeattr("depth") <= self.shallow_threshold
+                and (not is_first_node)
             ):
                 # bypass shallow fifos
                 shallow_fifos.append(node)
diff --git a/src/finn/transformation/fpgadataflow/set_folding.py b/src/finn/transformation/fpgadataflow/set_folding.py
index 914dda9554395fc89cac8692e13339ae3ce9baf7..64d7a080724820d58a026bafbe74a4d7567b2179 100644
--- a/src/finn/transformation/fpgadataflow/set_folding.py
+++ b/src/finn/transformation/fpgadataflow/set_folding.py
@@ -26,6 +26,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import numpy as np
 import warnings
 
 from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
@@ -154,9 +155,16 @@ class SetFolding(Transformation):
                     pe = node_inst.get_nodeattr("PE")
                     swu_node_inst.set_nodeattr("SIMD", pe)
                 else:
-                    raise Exception(
-                        "Expected SWU on DW op input, found " + swu_node.op_type
-                    )
+                    if op_type == "Vector_Vector_Activate_Batch":
+                        ksize = np.prod(node_inst.get_nodeattr("Kernel"))
+                    elif op_type == "Pool_Batch":
+                        ksize = node_inst.get_nodeattr("KernelSize")
+                    else:
+                        raise Exception("Undefined edge case for %s" % op_type)
+                    if ksize != 1:  # pointwise VVAU/Pool ops have no SWU
+                        raise Exception(
+                            "Expected SWU on DW op input, found " + swu_node.op_type
+                        )
             elif op_type in simd_ops:
                 if op_type == "ConvolutionInputGenerator":
                     depthwise = node_inst.get_nodeattr("depthwise")
diff --git a/src/finn/transformation/fpgadataflow/template_driver.py b/src/finn/transformation/fpgadataflow/template_driver.py
index 5265835dd2530a5c93ceefbef629a43d6f33de52..31dd22573e35894794dc522c0cf6ab47ce6c6cfc 100644
--- a/src/finn/transformation/fpgadataflow/template_driver.py
+++ b/src/finn/transformation/fpgadataflow/template_driver.py
@@ -79,7 +79,10 @@ io_shape_dict = {
     "ishape_packed" : $INPUT_SHAPE_PACKED$,
     "oshape_packed" : $OUTPUT_SHAPE_PACKED$,
     "input_dma_name" : $INPUT_DMA_NAME$,
-    "number_of_external_weights": $EXT_WEIGHT_NUM$
+    "output_dma_name" : $OUTPUT_DMA_NAME$,
+    "number_of_external_weights": $EXT_WEIGHT_NUM$,
+    "num_inputs" : $NUM_INPUTS$,
+    "num_outputs" : $NUM_OUTPUTS$,
 }
 
 if __name__ == "__main__":
@@ -88,8 +91,8 @@ if __name__ == "__main__":
     parser.add_argument('--platform', help='Target platform: zynq-iodma alveo', default="$PLATFORM$")
     parser.add_argument('--batchsize', help='number of samples for inference', type=int, default=1)
     parser.add_argument('--bitfile', help='name of bitfile (i.e. "resizer.bit")', default="resizer.bit")
-    parser.add_argument('--inputfile', help='name of input npy file (i.e. "input.npy")', default="input.npy")
-    parser.add_argument('--outputfile', help='name of output npy file (i.e. "output.npy")', default="output.npy")
+    parser.add_argument('--inputfile', help='name(s) of input npy file(s) (i.e. "input.npy")', nargs="*", type=str, default=["input.npy"])
+    parser.add_argument('--outputfile', help='name(s) of output npy file(s) (i.e. "output.npy")', nargs="*", type=str, default=["output.npy"])
     parser.add_argument('--runtime_weight_dir', help='path to folder containing runtime-writable .dat weights', default="runtime_weights/")
     # parse arguments
     args = parser.parse_args()
@@ -111,16 +114,15 @@ if __name__ == "__main__":
     # for the remote execution the data from the input npy file has to be loaded,
     # packed and copied to the PYNQ buffer
     if exec_mode == "execute":
-        # remove old output file to prevent reusing old output
-        # in case execution fails
-        try:
-            os.remove(outputfile)
-        except FileNotFoundError:
-            pass
-        # load desired input .npy file
-        ibuf_normal = np.load(inputfile)
+        # load desired input .npy file(s)
+        ibuf_normal = []
+        for ifn in inputfile:
+            ibuf_normal.append(np.load(ifn))
         obuf_normal = accel.execute(ibuf_normal)
-        np.save(outputfile, obuf_normal)
+        if not isinstance(obuf_normal, list):
+            obuf_normal = [obuf_normal]
+        for o, obuf in enumerate(obuf_normal):
+            np.save(outputfile[o], obuf)
     elif exec_mode == "throughput_test":
         # remove old metrics file
         try:
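
With the new placeholders the generated io_shape_dict holds one entry per input/output DMA. A filled-in sketch for a hypothetical single-input, single-output network; shapes and DMA names are made up and entries not shown in the hunk above are omitted:

    io_shape_dict = {
        # ... datatype and normal/folded shape entries omitted ...
        "ishape_packed" : [(1, 28, 28, 1)],
        "oshape_packed" : [(1, 1, 10)],
        "input_dma_name" : ['idma0'],
        "output_dma_name" : ['odma0'],
        "number_of_external_weights": 0,
        "num_inputs" : 1,
        "num_outputs" : 1,
    }

Correspondingly, --inputfile and --outputfile now accept one .npy file per input and output, e.g. --inputfile in0.npy in1.npy for a two-input network.
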
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index ae13f6e4e464fea8884f89e7f071e53e28a5c623..a12f359c7d3f1c29a17694ef4987a1a349286234 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -119,6 +119,7 @@ if {$BOARD == "ZCU104"} {
     set ZYNQ_TYPE "zynq_us+"
 } elseif {$BOARD == "Pynq-Z2"} {
     set ZYNQ_TYPE "zynq_7000"
+    set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project]
 } elseif {$BOARD == "Pynq-Z1"} {
     set ZYNQ_TYPE "zynq_7000"
     set_property board_part www.digilentinc.com:pynq-z1:part0:1.0 [current_project]
@@ -134,6 +135,7 @@ if {$ZYNQ_TYPE == "zynq_us+"} {
     set_property -dict [list CONFIG.PSU__USE__S_AXI_GP2 {1}] [get_bd_cells zynq_ps]
     set_property -dict [list CONFIG.PSU__USE__M_AXI_GP1 {0}] [get_bd_cells zynq_ps]
     #set frequency of PS clock (this can't always be exactly met)
+    set_property -dict [list CONFIG.PSU__OVERRIDE__BASIC_CLOCK {0}] [get_bd_cells zynq_ps]
     set_property -dict [list CONFIG.PSU__CRL_APB__PL0_REF_CTRL__FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps]
 } elseif {$ZYNQ_TYPE == "zynq_7000"} {
     create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 zynq_ps
diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py
index 502b6f2bffd0d64980ae911d28b845ad90633a44..a2865321418343efbfdae12c111ba4334ecfee28 100644
--- a/src/finn/transformation/fpgadataflow/vitis_build.py
+++ b/src/finn/transformation/fpgadataflow/vitis_build.py
@@ -43,7 +43,6 @@ from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
-from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.general import (
     GiveReadableTensorNames,
@@ -207,7 +206,10 @@ class VitisLink(Transformation):
             # has axis, aximm and axilite
             # everything else is axis-only
             # assume only one connection from each ip to the next
-            producer = model.find_producer(node.input[0])
+            if len(node.input) == 0:
+                producer = None
+            else:
+                producer = model.find_producer(node.input[0])
             consumer = model.find_consumers(node.output[0])
             # define kernel instances
             # name kernels connected to graph inputs as idmaxx
@@ -223,6 +225,7 @@ class VitisLink(Transformation):
             else:
                 instance_names[node.name] = node.name
                 config.append("nk=%s:1:%s" % (node.name, instance_names[node.name]))
+            sdp_node.set_nodeattr("instance_name", instance_names[node.name])
             # explicitly assign SLRs if the slr attribute is not -1
             node_slr = sdp_node.get_nodeattr("slr")
             if node_slr != -1:
@@ -375,6 +378,7 @@ class VitisBuild(Transformation):
         enable_debug=False,
         floorplan_file=None,
         enable_link=True,
+        partition_model_dir=None,
     ):
         super().__init__()
         self.fpga_part = fpga_part
@@ -384,6 +388,7 @@ class VitisBuild(Transformation):
         self.enable_debug = enable_debug
         self.floorplan_file = floorplan_file
         self.enable_link = enable_link
+        self.partition_model_dir = partition_model_dir
 
     def apply(self, model):
         _check_vitis_envvars()
@@ -398,7 +403,9 @@ class VitisBuild(Transformation):
 
         model = model.transform(Floorplan(floorplan=self.floorplan_file))
 
-        model = model.transform(CreateDataflowPartition())
+        model = model.transform(
+            CreateDataflowPartition(partition_model_dir=self.partition_model_dir)
+        )
         model = model.transform(GiveUniqueNodeNames())
         model = model.transform(GiveReadableTensorNames())
 
@@ -439,6 +446,4 @@ class VitisBuild(Transformation):
         # set platform attribute for correct remote execution
         model.set_metadata_prop("platform", "alveo")
 
-        # create driver
-        model = model.transform(MakePYNQDriver(platform="alveo"))
         return (model, False)
diff --git a/src/finn/transformation/qonnx/convert_qonnx_to_finn.py b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
new file mode 100644
index 0000000000000000000000000000000000000000..70656e4d0987924ba43d0e657414d0d172feb5ce
--- /dev/null
+++ b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
+
+from finn.transformation.base import Transformation
+from finn.transformation.extract_conv_bias import ExtractBiasFromConv
+from finn.transformation.gemm_to_matmul import GemmToMatMul
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.qonnx.fold_quant_weights import FoldQuantWeights
+from finn.transformation.qonnx.infer_quant_avg_pool_2d import (
+    AvgPoolAndTruncToQuantAvgPool,
+)
+from finn.transformation.qonnx.quant_act_to_multithreshold import (
+    ConvertQuantActToMultiThreshold,
+    default_filter_function_generator,
+)
+from finn.transformation.remove import RemoveIdentityOps
+
+
+class ConvertQONNXtoFINN(Transformation):
+    """Converts QONNX dialect to FINN ONNX dialect.
+    First the weights are converted using the FoldQuantWeights transformation,
+    then the ConvertQuantActToMultiThreshold transformation is used to convert
+    the activations.
+    If incompatibilities are found, a ValueError or RuntimeError is raised.
+
+    The optional keyword argument `filter_function`
+    presents a way to control which Quant and BipolarQuant nodes in the activation path
+    are converted to MultiThreshold nodes. A warning will be emitted when a Quant node
+    is not converted to a MultiThreshold node.
+
+    :param filter_function: Each candidate Quant and BipolarQuant node is first evaluated
+    by this function. If the function returns False,
+    then the node is not converted to a MultiThreshold node.
+    The function is given the model and candidate node as parameters.
+    By default a filter function is used, which disables the conversion of
+    Quant nodes with a bit width larger than 8.
+    Defaults to: default_filter_function_generator(max_multithreshold_bit_width=8)
+    """
+
+    def __init__(
+        self,
+        filter_function=default_filter_function_generator(
+            max_multithreshold_bit_width=8
+        ),
+    ):
+        super().__init__()
+        self._filter_function = filter_function
+
+    def apply(self, model):
+        # Extract the bias from Conv node
+        model = model.transform(ExtractBiasFromConv())
+        # Gemm operations are not supported by FINN, so we convert them to MatMul
+        model = model.transform(GemmToMatMul())
+        model = model.transform(FoldTransposeIntoQuantInit())
+        # Make sure the datatypes exist, these are required for folding the weights
+        model = model.transform(InferDataTypes())
+        # Fold weights
+        model = model.transform(FoldQuantWeights())
+        # Convert activations
+        model = model.transform(
+            ConvertQuantActToMultiThreshold(
+                filter_function=self._filter_function,
+            )
+        )
+        # Recompute datatypes
+        model = model.transform(InferDataTypes())
+        # Convert AvgPool -> Mul -> Trunc structure to QuantAvgPool2d
+        model = model.transform(AvgPoolAndTruncToQuantAvgPool())
+        # Remove empty padding if it exists
+        model = model.transform(RemoveIdentityOps())
+
+        return model, False
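
A minimal usage sketch for the new conversion entry point, assuming a QONNX model exported e.g. from Brevitas; the file name and the 8-bit threshold are illustrative:

    from finn.core.modelwrapper import ModelWrapper
    from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
    from finn.transformation.qonnx.quant_act_to_multithreshold import (
        default_filter_function_generator,
    )

    model = ModelWrapper("model_qonnx.onnx")  # placeholder file name
    # keep the default behaviour: only convert activation Quant nodes up to 8 bits
    model = model.transform(
        ConvertQONNXtoFINN(
            filter_function=default_filter_function_generator(
                max_multithreshold_bit_width=8
            )
        )
    )
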
diff --git a/src/finn/transformation/qonnx/fold_quant_weights.py b/src/finn/transformation/qonnx/fold_quant_weights.py
new file mode 100644
index 0000000000000000000000000000000000000000..12c854d3bab2b762abc3649e15beff29ff8de3ac
--- /dev/null
+++ b/src/finn/transformation/qonnx/fold_quant_weights.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from onnx import TensorProto, helper
+from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
+
+import finn.core.onnx_exec as oxe
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.remove import remove_node_and_rewire
+
+
+class FoldQuantWeights(Transformation):
+    """Merges Quant nodes, which are used as weights into the initializer
+    of the weight tensor.
+    """
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        execution_context = model.make_empty_exec_context()
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "Quant" or n.op_type == "BipolarQuant":
+                node_inp_inits = list(map(lambda x: model.get_initializer(x), n.input))
+                node_inp_dyn = list(filter(lambda x: x is None, node_inp_inits))
+                node_out = n.output[0]
+                is_all_constant_inputs = len(node_inp_dyn) == 0
+                ishape = model.get_tensor_shape(n.input[0])
+                is_const_shape = (n.op_type == "Shape") and (ishape is not None)
+                if is_all_constant_inputs or is_const_shape:
+                    # Check node validity
+                    if (
+                        n.op_type == "Quant"
+                        and not model.get_initializer(n.input[2]) == 0
+                    ):
+                        raise ValueError(
+                            "Only Quant nodes with zero-point == 0 "
+                            "are currently supported."
+                        )
+                    if model.is_fork_node(n):
+                        raise ValueError(
+                            "Weights quantized with the Quant node are not "
+                            "allowed to be fork nodes."
+                        )
+                    target_node = model.find_direct_successors(n)
+                    if target_node is None:
+                        raise RuntimeError(
+                            "Weights quantized with the Quant node must have "
+                            "a successor node."
+                        )
+                    else:
+                        target_node = target_node[0]
+                    # If there is a DebugMarker in the weight path,
+                    # it needs to be removed before any further action is taken,
+                    # because it interferes with how the constant folding
+                    # determines how to apply scale factors, and in any case
+                    # the DebugMarker would not return useful information
+                    # after folding.
+                    if target_node.op_type == "DebugMarker":
+                        remove_node_and_rewire(model, target_node)
+                        model = model.transform(FoldTransposeIntoQuantInit())
+                        return model, True
+
+                    # Continue with constant folding the quant node
+                    scale = model.get_initializer(n.input[1])
+                    unity_scale = (scale.flatten() == 1.0).all()
+                    # this node has no dynamic inputs, only constant ones -- so we can
+                    # do constant folding.
+                    oxe.execute_node(n, execution_context, graph)
+                    q_node_output = execution_context[node_out]
+                    # Check we can directly constant fold
+                    if unity_scale:
+                        # use the execution result as an initializer
+                        model.set_initializer(node_out, q_node_output)
+                    else:
+                        # Check next operator type
+                        mul_like_nodes = ["Mul", "Div", "Conv", "MatMul"]
+                        add_like_nodes = ["Add", "Sub"]
+                        all_supported_ops = mul_like_nodes.copy()
+                        all_supported_ops.extend(add_like_nodes)
+
+                        if target_node.op_type not in all_supported_ops:
+                            raise ValueError(
+                                f"Can't constant fold Quant weight node "
+                                f"into node type {target_node.op_type} "
+                                f"at node: {target_node}."
+                            )
+
+                        # For both Mul and Add:
+                        # Move the scale factor behind the next operator
+                        scale = model.get_initializer(n.input[1])
+                        new_initializer = q_node_output / scale
+                        # Round, to correct for floating point errors
+                        new_initializer = np.round(new_initializer)
+                        model.set_initializer(node_out, new_initializer)
+                        q_inst = getCustomOp(n)
+                        new_dtype = q_inst.get_integer_datatype(model)
+                        model.set_tensor_datatype(node_out, new_dtype)
+
+                        # Reshape scale for Conv if required
+                        if target_node.op_type == "Conv" and len(scale.shape) > 0:
+                            bias_shape = [1] * len(scale.shape)
+                            bias_shape[1] = -1
+                            scale = scale.reshape(bias_shape)
+
+                        if scale.shape == (1,):
+                            scale = scale[0]
+                            mul_shape = tuple()
+                        else:
+                            mul_shape = scale.shape
+                        mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            mul_shape,
+                        )
+                        graph.value_info.append(mul_tensor)
+                        model.set_initializer(mul_tensor.name, scale)
+
+                        successor = model.find_consumers(node_out)
+                        if successor is None:
+                            raise RuntimeError(
+                                "Can only constant fold scaled Quant weights "
+                                "if a successor exists."
+                            )
+                        successor = successor[0]
+                        succ_output_name = successor.output[0]
+
+                        output_shape = model.get_tensor_shape(successor.output[0])
+                        act_mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            output_shape,
+                        )
+                        graph.value_info.append(act_mul_tensor)
+                        successor.output[0] = act_mul_tensor.name
+
+                        mul_node = helper.make_node(
+                            "Mul",
+                            [act_mul_tensor.name, mul_tensor.name],
+                            [succ_output_name],
+                        )
+                        graph.node.insert(node_ind, mul_node)
+
+                        if target_node.op_type in add_like_nodes:
+                            # For Add-like ops the scale factor must also be
+                            # moved in front of the next operator, by dividing
+                            # its dynamic input by the scale
+                            div_tensor = helper.make_tensor_value_info(
+                                model.make_new_valueinfo_name(),
+                                TensorProto.FLOAT,
+                                mul_shape,
+                            )
+                            graph.value_info.append(div_tensor)
+                            model.set_initializer(div_tensor.name, scale)
+
+                            succ_input_name = successor.input[0]
+                            act_mul_tensor = helper.make_tensor_value_info(
+                                model.make_new_valueinfo_name(),
+                                TensorProto.FLOAT,
+                                output_shape,
+                            )
+                            graph.value_info.append(act_mul_tensor)
+                            successor.input[0] = act_mul_tensor.name
+
+                            div_node = helper.make_node(
+                                "Div",
+                                [succ_input_name, div_tensor.name],
+                                [act_mul_tensor.name],
+                            )
+                            graph.node.insert(node_ind, div_node)
+
+                    # remove old node
+                    graph.node.remove(n)
+                    graph_modified = True
+                    model = model.transform(InferShapes())
+                    return (model, graph_modified)
+        return (model, graph_modified)
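
For the non-unity-scale case the transformation splits the quantized weight into an integer initializer and a scale factor that is pushed behind the consuming node. A rough numerical sketch of that rewrite (plain NumPy, not FINN API):

    import numpy as np

    # assume a Quant weight node produced q_out = w_int * scale with scale != 1
    scale = np.array(0.25)
    q_out = np.array([[0.5, -0.75], [1.0, 0.0]])

    # the folded initializer becomes the integer weights ...
    w_int = np.round(q_out / scale)  # [[2., -3.], [4., 0.]]
    # ... and a Mul node with `scale` is inserted behind the consumer (e.g. MatMul),
    # so MatMul(x, w_int) * scale == MatMul(x, q_out) for any input x
    x = np.array([[1.0, 2.0]])
    assert np.allclose((x @ w_int) * scale, x @ q_out)
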
diff --git a/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..faad31fa06e76b245f25b6f0aa583fec5c0da29a
--- /dev/null
+++ b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import math
+from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
+
+
+def _get_signed_from_upstream(model, trunc_node):
+    """
+    Find out what the sign of the input to the trunc node is,
+    by looking at the upstream nodes.
+    """
+    node = trunc_node
+    # Check if the input of this node already has a FINN datatype
+    signed = None
+    inp_dt = model.get_tensor_datatype(node.input[0])
+    if inp_dt is not None and inp_dt is not DataType["FLOAT32"]:
+        signed = inp_dt.signed()
+    # Go further up the graph; since the datatype inference works top down,
+    # these nodes should either be sign-preserving ops or they already have a
+    # datatype defined for the output tensor.
+    curr_node = node
+    if signed is None:
+        while curr_node is not None:
+            if model.is_join_node(curr_node):
+                raise RuntimeError(
+                    "Datatype Inference for the Trunc node only supports "
+                    "linear nodes in the upstream path."
+                )
+            next_node = model.find_direct_predecessors(curr_node)
+            if next_node is None:
+                raise RuntimeError(
+                    "Could not infer the datatype for the Trunc node due to "
+                    "missing upstream nodes."
+                )
+            next_node = next_node[0]
+            out_dt = model.get_tensor_datatype(next_node.output[0])
+            if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+                signed = out_dt.signed()
+                break
+            # Special cases where the node has an internal or intrinsic datatype.
+            if next_node.op_type == "MultiThreshold":
+                mt_inst = getCustomOp(next_node)
+                out_dt = DataType[mt_inst.get_nodeattr("out_dtype")]
+                if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+                    signed = out_dt.signed()
+                    break
+            if next_node.op_type == "BipolarQuant":
+                signed = True
+                break
+            if next_node.op_type == "Quant":
+                q_inst = getCustomOp(next_node)
+                out_dt = q_inst.get_integer_datatype(model)
+                if out_dt is not None and out_dt is not DataType["FLOAT32"]:
+                    signed = out_dt.signed()
+                    break
+
+            # Check if we are allowed to move on to the next op
+            sign_preserving_ops = ["Add", "Mul", "AveragePool", "Pad"]
+            if next_node.op_type not in sign_preserving_ops:
+                raise RuntimeError(
+                    f"Could not infer the datatype for the Trunc node, "
+                    f"because the sign of the input datatype could not be inferred "
+                    f"from upstream nodes. And traversal further up the graph was "
+                    f"disallowed, since the next node type {next_node.op_type} "
+                    f"is not in the list of "
+                    f"sign preserving ops {sign_preserving_ops}."
+                )
+            curr_node = next_node
+
+    if signed is None:
+        raise RuntimeError(
+            "Could not infer the datatype for the Trunc node, "
+            "because the sign of the input datatype could not be inferred "
+            "from upstream nodes."
+        )
+
+    return signed
+
+
+class AvgPoolAndTruncToQuantAvgPool(Transformation):
+    """
+    Convert a section of nodes of the pattern:
+    AveragePool -> Mul (scalar) -> Trunc
+    to the FINN op: QuantAvgPool2d
+    """
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "AveragePool":
+                mul_node = model.find_direct_successors(n)
+                if (
+                    mul_node is not None
+                    and len(mul_node) == 1
+                    and mul_node[0].op_type == "Mul"
+                ):
+                    mul_node = mul_node[0]
+                    t_node = model.find_direct_successors(mul_node)
+                    if (
+                        t_node is not None
+                        and len(t_node) == 1
+                        and t_node[0].op_type == "Trunc"
+                    ):
+                        t_node = t_node[0]
+                        running_node_index = node_ind
+                        # Check node for compatibility
+                        # Avg pooling node
+                        k_s = get_by_name(n.attribute, "kernel_shape")
+                        if k_s is None or len(k_s.ints) != 2 or len(set(k_s.ints)) != 1:
+                            raise ValueError(
+                                "FINN only supports average pooling with "
+                                "2D square kernels."
+                            )
+                        k_s = k_s.ints[0]
+
+                        pads = get_by_name(n.attribute, "pads")
+                        if (
+                            pads is None
+                            or len(set(pads.ints)) != 1
+                            or pads.ints[0] != 0
+                        ):
+                            raise ValueError(
+                                "FINN doesn't support padding for average pooling."
+                            )
+
+                        stride = get_by_name(n.attribute, "strides")
+                        if (
+                            stride is None
+                            or len(stride.ints) != 2
+                            or len(set(stride.ints)) != 1
+                        ):
+                            raise ValueError(
+                                "FINN only supports 2D strides with equal values in "
+                                "each direction."
+                            )
+                        stride = stride.ints[0]
+
+                        # Mul node
+                        mul_val = model.get_initializer(mul_node.input[1])
+                        if (
+                            mul_val is None
+                            or len(mul_val.shape) != 0
+                            or mul_val != k_s * k_s
+                        ):
+                            raise ValueError(
+                                f"The Mul node after the AveragePool node must have "
+                                f"a static initializer at its second input, "
+                                f"the initializer must be zero-dimensional (a scalar), "
+                                f"and its value must be the square of the "
+                                f"kernel size, in this case {k_s * k_s}."
+                            )
+
+                        # Trunc node
+                        rounding_mode = get_by_name(t_node.attribute, "rounding_mode")
+                        if rounding_mode is None or rounding_mode.s != b"FLOOR":
+                            raise ValueError(
+                                "The Trunc node must have the rounding_mode "
+                                "set to 'FLOOR'."
+                            )
+                        for inp in t_node.input[1:]:
+                            if model.get_initializer(inp) is None:
+                                raise ValueError(
+                                    f"All inputs of the Trunc node, "
+                                    f"except the first, must be statically "
+                                    f"initialized. However, {inp} is not."
+                                )
+                        zero_pt = model.get_initializer(t_node.input[2])
+                        if len(zero_pt.shape) != 0 or zero_pt != 0:
+                            raise ValueError(
+                                f"FINN only supports 0 as the zero point for "
+                                f"the Trunc node, it currently is {zero_pt}."
+                            )
+                        trunc_in_bits = model.get_initializer(t_node.input[3]).flatten()
+                        trunc_out_bits = model.get_initializer(
+                            t_node.input[4]
+                        ).flatten()
+                        if (
+                            len(trunc_in_bits.shape) != 1
+                            or len(trunc_out_bits.shape) != 1
+                        ):
+                            raise ValueError(
+                                f"FINN only supports scalar bit widths "
+                                f"for the Trunc node. The input bit width "
+                                f"currently is: {trunc_in_bits}, "
+                                f"while the output bit width is: {trunc_out_bits}."
+                            )
+                        trunc_in_bits = int(trunc_in_bits[0])
+                        trunc_out_bits = int(trunc_out_bits[0])
+
+                        # Calculate parameters for the QuantAvgPool2d node.
+                        # Calculate the input bit width; basically the inverse of:
+                        # https://github.com/Xilinx/finn-base/blob/
+                        # 7c2603a95e90e4de2575020e575c24eab6a15889/src/finn/custom_op/
+                        # general/quantavgpool2d.py#L94
+                        ibits = math.floor(
+                            math.log(2 ** trunc_in_bits / (k_s * k_s), 2)
+                        )
+                        # Get sign
+                        signed = _get_signed_from_upstream(model, t_node)
+                        # ToDo: Change this to NHWC,
+                        #  when the channels last layout comes around.
+                        data_layout = "NCHW"
+
+                        # Insert scale nodes, QuantAvgPool2d node and required tensors
+                        scale = model.get_initializer(t_node.input[1])
+                        scale_div_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(scale_div_tensor)
+                        model.set_initializer(scale_div_tensor.name, scale)
+
+                        act_scale_div_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(act_scale_div_tensor)
+
+                        scale_div_node = helper.make_node(
+                            "Div",
+                            [n.input[0], scale_div_tensor.name],
+                            [act_scale_div_tensor.name],
+                        )
+                        graph.node.insert(running_node_index, scale_div_node)
+                        running_node_index += 1
+
+                        act_scale_mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(act_scale_mul_tensor)
+
+                        QuantAvgPool2d_node = helper.make_node(
+                            "QuantAvgPool2d",
+                            [act_scale_div_tensor.name],
+                            [act_scale_mul_tensor.name],
+                            domain="finn.custom_op.general",
+                            stride=stride,
+                            kernel=k_s,
+                            ibits=ibits,
+                            obits=trunc_out_bits,
+                            signed=int(signed),
+                            data_layout=data_layout,
+                        )
+                        graph.node.insert(running_node_index, QuantAvgPool2d_node)
+                        running_node_index += 1
+
+                        scale_mul_tensor = helper.make_tensor_value_info(
+                            model.make_new_valueinfo_name(),
+                            TensorProto.FLOAT,
+                            None,
+                        )
+                        graph.value_info.append(scale_mul_tensor)
+                        model.set_initializer(scale_mul_tensor.name, scale)
+
+                        scale_mul_node = helper.make_node(
+                            "Mul",
+                            [act_scale_mul_tensor.name, scale_mul_tensor.name],
+                            [t_node.output[0]],
+                        )
+                        graph.node.insert(running_node_index, scale_mul_node)
+                        running_node_index += 1
+
+                        # Remove old nodes
+                        graph.node.remove(n)
+                        graph.node.remove(mul_node)
+                        graph.node.remove(t_node)
+
+                        # Recompute shapes and datatypes
+                        model = model.transform(InferShapes())
+                        model = model.transform(InferDataTypes())
+
+                        return model, True
+
+        return model, False
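
The ibits computation above reverses the accumulator sizing of QuantAvgPool2d: roughly, the Trunc input bit width equals the pool input bit width plus the bits needed to accumulate over the k*k window. A small worked example, purely illustrative:

    import math

    k_s = 2             # 2x2 average pooling window
    trunc_in_bits = 10  # bit width at the Trunc input (made-up value)

    ibits = math.floor(math.log(2 ** trunc_in_bits / (k_s * k_s), 2))
    print(ibits)  # -> 8, i.e. an 8-bit input accumulated over a 2x2 window
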
diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
new file mode 100644
index 0000000000000000000000000000000000000000..3336b1eee7fa9d54092cd56b9ba0edaf9d0884b1
--- /dev/null
+++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -0,0 +1,524 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from abc import ABC, abstractmethod
+from onnx import TensorProto, helper
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+
+
+class QuantActBaseHandler(ABC):
+    """Base class for converting quantized activation expressed in the QONNX dialect
+    to the FINN ONNX dialect.
+    :param model: The model on which this handler should operate.
+    :type model: class: `finn.core.modelwrapper.ModelWrapper`
+    :param quant_node: The Quant node which a given handler should replace.
+    :param quant_node_index: The index of the Quant node in the given model.
+    :type quant_node_index: `int`
+    """
+
+    def __init__(self, model: ModelWrapper, quant_node, quant_node_index: int):
+        """Base class constructor"""
+        super().__init__()
+        self._model = model
+        self._q_node = quant_node
+        self._q_index = quant_node_index
+
+    @property
+    @classmethod
+    @abstractmethod
+    def valid_predecessor_op_types(self):
+        """Defines which op types the preceding node is allowed to have for
+        this type of activation.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _check_compatibility(self):
+        """Check for compatibility with FINN.
+        There are many more possible combinations of QONNX settings,
+        than what is supported by FINN.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _calculate_act_bias(self):
+        """Calculate the activation bias,
+        which is introduced as an Add node behind the MultiThreshold node.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _calculate_thresholds(self):
+        """Calculate the threshold array for the MultiThreshold node."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _calculate_act_scale(self):
+        """Calculate the activation scale,
+        which is introduced as a Mul node behind the Add node
+        for the activation bias.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _remove_activation_node(self, multi_threshold_node):
+        """Remove the activation node in front of the Quant node."""
+        raise NotImplementedError()
+
+    def _extract_output_datatype(self):
+        """Get the output datatype for the MultiThreshold node."""
+        q_inst = getCustomOp(self._q_node)
+        dtype = q_inst.get_integer_datatype(self._model)
+        dtype = dtype.name
+        return dtype
+
+    def calculate_node_parameters(self):
+        """Calculate all parameters required for replacing the QONNX style activation
+        with a FINN style one.
+        """
+        return {
+            "out_dtype": self._extract_output_datatype(),
+            "thresholds": self._calculate_thresholds(),
+            "adder_bias": self._calculate_act_bias(),
+            "mul_scale": self._calculate_act_scale(),
+        }
+
+    def replace_quant_node(self):
+        """Replace the given QONNX style activation with a FINN style one."""
+
+        # Check that we actually support what the user is trying to do
+        self._check_compatibility()
+
+        # Shorten instance variables
+        model = self._model
+        graph = model.graph
+        n = self._q_node
+        running_node_index = self._q_index
+
+        # Calculate insertion parameters
+        parameter_dict = self.calculate_node_parameters()
+        thresholds = parameter_dict["thresholds"]
+        out_dtype = parameter_dict["out_dtype"]
+        adder_bias = parameter_dict["adder_bias"]
+        mul_scale = parameter_dict["mul_scale"]
+
+        # Modify graph
+        # Insert threshold tensor
+        thresh_tensor = helper.make_tensor_value_info(
+            model.make_new_valueinfo_name(),
+            TensorProto.FLOAT,
+            thresholds.shape,
+        )
+        graph.value_info.append(thresh_tensor)
+        model.set_initializer(thresh_tensor.name, thresholds)
+
+        # Insert MultiThreshold node
+        outp_trans_node = helper.make_node(
+            "MultiThreshold",
+            [n.input[0], thresh_tensor.name],
+            [n.output[0]],
+            out_dtype="FLOAT32",
+            domain="finn.custom_op.general",
+        )
+        graph.node.insert(running_node_index, outp_trans_node)
+        running_node_index += 1
+
+        # Get the MultiThreshold node instance to work with
+        mt_node = graph.node[running_node_index - 1]
+        mt_inst = getCustomOp(mt_node)
+
+        # Set scale and bias
+        # If these values are scalar then they can be set as attributes
+        # of the MultiThreshold node; if not, they get inserted as Add and Mul nodes
+        # behind the MultiThreshold node.
+        bias_scalar = adder_bias.shape == (1,) or len(adder_bias.shape) == 0
+        scale_scalar = mul_scale.shape == (1,) or len(mul_scale.shape) == 0
+        if scale_scalar and bias_scalar and self._q_node.op_type == "BipolarQuant":
+            # Get Quant parameters
+            mul_scale = np.atleast_1d(mul_scale)
+            # ONNX only accepts 64bit floats as attributes
+            mul_scale = mul_scale.astype(dtype=np.float64)
+            adder_bias = np.atleast_1d(adder_bias)
+            adder_bias = adder_bias.astype(dtype=np.float64)
+
+            # Set Bias and scale
+            mt_inst.set_nodeattr("out_scale", mul_scale[0])
+            # FINN applies scale first then bias,
+            # which is the other way around in Brevitas,
+            # we thus need to adjust the bias in the MultiThreshold node
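+            # illustrative example: for scale = 0.5 and bias = 2, Brevitas
+            # computes 0.5 * (x + 2) = 0.5 * x + 1, so FINN's out_bias must
+            # be 1 = 2 * 0.5 = bias * scale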
+            finn_bias = adder_bias[0] * mul_scale[0]
+            mt_inst.set_nodeattr("out_bias", finn_bias)
+
+            # Set the output data type
+            mt_inst.set_nodeattr("out_dtype", out_dtype)
+        else:
+            # Set datatype
+            mt_inst.set_nodeattr("out_dtype", out_dtype)
+
+            # Insertion parameters
+            up_stream_node = mt_node
+
+            # Set bias
+            zero_bias = False
+            if bias_scalar:
+                adder_bias = np.atleast_1d(adder_bias)
+                # ONNX only accepts 64bit floats as attributes
+                adder_bias = adder_bias.astype(dtype=np.float64)[0]
+                add_shape = tuple()
+                if adder_bias == 0.0:
+                    zero_bias = True
+            else:
+                add_shape = adder_bias.shape
+
+            if not zero_bias:
+                # Insert Add node
+                add_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    add_shape,
+                )
+                graph.value_info.append(add_tensor)
+                model.set_initializer(add_tensor.name, adder_bias)
+
+                output_shape = model.get_tensor_shape(n.output[0])
+                act_add_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    output_shape,
+                )
+                graph.value_info.append(act_add_tensor)
+
+                add_node = helper.make_node(
+                    "Add",
+                    [act_add_tensor.name, add_tensor.name],
+                    [n.output[0]],
+                )
+                graph.node.insert(running_node_index, add_node)
+                running_node_index += 1
+                add_node = graph.node[running_node_index - 1]
+
+                # Re-point the upstream node
+                up_stream_node.output[0] = act_add_tensor.name
+                up_stream_node = add_node
+
+            # Set scale
+            # Insert Mul node
+            unity_scale = False
+            if scale_scalar:
+                mul_scale = np.atleast_1d(mul_scale)
+                mul_scale = mul_scale.astype(dtype=np.float64)[0]
+                mul_shape = tuple()
+                if mul_scale == 1.0:
+                    unity_scale = True
+            else:
+                mul_shape = mul_scale.shape
+
+            if not unity_scale:
+                mul_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    mul_shape,
+                )
+                graph.value_info.append(mul_tensor)
+                model.set_initializer(mul_tensor.name, mul_scale)
+
+                output_shape = model.get_tensor_shape(n.output[0])
+                act_mul_tensor = helper.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    output_shape,
+                )
+                graph.value_info.append(act_mul_tensor)
+
+                mul_node = helper.make_node(
+                    "Mul",
+                    [act_mul_tensor.name, mul_tensor.name],
+                    [n.output[0]],
+                )
+                graph.node.insert(running_node_index, mul_node)
+                running_node_index += 1
+                mul_node = graph.node[running_node_index - 1]
+
+                # Re-point the upstream node
+                up_stream_node.output[0] = act_mul_tensor.name
+                up_stream_node = mul_node
+
+        # Remove activation node
+        self._remove_activation_node(mt_node)
+
+        # Remove the Quant node
+        graph.node.remove(n)
+
+        # return the internal model representation
+        return self._model
+
+
+class QuantReluHandler(QuantActBaseHandler):
+    """Class for converting a quantized relu operation expressed in the QONNX
+    dialect to the FINN ONNX dialect."""
+
+    valid_predecessor_op_types = [
+        "Relu",
+    ]
+
+    def _check_compatibility(self):
+        if self._q_node.op_type == "Quant":
+            q_inst = getCustomOp(self._q_node)
+            narrow = q_inst.get_nodeattr("narrow")
+            signed = q_inst.get_nodeattr("signed")
+            if signed or narrow:
+                raise ValueError(
+                    "FINN only supports unsigned and non-narrow Quant nodes "
+                    "for Relu activations."
+                )
+            if not self._model.get_initializer(self._q_node.input[2]) == 0:
+                raise ValueError(
+                    "Only Quant nodes with zero-point == 0 "
+                    "are currently supported for ReLu activations."
+                )
+        elif self._q_node.op_type == "BipolarQuant":
+            return
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+
+    def _calculate_act_bias(self):
+        # No bias allowed for Relu activations, see: https://github.com/Xilinx/
+        # brevitas/blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+        # export/onnx/finn/handler/act.py#L48
+        bias = np.array([0.0])
+        return bias
+
+    def _calculate_thresholds(self):
+        # Gather parameters
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        quant_scale = self._model.get_initializer(self._q_node.input[1]).astype(
+            np.float32
+        )
+        # q_inst = getCustomOp(self._q_node)
+        # narrow = q_inst.get_nodeattr("narrow")
+
+        # Calculate thresholds, see: https://github.com/Xilinx/brevitas/blob/
+        # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+        # onnx/finn/handler/act.py#L21
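+        # For example, a 2-bit ReLU Quant with unit scale yields the thresholds
+        # [0.5, 1.5, 2.5] for each channel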
+        num_distinct_values = 2 ** bit_width
+        num_thresholds = int(num_distinct_values - 1)
+        flat_scale = quant_scale.flatten().astype(np.float32)
+        num_scale_channels = flat_scale.shape[0]
+        step = np.abs(flat_scale).astype(np.float32)
+        min_threshold = step / 2
+        thresholds = np.empty((num_scale_channels, num_thresholds)).astype(np.float32)
+        for c in range(num_scale_channels):
+            for t in range(num_thresholds):
+                thresholds[c][t] = min_threshold[c] + step[c] * t
+
+        # ToDo: The index 1 needs to be changed to -1 for the channels last format
+        num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[1]
+        final_shape = (num_output_channels, num_thresholds)
+        if thresholds.shape != final_shape:
+            thresholds = np.broadcast_to(thresholds, final_shape)
+
+        return thresholds
+
+    def _calculate_act_scale(self):
+        # Gather parameters
+        quant_scale = self._model.get_initializer(self._q_node.input[1])
+        # Calculate scale, see: https://github.com/Xilinx/brevitas/blob/
+        # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+        # onnx/finn/handler/act.py#L40
+        scale = quant_scale
+        return scale
+
+    def _remove_activation_node(self, multi_threshold_node):
+        # Find the activation node
+        act_node = self._model.find_direct_predecessors(self._q_node)
+        if act_node is None:
+            raise RuntimeError(
+                "For handling of Relu activations a predecesor to "
+                "the Quant node must exist."
+            )
+        act_node = act_node[0]
+        if not act_node.op_type == "Relu":
+            raise RuntimeError(
+                "The predecesor of the Quant node must be Relu for handling "
+                "of Relu activations."
+            )
+
+        # Reroute upstream tensor
+        multi_threshold_node.input[0] = act_node.input[0]
+
+        # Remove the activation node
+        self._model.graph.node.remove(act_node)
+
+
+class QuantIdentityHandler(QuantActBaseHandler):
+    """Class for converting a quantized identity operation expressed in the QONNX
+    dialect to the FINN ONNX dialect.
+    This handler also takes care of quantized HardTanh activations, because
+    these are equivalent to quantized identity activations.
+    """
+
+    valid_predecessor_op_types = [
+        "BatchNormalization",
+        "Sub",
+        "Add",
+        "Mul",
+        "Div",
+        "DebugMarker",
+        None,
+    ]
+
+    def _check_compatibility(self):
+        # Gather parameters to check
+        if self._q_node.op_type == "Quant":
+            q_inst = getCustomOp(self._q_node)
+            signed = q_inst.get_nodeattr("signed")
+            if not signed:
+                raise ValueError(
+                    "FINN only supports signed Quant nodes for identity activations."
+                )
+            if not self._model.get_initializer(self._q_node.input[2]) == 0:
+                raise ValueError(
+                    "Only Quant nodes with zero-point == 0 "
+                    "are currently supported for identity activations."
+                )
+        elif self._q_node.op_type == "BipolarQuant":
+            quant_scale = self._model.get_initializer(self._q_node.input[1])
+            if (quant_scale.flatten().shape[0] != 1) or quant_scale.flatten()[0] != 1.0:
+                raise ValueError(
+                    "FINN only supports Bipolar identity activations "
+                    "with out per channel scaling and the scaling must be 1. "
+                )
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+
+    def _calculate_act_bias(self):
+        # Gather parameters
+        q_inst = getCustomOp(self._q_node)
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+            narrow = q_inst.get_nodeattr("narrow")
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        # Calculate bias, see: https://github.com/Xilinx/brevitas/blob/
+        # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
+        # onnx/finn/handler/act.py#L64
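+        # For example, bit_width=4 gives a bias of -7 (narrow) or -8 (non-narrow),
+        # while a bipolar (1-bit) quantizer gives -0.5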
+        if bit_width == 1.0:
+            bias = np.array([-0.5])
+        else:
+            if narrow:
+                min_non_scaled_val = -(2 ** (bit_width - 1) - 1)
+            else:
+                min_non_scaled_val = -(2 ** (bit_width - 1))
+            bias = np.array([min_non_scaled_val])
+        return bias
+
+    def _calculate_thresholds(self):
+        # Gather parameters
+        quant_scale = self._model.get_initializer(self._q_node.input[1])
+        q_inst = getCustomOp(self._q_node)
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+            narrow = q_inst.get_nodeattr("narrow")
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+
+        # Calculate thresholds, see: https://github.com/Xilinx/brevitas/
+        # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+        # export/onnx/finn/handler/act.py#L76
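+        # For example, a signed, non-narrow 2-bit Quant with unit scale yields the
+        # thresholds [-1.5, -0.5, 0.5] for each channel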
+        if bit_width == 1.0:
+            thresholds = np.empty([1, 1])
+            thresholds[0] = 0
+            return thresholds
+        else:
+            if narrow:
+                num_distinct_values = 2 ** bit_width - 1
+            else:
+                num_distinct_values = 2 ** bit_width
+
+            num_thresholds = int(num_distinct_values - 1)
+            flat_scale = quant_scale.flatten()
+            num_scale_channels = flat_scale.shape[0]
+            step = np.abs(flat_scale)
+            half_step = step / 2.0
+            thresholds = np.empty((num_scale_channels, num_thresholds))
+            # compute the value of the smallest threshold, we'll neg-bias all
+            # generated thresholds by this much
+            min_threshold = -half_step - step * ((num_thresholds // 2) - 1)
+            if not narrow:
+                min_threshold -= step
+            for c in range(num_scale_channels):
+                for t in range(num_thresholds):
+                    thresholds[c][t] = min_threshold[c] + step[c] * t
+
+            # ToDo: The index 1 needs to be changed to -1 for the channels last format
+            num_output_channels = self._model.get_tensor_shape(self._q_node.output[0])[
+                1
+            ]
+            final_shape = (num_output_channels, num_thresholds)
+            if thresholds.shape != final_shape:
+                thresholds = np.broadcast_to(thresholds, final_shape)
+
+            return thresholds
+
+    def _calculate_act_scale(self):
+        # Gather parameters
+        if self._q_node.op_type == "Quant":
+            bit_width = self._model.get_initializer(self._q_node.input[3])
+        elif self._q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        quant_scale = self._model.get_initializer(self._q_node.input[1])
+        # Calculate scale, see: https://github.com/Xilinx/brevitas/
+        # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
+        # export/onnx/finn/handler/act.py#L111
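+        # For bipolar activations the single threshold maps inputs to {0, 1};
+        # applying the bias of -0.5 and a scale of 2 * quant_scale yields {-1, +1}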
+        if bit_width != 1:
+            scale = quant_scale
+        else:
+            assert (
+                quant_scale.flatten().shape[0] == 1
+            ), "Unsupported BIPOLAR per channel scale"
+            assert quant_scale.flatten()[0] == 1.0, "Unsupported BIPOLAR scale != 1"
+            scale = quant_scale * 2
+        return scale
+
+    def _remove_activation_node(self, multi_threshold_node):
+        # The Quant identity activation by definition has no explicit activation node
+        return
diff --git a/src/finn/transformation/qonnx/quant_act_to_multithreshold.py b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
new file mode 100644
index 0000000000000000000000000000000000000000..29ba93dfcfe6d18e0ff8927b6d646cb310d0262a
--- /dev/null
+++ b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import warnings
+
+from finn.transformation.base import Transformation
+from finn.transformation.qonnx.qonnx_activation_handlers import QuantActBaseHandler
+
+
+def default_filter_function_generator(max_multithreshold_bit_width=8):
+    """
+    This function generates the default filter function for the
+    ConvertQuantActToMultiThreshold transformation. By default, the returned
+    function disables the conversion of Quant nodes with a bit width above 8 bits.
+
+    This function generator can be used as a template to write custom
+    filter functions.
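+
+    A minimal sketch of a custom filter function (the node-name prefix used
+    here is purely illustrative)::
+
+        def my_filter(model, q_node):
+            return not q_node.name.startswith("Quant_skip")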
+    """
+
+    def filter_function(model, q_node):
+        if q_node.op_type == "Quant":
+            bit_width = model.get_initializer(q_node.input[3])
+        elif q_node.op_type == "BipolarQuant":
+            bit_width = 1.0
+        else:
+            raise RuntimeError("Got an unexpected quantizer node type")
+        if bit_width is None:
+            raise ValueError("Quant nodes must have a static bit width.")
+        if bit_width > max_multithreshold_bit_width:
+            warnings.warn(
+                f'The Quant node with name: "{q_node.name}" was not converted to a '
+                f"MultiThreshold node, because its bit width of {bit_width} is "
+                f"higher than the configured maximum bit width of "
+                f"{max_multithreshold_bit_width}."
+            )
+            return False
+        return True
+
+    return filter_function
+
+
+class ConvertQuantActToMultiThreshold(Transformation):
+    """
+    Converts Quant nodes in the activation path to MultiThreshold nodes.
+
+    The optional keyword argument `filter_function`
+    presents a way to control which Quant and BipolarQuant nodes in the activation path
+    are converted to MultiThreshold nodes. A warning will be emitted when a Quant node
+    is not converted to a MultiThreshold node.
+
+    :param filter_function: Each candidate Quant and BipolarQuant node is first
+    evaluated by this function. If the function returns False,
+    the node is not converted to a MultiThreshold node.
+    The function is given the model and the candidate node as parameters.
+    By default a filter function is used which disables the conversion of
+    Quant nodes with a bit width larger than 8.
+    Defaults to: default_filter_function_generator(max_multithreshold_bit_width=8)
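+
+    A minimal usage sketch (the model file name is illustrative)::
+
+        from finn.core.modelwrapper import ModelWrapper
+        model = ModelWrapper("model.onnx")
+        model = model.transform(
+            ConvertQuantActToMultiThreshold(
+                filter_function=default_filter_function_generator(
+                    max_multithreshold_bit_width=4
+                )
+            )
+        )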
+    """
+
+    def __init__(
+        self,
+        filter_function=default_filter_function_generator(
+            max_multithreshold_bit_width=8
+        ),
+    ):
+        super().__init__()
+        self._filter_function = filter_function
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "Quant" or n.op_type == "BipolarQuant":
+                # Check that the node is in the activation path
+                inp = model.get_initializer(n.input[0])
+                out = model.get_initializer(n.output[0])
+                if not (inp is None and out is None):
+                    continue
+                predecessor = model.find_direct_predecessors(n)
+                if predecessor is not None:
+                    predecessor_op_type = predecessor[0].op_type
+                else:
+                    predecessor_op_type = predecessor
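+                    # i.e. None, which matches the None entry in the
+                    # QuantIdentityHandler valid_predecessor_op_types list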
+                if model.is_fork_node(n):
+                    raise ValueError(
+                        "Forking Quant/BipolarQuant nodes are currently "
+                        "not supported by FINN."
+                    )
+                if n.op_type == "Quant" and not model.get_initializer(n.input[2]) == 0:
+                    raise ValueError(
+                        "Only Quant nodes with zero-point == 0 are currently supported."
+                    )
+
+                # Check that this node passes the user filter
+                if not self._filter_function(model, n):
+                    warnings.warn(
+                        f'The Quant node with name: "{n.name}" was not converted to a '
+                        f"MultiThreshold node, because the filtering function "
+                        f"returned False for this node."
+                    )
+                    continue
+
+                # Check for possible ambiguity in handler selection
+                valid_predecessors = []
+                for cls in QuantActBaseHandler.__subclasses__():
+                    valid_predecessors.extend(cls.valid_predecessor_op_types)
+                if len(valid_predecessors) != len(set(valid_predecessors)):
+                    raise RuntimeError(
+                        "Two or more activation handlers declare the same "
+                        "type of valid predecessor node. "
+                        "This leads to ambiguity in the handler selection "
+                        "and must thus be avoided."
+                    )
+
+                # Try to find a fitting handler for this Quant activation node
+                for handler_cls in QuantActBaseHandler.__subclasses__():
+                    if predecessor_op_type in handler_cls.valid_predecessor_op_types:
+                        handler = handler_cls(model, n, node_ind)
+                        break
+                else:
+                    raise ValueError(
+                        f"Quant nodes in the activation path and with predecessor "
+                        f"nodes of type {predecessor_op_type} are currently not "
+                        f"supported by FINN and can not be converted to "
+                        f"MultiThreshold nodes."
+                    )
+                model = handler.replace_quant_node()
+                graph_modified = True
+                return (model, graph_modified)
+
+        return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py
index ea547571677a9d90a226b55de8582145b8c298f4..d0ec26a4d10c688db7931e40d7cfd840394b55a1 100644
--- a/src/finn/transformation/streamline/__init__.py
+++ b/src/finn/transformation/streamline/__init__.py
@@ -39,6 +39,7 @@ from finn.transformation.general import (
     GiveUniqueNodeNames,
 )
 from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.remove import RemoveIdentityOps
 from finn.transformation.streamline.absorb import (
     Absorb1BitMulIntoConv,
     Absorb1BitMulIntoMatMul,
@@ -51,7 +52,6 @@ from finn.transformation.streamline.collapse_repeated import (
     CollapseRepeatedAdd,
     CollapseRepeatedMul,
 )
-from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.transformation.streamline.reorder import (
     MoveAddPastConv,
     MoveAddPastMul,
diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py
index 1e2830356fe0133038caaa1dbc43f97ca98378d1..97ae3b51a849a4174c9853cb41c0d6d72bdf8dad 100644
--- a/src/finn/transformation/streamline/absorb.py
+++ b/src/finn/transformation/streamline/absorb.py
@@ -205,7 +205,7 @@ class FactorOutMulSignMagnitude(Transformation):
                 actual_ndims = len(tuple(filter(lambda x: x > 1, A.shape)))
                 is_1d = actual_ndims == 1
                 is_not_bipolar = (
-                    model.get_tensor_datatype(mul_weight_name) != DataType.BIPOLAR
+                    model.get_tensor_datatype(mul_weight_name) != DataType["BIPOLAR"]
                 )
                 is_signed = (A < 0).any()
                 if is_signed and (is_scalar or is_1d) and is_not_bipolar:
@@ -217,7 +217,7 @@ class FactorOutMulSignMagnitude(Transformation):
                     # create new mul node with sign(A) as the operand
                     sgn = np.sign(A)
                     model.set_initializer(sign_mul_param_name, sgn)
-                    model.set_tensor_datatype(sign_mul_param_name, DataType.BIPOLAR)
+                    model.set_tensor_datatype(sign_mul_param_name, DataType["BIPOLAR"])
                     # replace original mul weight by magnitudes
                     model.set_initializer(mul_weight_name, np.abs(A))
                     new_mul = oh.make_node(
@@ -308,56 +308,61 @@ class Absorb1BitMulIntoConv(Transformation):
 
 
 class AbsorbTransposeIntoMultiThreshold(Transformation):
-    """Change (NCHWTranspose -> MultiThreshold -> NHWCTranspose) to (MultiThreshold)
-    with NHWC mode. For (NCHWTranspose -> MultiThreshold) move Transpose past MT."""
+    """For (NCHWTranspose -> MultiThreshold) move Transpose past MultiThreshold
+    and set its data_layout mode to NHWC."""
 
     def apply(self, model):
         graph = model.graph
         node_ind = 0
         graph_modified = False
-        for n in graph.node:
+        nodes = [n for n in model.graph.node]
+        for n in nodes:
             node_ind += 1
             if n.op_type == "Transpose" and not model.is_fork_node(n):
                 perms = list(get_by_name(n.attribute, "perm").ints)
                 if perms == [0, 3, 1, 2]:
                     mt_cand = model.find_consumer(n.output[0])
-                    if mt_cand.op_type == "MultiThreshold" and not model.is_fork_node(
-                        mt_cand
+                    if (
+                        mt_cand is not None
+                        and mt_cand.op_type == "MultiThreshold"
+                        # and not model.is_fork_node(mt_cand)
                     ):
-                        final_t_cand = model.find_consumer(mt_cand.output[0])
-                        if final_t_cand.op_type == "Transpose":
-                            perms = list(
-                                get_by_name(final_t_cand.attribute, "perm").ints
-                            )
-                            if perms == [0, 2, 3, 1]:
-                                mt = getCustomOp(mt_cand)
-                                mt.set_nodeattr("data_layout", "NHWC")
-                                # get rid of tranpose nodes, wire MT directly
-                                mt_cand.input[0] = n.input[0]
-                                mt_cand.output[0] = final_t_cand.output[0]
-                                graph.node.remove(n)
-                                graph.node.remove(final_t_cand)
-                                graph_modified = True
-                        else:
-                            mt = getCustomOp(mt_cand)
-                            mt.set_nodeattr("data_layout", "NHWC")
-                            # get rid of first tranpose node
-                            mt_cand.input[0] = n.input[0]
-                            graph.node.remove(n)
-                            # fix output shape for MultiThreshold
-                            mt_ishape = model.get_tensor_shape(mt_cand.input[0])
-                            model.set_tensor_shape(mt_cand.output[0], mt_ishape)
-                            # re-insert Transpose behind MultiThreshold
-                            transpose_output = model.make_new_valueinfo_name()
-                            new_transpose = oh.make_node(
-                                "Transpose",
-                                [mt_cand.output[0]],
-                                [transpose_output],
-                                perm=[0, 3, 1, 2],
-                            )
-                            graph.node.insert(node_ind + 1, new_transpose)
-                            final_t_cand.input[0] = transpose_output
-                            graph_modified = True
+                        mt_cand_orig_output = mt_cand.output[0]
+                        mt = getCustomOp(mt_cand)
+                        mt.set_nodeattr("data_layout", "NHWC")
+                        # Rewire input of MultiThreshold node
+                        mt_cand.input[0] = n.input[0]
+                        # Make new intermediate tensor
+                        intermediate_tensor_name = model.make_new_valueinfo_name()
+                        intermediate_tensor_shape = model.get_tensor_shape(n.input[0])
+                        intermediate_tensor_finn_dtype = model.get_tensor_datatype(
+                            mt_cand.output[0]
+                        )
+                        # Create a new ValueInfoProto and set the shape
+                        model.set_tensor_shape(
+                            intermediate_tensor_name, intermediate_tensor_shape
+                        )
+                        # Set the tensor layout
+                        model.set_tensor_layout(
+                            intermediate_tensor_name, DataLayout.NHWC
+                        )
+                        # Set the tensor FINN datatype
+                        model.set_tensor_datatype(
+                            intermediate_tensor_name, intermediate_tensor_finn_dtype
+                        )
+                        # Rewire output of MT node
+                        mt_cand.output[0] = intermediate_tensor_name
+                        # Get rid of first transpose node
+                        graph.node.remove(n)
+                        # Create new Transpose node
+                        new_transpose = oh.make_node(
+                            "Transpose",
+                            [intermediate_tensor_name],
+                            [mt_cand_orig_output],
+                            perm=[0, 3, 1, 2],
+                        )
+                        graph.node.insert(node_ind + 1, new_transpose)
+                        graph_modified = True
         if graph_modified:
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
@@ -457,7 +462,7 @@ class AbsorbScalarMulAddIntoTopK(Transformation):
                         graph.node.remove(prod)
                         n.input[0] = prod_input
                         # to avoid error the dataype is set to float32
-                        model.set_tensor_datatype(n.input[0], DataType.FLOAT32)
+                        model.set_tensor_datatype(n.input[0], DataType["FLOAT32"])
                         graph_modified = True
         if graph_modified:
             model = model.transform(InferShapes())
@@ -531,11 +536,20 @@ class AbsorbConsecutiveTransposes(Transformation):
                             # TODO implement this to allow for forks as producers
                             consumers = model.find_direct_successors(next_node)
                             prod = model.find_producer(n.input[0])
-                            for cons in consumers:
-                                for cons_in in cons.input:
-                                    if cons_in == next_node.output[0]:
-                                        prod.output[0] = cons_in
-                                        break
+                            if prod is not None:
+                                for cons in consumers:
+                                    for cons_in in cons.input:
+                                        if cons_in == next_node.output[0]:
+                                            prod.output[0] = cons_in
+                                            break
+                            else:
+                                # n.input[0] is top-level graph input
+                                # wire consumers directly to that
+                                for cons in consumers:
+                                    for i, iname in enumerate(cons.input):
+                                        if iname == next_node.output[0]:
+                                            cons.input[i] = n.input[0]
+
                             # remove both transposes
                             graph.node.remove(n)
                             graph.node.remove(next_node)
@@ -544,3 +558,81 @@ class AbsorbConsecutiveTransposes(Transformation):
         if graph_modified:
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
+
+
+class AbsorbTransposeIntoResize(Transformation):
+    """For (NCHWTranspose -> Resize) move Transpose past Resize and
+    change the Resize node's attributes accordingly."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for node in graph.node:
+            node_ind += 1
+            if node.op_type == "Transpose" and not model.is_fork_node(node):
+                perms = list(get_by_name(node.attribute, "perm").ints)
+                if perms == [0, 3, 1, 2]:
+                    mt_cand = model.find_consumer(node.output[0])
+                    if mt_cand is not None and mt_cand.op_type == "Resize":
+                        mode = get_by_name(mt_cand.attribute, "mode").s.decode("ascii")
+                        # skip if mode is not nearest
+                        if mode != "nearest":
+                            continue
+                        # if sizes specified, turn into scales
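+                        # e.g. input shape (1, 3, 8, 8) with sizes (1, 3, 16, 16)
+                        # corresponds to scales (1, 1, 2, 2)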
+                        if len(mt_cand.input) > 3:
+                            sizes = model.get_initializer(mt_cand.input[3])
+                        else:
+                            sizes = None
+                        if sizes is not None:
+                            ishape = model.get_tensor_shape(mt_cand.input[0])
+                            ns, cs, hs, ws = sizes / np.asarray(ishape)
+                            model.set_initializer(
+                                mt_cand.input[2], np.asarray([ns, cs, hs, ws])
+                            )
+                            mt_cand.input.remove(mt_cand.input[3])
+                        # scales already specified, transpose indices to NHWC
+                        scales = model.get_initializer(mt_cand.input[2])
+                        assert scales is not None
+                        ns, cs, hs, ws = scales
+                        model.set_initializer(
+                            mt_cand.input[2], np.asarray([ns, hs, ws, cs])
+                        )
+                        # get rid of first transpose node
+                        mt_cand.input[0] = node.input[0]
+                        graph.node.remove(node)
+                        is_last_node = mt_cand.output[0] in [
+                            x.name for x in model.graph.output
+                        ]
+
+                        new_tensor_name = model.make_new_valueinfo_name()
+                        if is_last_node:
+                            trans_input = new_tensor_name
+                            trans_output = mt_cand.output[0]
+                        else:
+                            trans_input = mt_cand.output[0]
+                            trans_output = new_tensor_name
+                        # fix tensor shapes for Resize and Transpose
+                        # n, c, h, w = model.get_tensor_shape(mt_cand.input[0])
+                        n, c, hx, wx = model.get_tensor_shape(mt_cand.output[0])
+                        model.set_tensor_shape(trans_input, (n, hx, wx, c))
+                        model.set_tensor_shape(trans_output, (n, c, hx, wx))
+                        # re-insert Transpose behind Resize
+                        new_transpose = oh.make_node(
+                            "Transpose",
+                            [trans_input],
+                            [trans_output],
+                            perm=[0, 3, 1, 2],
+                        )
+                        graph.node.insert(node_ind + 1, new_transpose)
+                        # rewire nodes
+                        final_t_cands = model.find_consumers(mt_cand.output[0])
+                        if final_t_cands is not None:
+                            # rewire next nodes' inputs
+                            for final_t_cand in final_t_cands:
+                                final_t_cand.input[0] = trans_output
+                        mt_cand.output[0] = trans_input
+                        graph_modified = True
+        if graph_modified:
+            model = model.transform(InferDataTypes())
+        return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/collapse_repeated.py b/src/finn/transformation/streamline/collapse_repeated.py
index 50265046d94db1e7233a45b934fd68f08431a95d..92c48c84ffa1a161f623ef6b22caaeb92f4a8199 100644
--- a/src/finn/transformation/streamline/collapse_repeated.py
+++ b/src/finn/transformation/streamline/collapse_repeated.py
@@ -85,8 +85,8 @@ class CollapseRepeatedOp(Transformation):
                     # replace parameter value
                     model.set_initializer(new_node_param_name, new_param)
                     # be conservative with param/output DataTypes
-                    model.set_tensor_datatype(new_node_param_name, DataType.FLOAT32)
-                    model.set_tensor_datatype(end_name, DataType.FLOAT32)
+                    model.set_tensor_datatype(new_node_param_name, DataType["FLOAT32"])
+                    model.set_tensor_datatype(end_name, DataType["FLOAT32"])
                     # remove old nodes
                     graph.node.remove(n)
                     graph.node.remove(consumer)
diff --git a/src/finn/transformation/streamline/remove.py b/src/finn/transformation/streamline/remove.py
deleted file mode 100644
index 27e420a7936c2d9203150d2d682bf45e1aff0638..0000000000000000000000000000000000000000
--- a/src/finn/transformation/streamline/remove.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright (c) 2020, Xilinx
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of FINN nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-import numpy as np
-
-from finn.transformation.base import Transformation
-from finn.transformation.infer_shapes import InferShapes
-
-
-def _remove_node_and_rewire(model, node):
-    producer = model.find_producer(node.input[0])
-    if producer is not None:
-        # wire output tensor to
-        # output of producer node
-        producer.output[0] = node.output[0]
-    else:
-        # node is first in graph
-        consumer = model.find_consumer(node.output[0])
-        assert consumer is not None, "Whole graph is identity"
-        assert consumer.input[0] == node.output[0]
-        # rewire consumer's input directly to graph input
-        consumer.input[0] = node.input[0]
-    # remove node
-    model.graph.node.remove(node)
-
-
-class RemoveIdentityOps(Transformation):
-    """Remove identity ops like Add/Sub with zero or Mul/Div with one. A tolerance
-    value (defaults to 1e-05) can be specified during init for the comparison
-    to zero/one."""
-
-    def __init__(self, atol=1e-05):
-        super().__init__()
-        self.atol = atol
-
-    def apply(self, model):
-        graph = model.graph
-        node_ind = 0
-        graph_modified = False
-        for n in graph.node:
-            node_ind += 1
-            if (
-                n.op_type in ["Add", "Sub"]
-                and not model.is_fork_node(n)
-                and not model.is_join_node(n)
-            ):
-                A = model.get_initializer(n.input[1])
-                if (
-                    A is not None
-                    and np.isclose(A, np.zeros_like(A), atol=self.atol).all()
-                ):
-                    _remove_node_and_rewire(model, n)
-
-            elif (
-                n.op_type in ["Mul", "Div"]
-                and not model.is_fork_node(n)
-                and not model.is_join_node(n)
-            ):
-                A = model.get_initializer(n.input[1])
-                if (
-                    A is not None
-                    and np.isclose(A, np.ones_like(A), atol=self.atol).all()
-                ):
-                    _remove_node_and_rewire(model, n)
-        model = model.transform(InferShapes())
-        return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index 1b22f474abe3f59ac91551efa3661b2612442776..0cdd6651d982426b1d81d7313346dcd899294bf7 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -408,16 +408,16 @@ class MoveMulPastDWConv(Transformation):
                         # rewire mul input to be conv input
                         conv_node.input[0] = start_name
                         model.set_tensor_shape(start_name, conv_in_shape)
-                        model.set_tensor_datatype(start_name, DataType.FLOAT32)
+                        model.set_tensor_datatype(start_name, DataType["FLOAT32"])
                         # use old conv input tensor as conv output
                         conv_node.output[0] = conv_in_name
                         model.set_tensor_shape(conv_in_name, conv_out_shape)
-                        model.set_tensor_datatype(conv_in_name, DataType.FLOAT32)
+                        model.set_tensor_datatype(conv_in_name, DataType["FLOAT32"])
                         # use new conv output as new mul node input
                         mul_node.input[0] = conv_in_name
                         # use old conv output as new mul node output
                         mul_node.output[0] = conv_out_name
-                        model.set_tensor_datatype(conv_out_name, DataType.FLOAT32)
+                        model.set_tensor_datatype(conv_out_name, DataType["FLOAT32"])
                         # move mul node past conv node
                         graph.node.remove(mul_node)
                         graph.node.insert(node_ind, mul_node)
@@ -482,16 +482,16 @@ class MoveMulPastMaxPool(Transformation):
                         # rewire mul input to be maxpool input
                         maxpool_node.input[0] = start_name
                         model.set_tensor_shape(start_name, maxpool_in_shape)
-                        model.set_tensor_datatype(start_name, DataType.FLOAT32)
+                        model.set_tensor_datatype(start_name, DataType["FLOAT32"])
                         # use old maxpool input tensor as maxpool output
                         maxpool_node.output[0] = maxpool_in_name
                         model.set_tensor_shape(maxpool_in_name, maxpool_out_shape)
-                        model.set_tensor_datatype(maxpool_in_name, DataType.FLOAT32)
+                        model.set_tensor_datatype(maxpool_in_name, DataType["FLOAT32"])
                         # use new maxpool output as new mul node input
                         mul_node.input[0] = maxpool_in_name
                         # use old maxpool output as new mul node output
                         mul_node.output[0] = maxpool_out_name
-                        model.set_tensor_datatype(maxpool_out_name, DataType.FLOAT32)
+                        model.set_tensor_datatype(maxpool_out_name, DataType["FLOAT32"])
                         # move mul node past maxpool node
                         graph.node.remove(mul_node)
                         graph.node.insert(node_ind, mul_node)
@@ -594,11 +594,17 @@ class MoveScalarLinearPastInvariants(Transformation):
         nodes = [n for n in graph.node]
         for n in nodes:
             node_ind += 1
+            is_nearest_neighbor_resample = False
+            if n.op_type == "Upsample" or n.op_type == "Resize":
+                # Extract mode and scales and input shape
+                mode = get_by_name(n.attribute, "mode").s.decode("ascii")
+                is_nearest_neighbor_resample = mode == "nearest"
             if (
                 n.op_type == "GlobalAveragePool"
                 or n.op_type == "Reshape"
                 or n.op_type == "Transpose"
                 or n.op_type == "Flatten"
+                or is_nearest_neighbor_resample
             ):
                 in0 = n.input[0]
                 if in0 is None:
@@ -617,6 +623,10 @@ class MoveScalarLinearPastInvariants(Transformation):
                     # if initializer is not scalar, skip
                     if np.prod(init0.shape) != 1:
                         continue
+                    # Reduce the scalar initializer to a single value if required
+                    if len(init0.shape) > 0:
+                        init0 = init0.flatten()[0]
+                        model.set_initializer(prod0.input[1], init0)
                     # move prod0 from input to output,
                     old_prod0_in = prod0.input[0]
                     old_prod0_out = prod0.output[0]
@@ -632,7 +642,7 @@ class MoveScalarLinearPastInvariants(Transformation):
                     model.set_tensor_shape(n.output[0], out_shape)
                     model.set_tensor_shape(prod0.output[0], out_shape)
                     model.set_tensor_datatype(prod0.output[0], scalar_op_odt)
-                    model.set_tensor_datatype(n.output[0], DataType.FLOAT32)
+                    model.set_tensor_datatype(n.output[0], DataType["FLOAT32"])
                     graph.node.remove(prod0)
                     graph.node.insert(node_ind - 1, prod0)
                     graph_modified = True
diff --git a/src/finn/transformation/streamline/sign_to_thres.py b/src/finn/transformation/streamline/sign_to_thres.py
index 13f2e8524af7ce2d3457d0637f1c6d02733f504b..61d7eb35430262b1ee90dfa478076fb6f7556612 100644
--- a/src/finn/transformation/streamline/sign_to_thres.py
+++ b/src/finn/transformation/streamline/sign_to_thres.py
@@ -69,6 +69,6 @@ class ConvertSignToThres(Transformation):
                 graph.node.insert(node_ind, mt_node)
                 graph.node.remove(n)
                 # add quantization annotations
-                model.set_tensor_datatype(sign_out_name, DataType.BIPOLAR)
+                model.set_tensor_datatype(sign_out_name, DataType["BIPOLAR"])
                 graph_modified = True
         return (model, graph_modified)
diff --git a/src/finn/util/create.py b/src/finn/util/create.py
index d9c5d7b1b59916edfc8730992535f3ddb57c4d60..62229a69b68c26dd191b3e1d4a44f1bb8b19ed07 100644
--- a/src/finn/util/create.py
+++ b/src/finn/util/create.py
@@ -49,10 +49,10 @@ def hls_random_mlp_maker(layer_spec):
             # no activation, produce accumulators
             T = None
             tdt = None
-            if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-                odt = DataType.UINT32
+            if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+                odt = DataType["UINT32"]
             else:
-                odt = DataType.INT32
+                odt = DataType["INT32"]
         else:
             odt = act
             (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -61,13 +61,13 @@ def hls_random_mlp_maker(layer_spec):
             # provide non-decreasing thresholds
             T = np.sort(T, axis=1)
             # generate thresholds for activation
-            if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-                tdt = DataType.UINT32
+            if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+                tdt = DataType["UINT32"]
                 # bias thresholds to be positive
                 T = np.ceil((T + mw) / 2)
                 assert (T >= 0).all()
             else:
-                tdt = DataType.INT32
+                tdt = DataType["INT32"]
         lyr["T"] = T
         lyr["tdt"] = tdt
         lyr["odt"] = odt
@@ -120,11 +120,11 @@ def hls_mlp_maker(layer_spec):
         # StreamingFC:
         # - specify their datatypes as such
         # - specify their datatypes as BINARY as use binaryXnorMode
-        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
             # we'll internally convert weights/inputs to binary and specify the
             # datatypes as such, and also set the binaryXnorMode attribute to 1
-            export_wdt = DataType.BINARY
-            export_idt = DataType.BINARY
+            export_wdt = DataType["BINARY"]
+            export_idt = DataType["BINARY"]
             binary_xnor_mode = 1
         else:
             export_wdt = wdt
@@ -134,7 +134,7 @@ def hls_mlp_maker(layer_spec):
         if T is not None:
             no_act = 0
             node_inp_list = [current_in_name, current_W_name, current_T_name]
-            if odt == DataType.BIPOLAR:
+            if odt == DataType["BIPOLAR"]:
                 actval = 0
             else:
                 actval = odt.min()
diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py
index 68e563da6351dad6e61d5a2d1ffcbfed9859d0f5..1b38914a83e7c5d68bb004df7545b518d6a93ddd 100644
--- a/tests/brevitas/test_brevitas_avg_pool_export.py
+++ b/tests/brevitas/test_brevitas_avg_pool_export.py
@@ -31,19 +31,23 @@ import numpy as np
 import os
 import torch
 from brevitas.export import FINNManager
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantAvgPool2d
 from brevitas.quant_tensor import QuantTensor
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import gen_finn_dt_tensor
 
-export_onnx_path = "test_brevitas_avg_pool_export.onnx"
+base_export_onnx_path = "test_brevitas_avg_pool_export.onnx"
 
 
+@pytest.mark.parametrize("QONNX_export", [False, True])
 @pytest.mark.parametrize("kernel_size", [2, 3])
 @pytest.mark.parametrize("stride", [1, 2])
 @pytest.mark.parametrize("signed", [True, False])
@@ -52,11 +56,23 @@ export_onnx_path = "test_brevitas_avg_pool_export.onnx"
 @pytest.mark.parametrize("channels", [2, 4])
 @pytest.mark.parametrize("idim", [7, 8])
 def test_brevitas_avg_pool_export(
-    kernel_size, stride, signed, bit_width, input_bit_width, channels, idim
+    kernel_size,
+    stride,
+    signed,
+    bit_width,
+    input_bit_width,
+    channels,
+    idim,
+    QONNX_export,
 ):
-
+    export_onnx_path = base_export_onnx_path.replace(
+        ".onnx", f"test_QONNX-{QONNX_export}.onnx"
+    )
     quant_avgpool = QuantAvgPool2d(
-        kernel_size=kernel_size, stride=stride, bit_width=bit_width
+        kernel_size=kernel_size,
+        stride=stride,
+        bit_width=bit_width,
+        return_quant_tensor=False,
     )
     quant_avgpool.eval()
 
@@ -69,31 +85,57 @@ def test_brevitas_avg_pool_export(
     # Brevitas QuantAvgPool layers need QuantTensors to export correctly
     # which requires setting up a QuantTensor instance with the scale
     # factor, zero point, bitwidth and signedness
-    scale_array = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype(
-        np.float32
-    )
+    scale_array = np.ones((1, channels, 1, 1)).astype(np.float32)
+    scale_array *= 0.5
     input_tensor = torch.from_numpy(input_array * scale_array).float()
     scale_tensor = torch.from_numpy(scale_array).float()
     zp = torch.tensor(0.0)
     input_quant_tensor = QuantTensor(
-        input_tensor, scale_tensor, zp, input_bit_width, signed
+        input_tensor, scale_tensor, zp, input_bit_width, signed, training=False
     )
 
     # export
-    FINNManager.export(
-        quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
-    )
+    if QONNX_export:
+        BrevitasONNXManager.export(
+            quant_avgpool,
+            export_path=export_onnx_path,
+            input_t=input_quant_tensor,
+        )
+        model = ModelWrapper(export_onnx_path)
+
+        # Statically set the additional inputs generated by the BrevitasONNXManager
+        model.graph.input.remove(model.graph.input[3])
+        model.graph.input.remove(model.graph.input[2])
+        model.graph.input.remove(model.graph.input[1])
+        model.set_initializer("1", scale_array)
+        model.set_initializer("2", np.array(0.0).astype(np.float32))
+        model.set_initializer("3", np.array(input_bit_width).astype(np.float32))
+        model.save(export_onnx_path)
+
+        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+        model = ModelWrapper(export_onnx_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(export_onnx_path)
+    else:
+        FINNManager.export(
+            quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
+        )
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     model = model.transform(InferDataTypes())
 
     # reference brevitas output
-    ref_output_array = quant_avgpool(input_quant_tensor).tensor.detach().numpy()
+    ref_output_array = quant_avgpool(input_quant_tensor).detach().numpy()
     # finn output
-    idict = {model.graph.input[0].name: input_array}
+    if QONNX_export:
+        # Manually apply the Quant tensor scaling for QONNX
+        idict = {model.graph.input[0].name: input_array * scale_array}
+    else:
+        idict = {model.graph.input[0].name: input_array}
     odict = oxe.execute_onnx(model, idict, True)
     finn_output = odict[model.graph.output[0].name]
     # compare outputs
     assert np.isclose(ref_output_array, finn_output).all()
     # cleanup
     os.remove(export_onnx_path)
diff --git a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py
index 8a1783ae9468244ad7e0999b59c3c7b696682dae..78ca361366902b37f826b575904126c783adbece 100644
--- a/tests/brevitas/test_brevitas_cnv.py
+++ b/tests/brevitas/test_brevitas_cnv.py
@@ -34,12 +34,15 @@ import brevitas.onnx as bo
 import numpy as np
 import os
 import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.test import get_test_model_trained
 
 export_onnx_path = "test_brevitas_cnv.onnx"
@@ -47,11 +50,20 @@ export_onnx_path = "test_brevitas_cnv.onnx"
 
 @pytest.mark.parametrize("abits", [1, 2])
 @pytest.mark.parametrize("wbits", [1, 2])
-def test_brevitas_cnv_export_exec(wbits, abits):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_cnv_export_exec(wbits, abits, QONNX_export):
     if wbits > abits:
         pytest.skip("No wbits > abits cases at the moment")
     cnv = get_test_model_trained("CNV", wbits, abits)
-    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path)
+    ishape = (1, 3, 32, 32)
+    if QONNX_export:
+        BrevitasONNXManager.export(cnv, ishape, export_onnx_path)
+        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+        model = ModelWrapper(export_onnx_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(export_onnx_path)
+    else:
+        bo.export_finn_onnx(cnv, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(InferShapes())
diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py
index 4418368350b627644c76a7483c5c5dfaf031cda0..e42b93babefd9ca6a7a86def18a5cbb21d795c4c 100644
--- a/tests/brevitas/test_brevitas_debug.py
+++ b/tests/brevitas/test_brevitas_debug.py
@@ -26,38 +26,71 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import pytest
+
 import brevitas.onnx as bo
 import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 import os
 import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from pkgutil import get_data
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import RemoveStaticGraphInputs
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.test import get_test_model_trained
 
 
-def test_brevitas_debug():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+@pytest.mark.parametrize("QONNX_FINN_conversion", [False, True])
+def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
+    if (not QONNX_export) and QONNX_FINN_conversion:
+        pytest.skip("This test configuration is not valid and is thus skipped.")
     finn_onnx = "test_brevitas_debug.onnx"
     fc = get_test_model_trained("TFC", 2, 2)
-    dbg_hook = bo.enable_debug(fc)
-    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
+    ishape = (1, 1, 28, 28)
+    if QONNX_export:
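+        # Hook debug values at proxy level; they are unpacked via .value when compared below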
+        dbg_hook = bo.enable_debug(fc, proxy_level=True)
+        BrevitasONNXManager.export(fc, ishape, finn_onnx)
+        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
+        model = ModelWrapper(finn_onnx)
+        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
+        for dbg_node in dbg_nodes:
+            dbg_node.domain = "finn.custom_op.general"
+        model.save(finn_onnx)
+        qonnx_cleanup(finn_onnx, out_file=finn_onnx)
+        if QONNX_FINN_conversion:
+            model = ModelWrapper(finn_onnx)
+            model = model.transform(ConvertQONNXtoFINN())
+            model.save(finn_onnx)
+    else:
+        dbg_hook = bo.enable_debug(fc)
+        bo.export_finn_onnx(fc, ishape, finn_onnx)
+        model = ModelWrapper(finn_onnx)
+        # DebugMarkers have the brevitas.onnx domain, so that needs adjusting
+        # ToDo: We should probably have a transformation pass that does this
+        #  domain conversion for us.
+        dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
+        for dbg_node in dbg_nodes:
+            dbg_node.domain = "finn.custom_op.general"
+        model = model.transform(InferShapes())
+        model = model.transform(FoldConstants())
+        model = model.transform(RemoveStaticGraphInputs())
+        model.save(finn_onnx)
     model = ModelWrapper(finn_onnx)
-    model = model.transform(InferShapes())
-    model = model.transform(FoldConstants())
-    model = model.transform(RemoveStaticGraphInputs())
     assert len(model.graph.input) == 1
     assert len(model.graph.output) == 1
     # load one of the test vectors
     raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
-    input_dict = {"0": nph.to_array(input_tensor)}
+    input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
     output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
     produced = output_dict[model.graph.output[0].name]
     # run using PyTorch/Brevitas
@@ -70,9 +103,19 @@ def test_brevitas_debug():
     names_brevitas = set(dbg_hook.values.keys())
     names_finn = set(output_dict.keys())
     names_common = names_brevitas.intersection(names_finn)
-    assert len(names_common) == 16
+    # The different exports return debug markers in different numbers and places
+    if QONNX_export and not QONNX_FINN_conversion:
+        assert len(names_common) == 12
+    elif QONNX_export and QONNX_FINN_conversion:
+        assert len(names_common) == 8
+    else:
+        assert len(names_common) == 16
     for dbg_name in names_common:
-        tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
+        if QONNX_export:
+            tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy()
+        else:
+            tensor_pytorch = dbg_hook.values[dbg_name].detach().numpy()
         tensor_finn = output_dict[dbg_name]
         assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all()
     os.remove(finn_onnx)
diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py
index b280ab9e116f8b4735f31d16e08d8f1055470155..8e1e3de8d06b24ce946fb0a6726d875d0e75736e 100644
--- a/tests/brevitas/test_brevitas_fc.py
+++ b/tests/brevitas/test_brevitas_fc.py
@@ -33,13 +33,16 @@ import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 import torch
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from pkgutil import get_data
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import RemoveStaticGraphInputs
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import make_build_dir
 from finn.util.test import get_test_model_trained
 
@@ -52,15 +55,25 @@ export_onnx_path = make_build_dir("test_brevitas_fc_")
 @pytest.mark.parametrize("wbits", [1, 2])
 # network topology / size
 @pytest.mark.parametrize("size", ["TFC", "SFC", "LFC"])
-def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
+# QONNX export
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, QONNX_export):
     if size == "LFC" and wbits == 2 and abits == 2:
         pytest.skip("No LFC-w2a2 present at the moment")
     if wbits > abits:
         pytest.skip("No wbits > abits cases at the moment")
-    nname = "%s_%dW%dA" % (size, wbits, abits)
+    nname = "%s_%dW%dA_QONNX-%d" % (size, wbits, abits, QONNX_export)
     finn_onnx = export_onnx_path + "/%s.onnx" % nname
     fc = get_test_model_trained(size, wbits, abits)
-    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
+    ishape = (1, 1, 28, 28)
+    if QONNX_export:
+        BrevitasONNXManager.export(fc, ishape, finn_onnx)
+        qonnx_cleanup(finn_onnx, out_file=finn_onnx)
+        model = ModelWrapper(finn_onnx)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(finn_onnx)
+    else:
+        bo.export_finn_onnx(fc, ishape, finn_onnx)
     model = ModelWrapper(finn_onnx)
     model = model.transform(InferShapes())
     model = model.transform(FoldConstants())
@@ -71,7 +84,7 @@ def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits):
     raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
-    input_dict = {"0": nph.to_array(input_tensor)}
+    input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
     output_dict = oxe.execute_onnx(model, input_dict)
     produced = output_dict[list(output_dict.keys())[0]]
     # run using PyTorch/Brevitas
diff --git a/tests/brevitas/test_brevitas_mobilenet.py b/tests/brevitas/test_brevitas_mobilenet.py
index eb642adada9bd9abb8a328518770899d3da96ada..108c97c2e83b7f3ca9dd6ead746b3ef8b4d10af5 100644
--- a/tests/brevitas/test_brevitas_mobilenet.py
+++ b/tests/brevitas/test_brevitas_mobilenet.py
@@ -78,7 +78,9 @@ def test_brevitas_mobilenet():
     bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
     preproc_model = ModelWrapper(preproc_onnx)
     # set input finn datatype to UINT8
-    preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name, DataType.UINT8)
+    preproc_model.set_tensor_datatype(
+        preproc_model.graph.input[0].name, DataType["UINT8"]
+    )
     preproc_model = preproc_model.transform(InferShapes())
     preproc_model = preproc_model.transform(GiveUniqueNodeNames())
     preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
diff --git a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
similarity index 82%
rename from tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
rename to tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
index 6ddf71a5cba14916e3bcb13e65b1da2f4fddc63f..b530b4bd84c548319549a8b16e0c3a79584e075d 100644
--- a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
@@ -36,11 +36,14 @@ import torch
 from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantHardTanh
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx"
 
@@ -48,7 +51,10 @@ export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx"
 @pytest.mark.parametrize("abits", [1, 2, 4, 8])
 @pytest.mark.parametrize("narrow_range", [False, True])
 @pytest.mark.parametrize("max_val", [1.0, 1 - 2 ** (-7)])
-def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_qhardtanh_nonscaled(
+    abits, narrow_range, max_val, QONNX_export
+):
     def get_quant_type(bit_width):
         if bit_width is None:
             return QuantType.FP
@@ -69,7 +75,15 @@ def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
         scaling_impl_type=ScalingImplType.CONST,
         narrow_range=narrow_range,
     )
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
diff --git a/tests/brevitas/test_brevitas_QConv2d.py b/tests/brevitas/test_brevitas_qconv2d.py
similarity index 83%
rename from tests/brevitas/test_brevitas_QConv2d.py
rename to tests/brevitas/test_brevitas_qconv2d.py
index c1f790946bfa5f53194b96b1fea9c1722797a4a0..beaea4e51ecdd4cff9f0d4d0c16735cdecad207c 100644
--- a/tests/brevitas/test_brevitas_QConv2d.py
+++ b/tests/brevitas/test_brevitas_qconv2d.py
@@ -36,12 +36,15 @@ from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
 from brevitas.core.stats import StatsOp
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantConv2d
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import gen_finn_dt_tensor
 
 export_onnx_path = "test_brevitas_conv.onnx"
@@ -50,7 +53,8 @@ export_onnx_path = "test_brevitas_conv.onnx"
 @pytest.mark.parametrize("dw", [False, True])
 @pytest.mark.parametrize("bias", [True, False])
 @pytest.mark.parametrize("in_channels", [32])
-def test_brevitas_QConv2d(dw, bias, in_channels):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
     ishape = (1, 32, 111, 111)
     if dw is True:
         groups = in_channels
@@ -86,10 +90,18 @@ def test_brevitas_QConv2d(dw, bias, in_channels):
         weight_narrow_range=True,
         weight_scaling_min_val=2e-16,
     )
-    weight_tensor = gen_finn_dt_tensor(DataType.INT4, w_shape)
+    weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
     b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
     b_conv.eval()
-    bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_conv, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=-1.0, high=1.0, size=ishape).astype(np.float32)
diff --git a/tests/brevitas/test_brevitas_qlinear.py b/tests/brevitas/test_brevitas_qlinear.py
index 873866b37727730b7cedd035f5edd93f7c1afe32..1099d3ec83336e5cd07707b35baea112b7a2aee6 100644
--- a/tests/brevitas/test_brevitas_qlinear.py
+++ b/tests/brevitas/test_brevitas_qlinear.py
@@ -33,12 +33,15 @@ import numpy as np
 import os
 import torch
 from brevitas.core.quant import QuantType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantLinear
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import gen_finn_dt_tensor
 
 export_onnx_path = "test_brevitas_qlinear.onnx"
@@ -48,8 +51,11 @@ export_onnx_path = "test_brevitas_qlinear.onnx"
 @pytest.mark.parametrize("out_features", [4])
 @pytest.mark.parametrize("in_features", [3])
 @pytest.mark.parametrize("w_bits", [4])
-@pytest.mark.parametrize("i_dtype", [DataType.UINT4])
-def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
+@pytest.mark.parametrize("i_dtype", [DataType["UINT4"]])
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_qlinear(
+    bias, out_features, in_features, w_bits, i_dtype, QONNX_export
+):
     i_shape = (1, in_features)
     w_shape = (out_features, in_features)
     b_linear = QuantLinear(
@@ -66,7 +72,15 @@ def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
     )
     b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
     b_linear.eval()
-    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_linear, i_shape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py
index bb59a8414feffbb8362de629f8b30ac200a5227f..57ead3b6c047220e90d4276620cc14b8f795fe08 100644
--- a/tests/brevitas/test_brevitas_relu_act_export.py
+++ b/tests/brevitas/test_brevitas_relu_act_export.py
@@ -36,11 +36,14 @@ import torch
 from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantReLU
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_relu_act_export.onnx"
 
@@ -50,7 +53,8 @@ export_onnx_path = "test_brevitas_relu_act_export.onnx"
 @pytest.mark.parametrize(
     "scaling_impl_type", [ScalingImplType.CONST, ScalingImplType.PARAMETER]
 )
-def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type, QONNX_export):
     min_val = -1.0
     ishape = (1, 15)
 
@@ -71,8 +75,15 @@ scaling_impl.learned_value": torch.tensor(
             )
         }
         b_act.load_state_dict(checkpoint)
-
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
@@ -103,7 +114,10 @@ scaling_impl.learned_value": torch.tensor(
 @pytest.mark.parametrize("abits", [2, 4, 8])
 @pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)])
 @pytest.mark.parametrize("scaling_per_channel", [True, False])
-def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_brevitas_act_export_relu_imagenet(
+    abits, max_val, scaling_per_channel, QONNX_export
+):
     out_channels = 32
     ishape = (1, out_channels, 1, 1)
     min_val = -1.0
@@ -115,7 +129,7 @@ def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel):
         restrict_scaling_type=RestrictValueType.LOG_FP,
         scaling_min_val=2e-16,
         max_val=6.0,
-        return_quant_tensor=True,
+        return_quant_tensor=False,
         per_channel_broadcastable_shape=(1, out_channels, 1, 1),
     )
     if scaling_per_channel is True:
@@ -129,7 +143,15 @@ scaling_impl.learned_value": rand_tensor.type(
         )
     }
     b_act.load_state_dict(checkpoint)
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
@@ -140,7 +162,7 @@ scaling_impl.learned_value": rand_tensor.type(
     produced = odict[model.graph.output[0].name]
     inp_tensor = torch.from_numpy(inp_tensor).float()
     b_act.eval()
-    expected = b_act.forward(inp_tensor).tensor.detach().numpy()
+    expected = b_act.forward(inp_tensor).detach().numpy()
     if not np.isclose(produced, expected, atol=1e-3).all():
         print(abits, max_val)
         print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
diff --git a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
similarity index 87%
rename from tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
rename to tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
index 345fae872119c75aa8e85cb5812c94dfc15bad7f..c6da2e2e971ee97cb73243284920cc87e8b4d7bb 100644
--- a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
@@ -36,11 +36,14 @@ import torch
 from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantHardTanh
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx"
 
@@ -52,8 +55,9 @@ export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx"
 @pytest.mark.parametrize(
     "scaling_impl_type", [ScalingImplType.CONST, ScalingImplType.PARAMETER]
 )
+@pytest.mark.parametrize("QONNX_export", [False, True])
 def test_brevitas_act_export_qhardtanh_scaled(
-    abits, narrow_range, min_val, max_val, scaling_impl_type
+    abits, narrow_range, min_val, max_val, scaling_impl_type, QONNX_export
 ):
     def get_quant_type(bit_width):
         if bit_width is None:
@@ -84,8 +88,15 @@ tensor_quant.scaling_impl.learned_value": torch.tensor(
             )
         }
         b_act.load_state_dict(checkpoint)
-
-    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
+    if QONNX_export:
+        m_path = export_onnx_path
+        BrevitasONNXManager.export(b_act, ishape, m_path)
+        qonnx_cleanup(m_path, out_file=m_path)
+        model = ModelWrapper(m_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(m_path)
+    else:
+        bo.export_finn_onnx(b_act, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index 00a9fa721a320a8b70ee913e878955b9caddc3bf..1fddc7c1c26a0ba04d5849809ccf59b0a926a509 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -38,9 +38,11 @@ import os
 import subprocess
 import torch
 import warnings
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from collections import OrderedDict
 from dataset_loading import cifar, mnist
 from datetime import datetime
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 from scipy.stats import linregress
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
@@ -63,6 +65,7 @@ from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
@@ -81,6 +84,7 @@ from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.merge_onnx_models import MergeONNXModels
 from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import (
     MakeMaxPoolNHWC,
@@ -103,8 +107,14 @@ mem_mode = "decoupled"
 rtlsim_trace = False
 
 
-def get_checkpoint_name(topology, wbits, abits, step):
-    return build_dir + "/end2end_%s_w%da%d_%s.onnx" % (topology, wbits, abits, step)
+def get_checkpoint_name(topology, wbits, abits, QONNX_export, step):
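+    # Encode the export flow in the filename so FINN and QONNX runs keep separate checkpoints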
+    return build_dir + "/end2end_%s_w%da%d_QONNX-%d_%s.onnx" % (
+        topology,
+        wbits,
+        abits,
+        QONNX_export,
+        step,
+    )
 
 
 def get_dashboard_data(topology, wbits, abits):
@@ -302,15 +312,23 @@ def topology2dataset(topology):
 @pytest.mark.parametrize("wbits", [1, 2])
 @pytest.mark.parametrize("abits", [1, 2])
 @pytest.mark.parametrize("topology", ["lfc", "tfc", "cnv"])
+@pytest.mark.parametrize("QONNX_export", [False, True])
 class TestEnd2End:
-    def test_export(self, topology, wbits, abits):
+    def test_export(self, topology, wbits, abits, QONNX_export):
         if wbits > abits:
             pytest.skip("No wbits > abits end2end network configs for now")
         if topology == "lfc" and not (wbits == 1 and abits == 1):
             pytest.skip("Skipping certain lfc configs")
         (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
-        chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
-        bo.export_finn_onnx(model, ishape, chkpt_name)
+        chkpt_name = get_checkpoint_name(topology, wbits, abits, QONNX_export, "export")
+        if QONNX_export:
+            BrevitasONNXManager.export(model, ishape, chkpt_name)
+            qonnx_cleanup(chkpt_name, out_file=chkpt_name)
+            model = ModelWrapper(chkpt_name)
+            model = model.transform(ConvertQONNXtoFINN())
+            model.save(chkpt_name)
+        else:
+            bo.export_finn_onnx(model, ishape, chkpt_name)
         nname = "%s_w%da%d" % (topology, wbits, abits)
         update_dashboard_data(topology, wbits, abits, "network", nname)
         dtstr = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -322,8 +340,10 @@ class TestEnd2End:
         update_dashboard_data(topology, wbits, abits, "finn-commit", finn_commit)
         assert os.path.isfile(chkpt_name)
 
-    def test_import_and_tidy(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
+    def test_import_and_tidy(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "export"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         model = model.transform(InferShapes())
         model = model.transform(FoldConstants())
@@ -331,17 +351,23 @@ class TestEnd2End:
         model = model.transform(GiveReadableTensorNames())
         model = model.transform(InferDataTypes())
         model = model.transform(RemoveStaticGraphInputs())
-        chkpt = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
+        chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "import_and_tidy"
+        )
         model.save(chkpt)
 
-    def test_add_pre_and_postproc(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "import_and_tidy")
+    def test_add_pre_and_postproc(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "import_and_tidy"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         global_inp_name = model.graph.input[0].name
         ishape = model.get_tensor_shape(global_inp_name)
         # preprocessing: torchvision's ToTensor divides uint8 inputs by 255
         totensor_pyt = ToTensor()
-        chkpt_preproc_name = get_checkpoint_name(topology, wbits, abits, "preproc")
+        chkpt_preproc_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "preproc"
+        )
         bo.export_finn_onnx(totensor_pyt, ishape, chkpt_preproc_name)
         assert os.path.isfile(chkpt_preproc_name)
         # join preprocessing and core model
@@ -351,10 +377,12 @@ class TestEnd2End:
         model = model.transform(MergeONNXModels(pre_model))
         # add input quantization annotation: UINT8 for all BNN-PYNQ models
         global_inp_name = model.graph.input[0].name
-        model.set_tensor_datatype(global_inp_name, DataType.UINT8)
+        model.set_tensor_datatype(global_inp_name, DataType["UINT8"])
         # postprocessing: insert Top-1 node at the end
         model = model.transform(InsertTopK(k=1))
-        chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
+        chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "pre_post"
+        )
         # tidy-up again
         model = model.transform(InferShapes())
         model = model.transform(FoldConstants())
@@ -365,8 +393,10 @@ class TestEnd2End:
         model.save(chkpt_name)
         assert os.path.isfile(chkpt_name)
 
-    def test_streamline(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
+    def test_streamline(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "pre_post"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
         # move past any reshapes to be able to streamline input scaling
@@ -382,10 +412,14 @@ class TestEnd2End:
         model = model.transform(absorb.AbsorbScalarMulAddIntoTopK())
         model = model.transform(InferDataLayouts())
         model = model.transform(RemoveUnusedTensors())
-        model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "streamline")
+        )
 
-    def test_convert_to_hls_layers(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "streamline")
+    def test_convert_to_hls_layers(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "streamline"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         if topology == "tfc" and wbits == 1 and abits == 1:
             # use standalone thresholds for tfc-w1a1 to also exercise that option
@@ -407,16 +441,55 @@ class TestEnd2End:
         model = model.transform(absorb.AbsorbConsecutiveTransposes())
         model = model.transform(GiveUniqueNodeNames())
         model = model.transform(InferDataLayouts())
-        model.save(get_checkpoint_name(topology, wbits, abits, "convert_to_hls_layers"))
+        model.save(
+            get_checkpoint_name(
+                topology, wbits, abits, QONNX_export, "convert_to_hls_layers"
+            )
+        )
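+        # Expected op type counts per topology after conversion to HLS layers;
+        # tfc-w1a1 uses standalone thresholds, hence the separate "tfc-1-1" entry.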
+        exp_layer_counts = {
+            "tfc": [
+                ("Reshape", 1),
+                ("Thresholding_Batch", 1),
+                ("StreamingFCLayer_Batch", 4),
+                ("LabelSelect_Batch", 1),
+            ],
+            "tfc-1-1": [
+                ("Reshape", 1),
+                ("Thresholding_Batch", 4),
+                ("StreamingFCLayer_Batch", 4),
+                ("LabelSelect_Batch", 1),
+            ],
+            "lfc": [
+                ("Reshape", 1),
+                ("Thresholding_Batch", 1),
+                ("StreamingFCLayer_Batch", 4),
+                ("LabelSelect_Batch", 1),
+            ],
+            "cnv": [
+                ("Transpose", 1),
+                ("Thresholding_Batch", 1),
+                ("ConvolutionInputGenerator", 6),
+                ("StreamingFCLayer_Batch", 9),
+                ("StreamingMaxPool_Batch", 2),
+                ("LabelSelect_Batch", 1),
+            ],
+        }
+        if topology == "tfc" and wbits == 1 and abits == 1:
+            exp_key = "tfc-1-1"
+        else:
+            exp_key = topology
+        exp_layer_counts = exp_layer_counts[exp_key]
+        for (op_type, exp_count) in exp_layer_counts:
+            assert len(model.get_nodes_by_op_type(op_type)) == exp_count
 
-    def test_create_dataflow_partition(self, topology, wbits, abits):
+    def test_create_dataflow_partition(self, topology, wbits, abits, QONNX_export):
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "convert_to_hls_layers"
+            topology, wbits, abits, QONNX_export, "convert_to_hls_layers"
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         parent_model = model.transform(CreateDataflowPartition())
         parent_model_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "dataflow_parent"
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
         )
         parent_model.save(parent_model_chkpt)
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
@@ -424,28 +497,36 @@ class TestEnd2End:
         dataflow_model_filename = sdp_node.get_nodeattr("model")
         dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
         dataflow_model_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "dataflow_model"
+            topology, wbits, abits, QONNX_export, "dataflow_model"
         )
         dataflow_model.save(dataflow_model_chkpt)
 
-    def test_fold(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "dataflow_model")
+    def test_fold(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_model"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         folding_fxn = get_folding_function(topology, wbits, abits)
         model = folding_fxn(model)
-        model.save(get_checkpoint_name(topology, wbits, abits, "fold"))
+        model.save(get_checkpoint_name(topology, wbits, abits, QONNX_export, "fold"))
 
     @pytest.mark.slow
     @pytest.mark.vivado
-    def test_cppsim(self, topology, wbits, abits):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
+    def test_cppsim(self, topology, wbits, abits, QONNX_export):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "fold"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         model = model.transform(PrepareCppSim())
         model = model.transform(CompileCppSim())
         model = model.transform(SetExecMode("cppsim"))
-        cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
+        cppsim_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "cppsim"
+        )
         model.save(cppsim_chkpt)
-        parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+        parent_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
+        )
         (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
             topology, wbits, abits, return_topk=1
         )
@@ -455,22 +536,28 @@ class TestEnd2End:
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_ipgen(self, topology, wbits, abits, kind):
+    def test_ipgen(self, topology, wbits, abits, QONNX_export, kind):
         if kind == "alveo" and ("VITIS_PATH" not in os.environ):
             pytest.skip("VITIS_PATH not set")
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "fold")
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "fold"
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
         model = model.transform(GiveUniqueNodeNames())
         model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
         model = model.transform(HLSSynthIP())
-        model.save(get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "ipgen_" + kind)
+        )
 
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_set_fifo_depths(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "ipgen_" + kind)
+    def test_set_fifo_depths(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "ipgen_" + kind
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
         model = model.transform(InsertAndSetFIFODepths(test_fpga_part, target_clk_ns))
@@ -482,14 +569,18 @@ class TestEnd2End:
                 op_inst = getCustomOp(node)
                 assert op_inst.get_nodeattr("inFIFODepth") == 0
                 assert op_inst.get_nodeattr("outFIFODepth") == 0
-        model.save(get_checkpoint_name(topology, wbits, abits, "fifodepth_" + kind))
+        model.save(
+            get_checkpoint_name(
+                topology, wbits, abits, QONNX_export, "fifodepth_" + kind
+            )
+        )
 
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq"])
-    def test_ipstitch_rtlsim(self, topology, wbits, abits, kind):
+    def test_ipstitch_rtlsim(self, topology, wbits, abits, QONNX_export, kind):
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "fifodepth_" + kind
+            topology, wbits, abits, QONNX_export, "fifodepth_" + kind
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         test_fpga_part = get_build_env(kind, target_clk_ns)["part"]
@@ -513,54 +604,61 @@ class TestEnd2End:
             )
             os.environ["RTLSIM_TRACE_DEPTH"] = "3"
         rtlsim_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "ipstitch_rtlsim_" + kind
+            topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
         )
         model.save(rtlsim_chkpt)
-        parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+        parent_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
+        )
         (input_tensor_npy, output_tensor_npy) = get_golden_io_pair(
             topology, wbits, abits, return_topk=1
         )
         y = execute_parent(parent_chkpt, rtlsim_chkpt, input_tensor_npy)
-        model = ModelWrapper(rtlsim_chkpt)
-        perf["cycles_rtlsim"] = model.get_metadata_prop("cycles_rtlsim")
-        # warnings.warn("Estimated & rtlsim performance: " + str(perf))
-        # for (k, v) in perf.items():
-        #    update_dashboard_data(topology, wbits, abits, k, v)
-        update_dashboard_data(
-            topology, wbits, abits, "cycles_rtlsim", perf["cycles_rtlsim"]
-        )
         assert np.isclose(y, output_tensor_npy).all()
 
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq"])
-    def test_throughput_rtlsim(self, topology, wbits, abits, kind):
+    def test_throughput_rtlsim(self, topology, wbits, abits, QONNX_export, kind):
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "ipstitch_rtlsim_" + kind
+            topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         n_nodes = len(model.graph.node)
         perf_est = model.analysis(dataflow_performance)
-        latency = int(model.get_metadata_prop("cycles_rtlsim"))
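+        # Derive latency from a batchsize-1 rtlsim run instead of the stored cycles_rtlsim metadata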
+        ret_b1 = throughput_test_rtlsim(model, batchsize=1)
+        latency = int(ret_b1["cycles"])
         cycles_per_sample_est = perf_est["max_cycles"]
         batchsize = 2 * n_nodes
         ret = throughput_test_rtlsim(model, batchsize=batchsize)
         res_cycles = ret["cycles"]
         est_cycles = latency + cycles_per_sample_est * batchsize
+        # warnings.warn("Estimated & rtlsim performance: " + str(perf))
+        # for (k, v) in perf.items():
+        #    update_dashboard_data(topology, wbits, abits, k, v)
+        update_dashboard_data(topology, wbits, abits, "cycles_rtlsim", latency)
         assert (abs(res_cycles - est_cycles) / res_cycles) < 0.15
 
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.parametrize("kind", ["zynq"])
-    def test_validate_top1(self, topology, wbits, abits, kind):
+    def test_validate_top1(self, topology, wbits, abits, QONNX_export, kind):
         if "TEST_END2END_VALIDATE_TOP1" not in os.environ:
             pytest.skip("TEST_END2END_VALIDATE_TOP1 not set")
-        prepostproc_chkpt = get_checkpoint_name(topology, wbits, abits, "pre_post")
-        streamline_chkpt = get_checkpoint_name(topology, wbits, abits, "streamline")
-        parent_chkpt = get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
-        cppsim_chkpt = get_checkpoint_name(topology, wbits, abits, "cppsim")
+        prepostproc_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "pre_post"
+        )
+        streamline_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "streamline"
+        )
+        parent_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "dataflow_parent"
+        )
+        cppsim_chkpt = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "cppsim"
+        )
         rtlsim_chkpt = get_checkpoint_name(
-            topology, wbits, abits, "ipstitch_rtlsim_" + kind
+            topology, wbits, abits, QONNX_export, "ipstitch_rtlsim_" + kind
         )
         dataset = topology2dataset(topology)
         assert measure_top1_accuracy(prepostproc_chkpt, dataset) > 80
@@ -572,11 +670,11 @@ class TestEnd2End:
     @pytest.mark.vivado
     @pytest.mark.vitis
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_build(self, topology, wbits, abits, kind):
+    def test_build(self, topology, wbits, abits, QONNX_export, kind):
         if kind == "alveo" and ("VITIS_PATH" not in os.environ):
             pytest.skip("VITIS_PATH not set")
         prev_chkpt_name = get_checkpoint_name(
-            topology, wbits, abits, "fifodepth_" + kind
+            topology, wbits, abits, QONNX_export, "fifodepth_" + kind
         )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         cfg = get_build_env(kind, target_clk_ns)
@@ -586,11 +684,32 @@ class TestEnd2End:
         for (k, v) in synth_dct.items():
             update_dashboard_data(topology, wbits, abits, k, v)
         update_dashboard_data(topology, wbits, abits, "board", cfg["board"])
-        model.save(get_checkpoint_name(topology, wbits, abits, "build_" + kind))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "build_" + kind)
+        )
 
+    @pytest.mark.slow
+    @pytest.mark.vivado
+    @pytest.mark.vitis
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_deploy(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "build_" + kind)
+    def test_make_pynq_driver(self, topology, wbits, abits, QONNX_export, kind):
+        if kind == "alveo" and ("VITIS_PATH" not in os.environ):
+            pytest.skip("VITIS_PATH not set")
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "build_" + kind
+        )
+        model = load_test_checkpoint_or_skip(prev_chkpt_name)
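+        # Choose the PYNQ driver platform that matches the build kind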
+        kind_to_driver_platform = {"zynq": "zynq-iodma", "alveo": "alveo"}
+        model = model.transform(MakePYNQDriver(kind_to_driver_platform[kind]))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "driver_" + kind)
+        )
+
+    @pytest.mark.parametrize("kind", ["zynq", "alveo"])
+    def test_deploy(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "driver_" + kind
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
         cfg = get_build_env(kind, target_clk_ns)
         if cfg["ip"] == "":
@@ -605,11 +724,15 @@ class TestEnd2End:
             )
         )
         # save the model to be able to link it to the parent
-        model.save(get_checkpoint_name(topology, wbits, abits, "deploy_" + kind))
+        model.save(
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "deploy_" + kind)
+        )
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_run_on_hw(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "deploy_" + kind)
+    def test_run_on_hw(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "deploy_" + kind
+        )
         model = load_test_checkpoint_or_skip(prev_chkpt_name)  # NOQA
         cfg = get_build_env(kind, target_clk_ns)
         if cfg["ip"] == "":
@@ -618,7 +741,7 @@ class TestEnd2End:
             topology, wbits, abits, return_topk=1
         )
         parent_model = load_test_checkpoint_or_skip(
-            get_checkpoint_name(topology, wbits, abits, "dataflow_parent")
+            get_checkpoint_name(topology, wbits, abits, QONNX_export, "dataflow_parent")
         )
         iname = parent_model.graph.input[0].name
         oname = parent_model.graph.output[0].name
@@ -630,8 +753,10 @@ class TestEnd2End:
         assert np.isclose(y, output_tensor_npy).all()
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
-    def test_throughput_hw(self, topology, wbits, abits, kind):
-        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "deploy_" + kind)
+    def test_throughput_hw(self, topology, wbits, abits, QONNX_export, kind):
+        prev_chkpt_name = get_checkpoint_name(
+            topology, wbits, abits, QONNX_export, "deploy_" + kind
+        )
         end2end_example = "%s_w%da%d_%s" % (topology, wbits, abits, kind)
         model = load_test_checkpoint_or_skip(prev_chkpt_name)  # NOQA
         cfg = get_build_env(kind, target_clk_ns)
@@ -687,9 +812,13 @@ class TestEnd2End:
             ret[largest_bsize]["throughput[images/s]"],
         )
 
-    def test_upload_results_to_dashboard(self, topology, wbits, abits):
-        dashboard_data = get_dashboard_data(topology, wbits, abits)
-        if len(dashboard_data.keys()) > 0:
-            upload_to_end2end_dashboard(dashboard_data)
+    def test_upload_results_to_dashboard(self, topology, wbits, abits, QONNX_export):
+        # ToDo: Extend the dashboard to also cover QONNX-exported models?
+        if QONNX_export:
+            pytest.skip("Dashboard data upload is disabled for QONNX exported models.")
         else:
-            pytest.skip("No data to upload to dashboard")
+            dashboard_data = get_dashboard_data(topology, wbits, abits)
+            if len(dashboard_data.keys()) > 0:
+                upload_to_end2end_dashboard(dashboard_data)
+            else:
+                pytest.skip("No data to upload to dashboard")
diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py
index 7b4cebb52b3e4758746d4054827c6f96e8a4d681..e24d87ca6a505de7d0ed50b01157092eb0a26525 100644
--- a/tests/end2end/test_end2end_cybsec_mlp.py
+++ b/tests/end2end/test_end2end_cybsec_mlp.py
@@ -40,13 +40,16 @@ import torch
 import torch.nn as nn
 import wget
 from brevitas.core.quant import QuantType
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantIdentity, QuantLinear, QuantReLU
 from brevitas.quant_tensor import QuantTensor
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.builder.build_dataflow as build
 import finn.builder.build_dataflow_config as build_cfg
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import make_build_dir
 from finn.util.test import get_build_env, load_test_checkpoint_or_skip
 
@@ -55,13 +58,13 @@ build_kind = "zynq"
 build_dir = os.environ["FINN_BUILD_DIR"]
 
 
-def get_checkpoint_name(step):
+def get_checkpoint_name(step, QONNX_export):
     if step == "build":
         # checkpoint for build step is an entire dir
-        return build_dir + "/end2end_cybsecmlp_build"
+        return build_dir + "/end2end_cybsecmlp_build_QONNX-%d" % (QONNX_export)
     else:
         # other checkpoints are onnx files
-        return build_dir + "/end2end_cybsecmlp_%s.onnx" % (step)
+        return build_dir + "/end2end_cybsecmlp_QONNX-%d_%s.onnx" % (QONNX_export, step)
 
 
 class CybSecMLPForExport(nn.Module):
@@ -82,7 +85,8 @@ class CybSecMLPForExport(nn.Module):
         return out_final
 
 
-def test_end2end_cybsec_mlp_export():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_export(QONNX_export):
     assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
     # load up trained net in Brevitas
     input_size = 593
@@ -116,7 +120,7 @@ def test_end2end_cybsec_mlp_export():
     W_new = np.pad(W_orig, [(0, 0), (0, 7)])
     model[0].weight.data = torch.from_numpy(W_new)
     model_for_export = CybSecMLPForExport(model)
-    export_onnx_path = get_checkpoint_name("export")
+    export_onnx_path = get_checkpoint_name("export", QONNX_export)
     input_shape = (1, 600)
     # create a QuantTensor instance to mark the input as bipolar during export
     input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)
@@ -127,32 +131,61 @@ def test_end2end_cybsec_mlp_export():
         input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True
     )
 
-    bo.export_finn_onnx(
-        model_for_export, export_path=export_onnx_path, input_t=input_qt
-    )
+    if QONNX_export:
+        # With the BrevitasONNXManager we need to manually set
+        # the FINN DataType at the input
+        BrevitasONNXManager.export(
+            model_for_export, input_shape, export_path=export_onnx_path
+        )
+        model = ModelWrapper(export_onnx_path)
+        model.set_tensor_datatype(model.graph.input[0].name, DataType["BIPOLAR"])
+        model.save(export_onnx_path)
+        qonnx_cleanup(export_onnx_path, out_file=export_onnx_path)
+        model = ModelWrapper(export_onnx_path)
+        model = model.transform(ConvertQONNXtoFINN())
+        model.save(export_onnx_path)
+    else:
+        bo.export_finn_onnx(
+            model_for_export, export_path=export_onnx_path, input_t=input_qt
+        )
     assert os.path.isfile(export_onnx_path)
     # fix input datatype
     finn_model = ModelWrapper(export_onnx_path)
     finnonnx_in_tensor_name = finn_model.graph.input[0].name
     assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
     # verify a few exported ops
-    assert finn_model.graph.node[1].op_type == "Add"
-    assert finn_model.graph.node[2].op_type == "Div"
-    assert finn_model.graph.node[3].op_type == "MatMul"
-    assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+    if QONNX_export:
+        # The first "Mul" node doesn't exist in the QONNX export,
+        # because the QuantTensor scale is not exported.
+        # However, this node would have been unity scale anyway and
+        # the models are still equivalent.
+        assert finn_model.graph.node[0].op_type == "Add"
+        assert finn_model.graph.node[1].op_type == "Div"
+        assert finn_model.graph.node[2].op_type == "MatMul"
+        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+    else:
+        assert finn_model.graph.node[0].op_type == "Mul"
+        assert finn_model.get_initializer(finn_model.graph.node[0].input[1]) == 1.0
+        assert finn_model.graph.node[1].op_type == "Add"
+        assert finn_model.graph.node[2].op_type == "Div"
+        assert finn_model.graph.node[3].op_type == "MatMul"
+        assert finn_model.graph.node[-1].op_type == "MultiThreshold"
     # verify datatypes on some tensors
-    assert finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType.BIPOLAR
-    first_matmul_w_name = finn_model.graph.node[3].input[1]
-    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType.INT2
+    assert (
+        finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType["BIPOLAR"]
+    )
+    first_matmul_w_name = finn_model.get_nodes_by_op_type("MatMul")[0].input[1]
+    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType["INT2"]
 
 
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_end2end_cybsec_mlp_build():
-    model_file = get_checkpoint_name("export")
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_build(QONNX_export):
+    model_file = get_checkpoint_name("export", QONNX_export)
     load_test_checkpoint_or_skip(model_file)
     build_env = get_build_env(build_kind, target_clk_ns)
-    output_dir = make_build_dir("test_end2end_cybsec_mlp_build")
+    output_dir = make_build_dir(f"test_end2end_cybsec_mlp_build_QONNX-{QONNX_export}")
 
     cfg = build.DataflowBuildConfig(
         output_dir=output_dir,
@@ -190,13 +223,14 @@ def test_end2end_cybsec_mlp_build():
         est_res_dict = json.load(f)
         assert est_res_dict["total"]["LUT"] == 11360.0
         assert est_res_dict["total"]["BRAM_18K"] == 36.0
-    shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build"))
+    shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build", QONNX_export))
 
 
-def test_end2end_cybsec_mlp_run_on_hw():
+@pytest.mark.parametrize("QONNX_export", [False, True])
+def test_end2end_cybsec_mlp_run_on_hw(QONNX_export):
     build_env = get_build_env(build_kind, target_clk_ns)
     assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
-    deploy_dir = get_checkpoint_name("build")
+    deploy_dir = get_checkpoint_name("build", QONNX_export)
     if not os.path.isdir(deploy_dir):
         pytest.skip(deploy_dir + " not found from previous test step, skipping")
     driver_dir = deploy_dir + "/driver"
diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py
index 1289b02636f030397075a9f580ed0977cd465a88..e459bfbc3e694d5bbc9698db562765b11f6e8c38 100644
--- a/tests/end2end/test_end2end_mobilenet_v1.py
+++ b/tests/end2end/test_end2end_mobilenet_v1.py
@@ -62,9 +62,9 @@ from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.transformation.remove import RemoveIdentityOps
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul
-from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.util.basic import alveo_default_platform, alveo_part_map
 from finn.util.pytorch import NormalizePreProc
@@ -97,7 +97,9 @@ def test_end2end_mobilenet_export():
     bo.export_finn_onnx(preproc, (1, 3, 224, 224), preproc_onnx)
     preproc_model = ModelWrapper(preproc_onnx)
     # set input finn datatype to UINT8
-    preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name, DataType.UINT8)
+    preproc_model.set_tensor_datatype(
+        preproc_model.graph.input[0].name, DataType["UINT8"]
+    )
     preproc_model = preproc_model.transform(InferShapes())
     preproc_model = preproc_model.transform(FoldConstants())
     preproc_model = preproc_model.transform(GiveUniqueNodeNames())
@@ -198,6 +200,7 @@ def test_end2end_mobilenet_lowering():
     )
     model = model.transform(LowerConvsToMatMul())
     model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+    model = model.transform(absorb.AbsorbConsecutiveTransposes())
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(GiveReadableTensorNames())
     model = model.transform(InferDataTypes())
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index 89fab37d6d5225383ccb13a748c83573d6ee4516..5ddff3d36f03d17833e17bc98649a64dabf31577 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -39,7 +39,7 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 
 @pytest.mark.vivado
 def test_code_gen_trafo():
-    idt = wdt = odt = DataType.BIPOLAR
+    idt = wdt = odt = DataType["BIPOLAR"]
     mw = 8
     mh = 8
     pe = 4
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index 6284748b9ccdc422b42bd9e301eb395d8dd1ad45..81e2ff9a7c5829982cdb6121378e9e9e3af81632 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -40,7 +40,7 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 
 @pytest.mark.vivado
 def test_compilation_trafo():
-    idt = wdt = odt = DataType.BIPOLAR
+    idt = wdt = odt = DataType["BIPOLAR"]
     mw = 8
     mh = 8
     pe = 4
diff --git a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
index 4e7030449c87b81d7a492b0e76dd05a047be3858..5cc5f8fa6c1ccd3e5a9e154b6fb2773caf4668a9 100644
--- a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
@@ -72,7 +72,7 @@ from finn.util.basic import gen_finn_dt_tensor
 def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, exec_mode):
     pad, kernel_size, stride, dilation = conv_config
     np.random.seed(0)
-    idt = DataType.UINT4
+    idt = DataType["UINT4"]
 
     in_feature_dim_h, in_feature_dim_w = [10, 1]
     in_chn = 16
@@ -101,7 +101,7 @@ def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, exec_mode):
     input_shape = [1, in_chn, in_feature_dim_h, in_feature_dim_w]
     output_shape = [1, out_chn, out_feature_dim_h, out_feature_dim_w]
 
-    conv_weight_dt = DataType.UINT4
+    conv_weight_dt = DataType["UINT4"]
 
     conv_config = {}
     conv_config["dilations"] = [dilation_h, dilation_w]
diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
index 8dd927fa7628d1500fe644b030278fbaa3f18810..bf690d1d68bc0f580663735c3596c1dfc0a651e8 100644
--- a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
@@ -76,9 +76,13 @@ def make_single_maxpool_modelwrapper(onnx_op_name, ishape, idt, pdt, pshape):
 
 
 # parameter datatype
-@pytest.mark.parametrize("pdt", [DataType.BIPOLAR, DataType.UINT4, DataType.INT2])
+@pytest.mark.parametrize(
+    "pdt", [DataType["BIPOLAR"], DataType["UINT4"], DataType["INT2"]]
+)
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.INT32, DataType.UINT4, DataType.INT4])
+@pytest.mark.parametrize(
+    "idt", [DataType["INT32"], DataType["UINT4"], DataType["INT4"]]
+)
 # function
 @pytest.mark.parametrize("onnx_op_name", ["Add", "Mul"])
 # vector parameter or scalar parameter (broadcast)
@@ -103,10 +107,10 @@ def test_convert_to_hls_channelwise_layer(
 
     # Since there aren't DataTypes with a bit width that is not a power of 2,
     # there are cases where the input won't use its full range.
-    if idt == DataType.INT32:
-        x = gen_finn_dt_tensor(DataType.INT16, (1, ifm_ch, ifm_dim, ifm_dim))
-    elif idt == DataType.UINT32:
-        x = gen_finn_dt_tensor(DataType.UINT16, (1, ifm_ch, ifm_dim, ifm_dim))
+    if idt == DataType["INT32"]:
+        x = gen_finn_dt_tensor(DataType["INT16"], (1, ifm_ch, ifm_dim, ifm_dim))
+    elif idt == DataType["UINT32"]:
+        x = gen_finn_dt_tensor(DataType["UINT16"], (1, ifm_ch, ifm_dim, ifm_dim))
     else:
         x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
 
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
index cf2903a5789d7d3892ac549338b274268c1661b3..9b0f3d68aed655f0b36857d50a085093ea94aecb 100755
--- a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
@@ -79,10 +79,10 @@ def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
 @pytest.mark.slow
 def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
     np.random.seed(0)
-    idt = DataType.UINT4
-    odt = DataType.UINT4
-    conv_weight_dt = DataType.INT4
-    fc_weight_dt = DataType.INT4
+    idt = DataType["UINT4"]
+    odt = DataType["UINT4"]
+    conv_weight_dt = DataType["INT4"]
+    fc_weight_dt = DataType["INT4"]
 
     input_shape, kernel_shape, stride, pad = conv_config
     kernel_size_h, kernel_size_w = kernel_shape
@@ -186,8 +186,8 @@ def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
     model.set_tensor_datatype("global_out", odt)
     model.set_tensor_datatype("conv_param", conv_weight_dt)
     model.set_tensor_datatype("matmul_param", fc_weight_dt)
-    model.set_tensor_datatype("thres1_param", DataType.INT32)
-    model.set_tensor_datatype("thres2_param", DataType.INT32)
+    model.set_tensor_datatype("thres1_param", DataType["INT32"])
+    model.set_tensor_datatype("thres2_param", DataType["INT32"])
 
     model.set_initializer(
         "conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
index deca7c96127fdf03d9feb7504d5a6daebb41a5d5..d96bc987567cdcfcd18a404986c954c7527c7354 100644
--- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -63,7 +63,7 @@ from finn.util.basic import gen_finn_dt_tensor
 def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
     kernel_size, stride, pad = conv_config
     np.random.seed(0)
-    idt = DataType.UINT4
+    idt = DataType["UINT4"]
 
     in_feature_dim = 7
     in_chn = 16
@@ -84,7 +84,7 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
     input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
     output_shape = [1, out_chn, out_feature_dim, out_feature_dim]
 
-    conv_weight_dt = DataType.UINT4
+    conv_weight_dt = DataType["UINT4"]
 
     conv_config = {}
     conv_config["dilations"] = [1, 1]
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
index 37a1c8d8486a535c8ff87f4b06905b3059bba35a..3357ee6d6c1e540818549f2d0df8b8554690ca3c 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
@@ -70,6 +70,7 @@ def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
     model = model.transform(LowerConvsToMatMul())
     model = model.transform(MakeMaxPoolNHWC())
     model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+    model = model.transform(absorb.AbsorbConsecutiveTransposes())
     model = model.transform(ConvertBipolarMatMulToXnorPopcount())
     model = model.transform(Streamline())
     model = model.transform(InferDataLayouts())
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
index b0780c073114351ba136fefe6973114bd1a8505b..6089901566cb412e63cd8acc7a8260081248ba52 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
@@ -138,7 +138,7 @@ def make_model(ch, ifmdim):
 
 
 # data types
-@pytest.mark.parametrize("idt", [DataType.UINT2])
+@pytest.mark.parametrize("idt", [DataType["UINT2"]])
 # channels
 @pytest.mark.parametrize("ch", [16])
 # ifmdim
diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
index 70716e88a4de827be37416b63a925b30d01c342a..3efafc040df07a7d56638bf5ce0b1ce01887343c 100644
--- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
+++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
@@ -118,9 +118,9 @@ def prepare_inputs(input_tensor):
 
 
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.INT4, DataType.INT8])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["INT4"], DataType["INT8"]])
 # output datatype
-@pytest.mark.parametrize("odt", [DataType.UINT4, DataType.INT4])
+@pytest.mark.parametrize("odt", [DataType["UINT4"], DataType["INT4"]])
 # pool configuration:                   ( k,stride, pad, ifm_dim )
 @pytest.mark.parametrize("pool_config", [(7, 7, 0, 7), (3, 2, 1, 5)])
 # input channels
diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py
index 75ce055c0e9a093a5ddeab6b13af8d36d6152fb8..633db668d3bc5de815a313743c06cd74a7166c9c 100644
--- a/tests/fpgadataflow/test_depthwise_convolution.py
+++ b/tests/fpgadataflow/test_depthwise_convolution.py
@@ -60,14 +60,14 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
     ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, total_pad=total_pad)
 
     if act is None:
-        odt = DataType.INT32
+        odt = DataType["INT32"]
     else:
         odt = act
         out_act = oh.make_tensor_value_info(
             "out_act", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ofm_ch]
         )
         T = oh.make_tensor_value_info("T", TensorProto.FLOAT, [ofm_ch, 15])
-        tdt = DataType.INT32
+        tdt = DataType["INT32"]
         thresh_node = oh.make_node(
             "MultiThreshold",
             domain="finn.custom_op.general",
@@ -161,7 +161,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
 # PE
 @pytest.mark.parametrize("pe", [1, 2, 4])
 # Output activation
-@pytest.mark.parametrize("act", [None, DataType.UINT4])
+@pytest.mark.parametrize("act", [None, DataType["UINT4"]])
 # kernel size
 @pytest.mark.parametrize("k", [2, 4])
 # stride
@@ -171,7 +171,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
-    idt = wdt = DataType.INT4
+    idt = wdt = DataType["INT4"]
     ifm_dim = 6
     ifm_ch = 4
 
@@ -203,7 +203,7 @@ def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
 # PE
 @pytest.mark.parametrize("pe", [1, 2, 4])
 # Output activation
-@pytest.mark.parametrize("act", [None, DataType.UINT4])
+@pytest.mark.parametrize("act", [None, DataType["UINT4"]])
 # kernel size
 @pytest.mark.parametrize("k", [2, 4])
 # stride
@@ -213,7 +213,7 @@ def test_depthwise_conv_hls_cppsim(act, pe, k, stride, padding):
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_depthwise_conv_hls_rtlsim(act, pe, k, stride, padding):
-    idt = wdt = DataType.INT4
+    idt = wdt = DataType["INT4"]
     ifm_dim = 6
     ifm_ch = 4
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
index 021d58b4a382f2fe3d1a2c3c2a4ce8d7f3c87ae5..8cbf54ec188b12c67e02a33e3540718e9b08f382 100644
--- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
@@ -82,7 +82,7 @@ def prepare_inputs(input1, input2):
 
 
 # data types
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.UINT8])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["UINT8"]])
 # channels
 @pytest.mark.parametrize("ch", [1, 64])
 # folding
diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
index 15bcd5fa8a937aa313f2c73f253f934f6bbd332b..949046d4ae313b852471e7d8a93e44fea48f7b0f 100644
--- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
+++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
@@ -85,11 +85,11 @@ def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs):
 
 
 # activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.INT8])
+@pytest.mark.parametrize("act", [DataType["INT8"]])
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["INT4"]])
 # param datatype
-@pytest.mark.parametrize("pdt", [DataType.INT4])
+@pytest.mark.parametrize("pdt", [DataType["INT4"]])
 # folding, -1 is maximum possible
 @pytest.mark.parametrize("nf", [-1, 2])
 # number of input features
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index 86622cf6d44dbda3af417283f5ceea1d1ebc3bf0..47cd7e7ba1df76cc793cd0946581239a6883874e 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -131,7 +131,7 @@ def prepare_inputs(input_tensor):
 
 
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT2"]])
 # kernel size
 @pytest.mark.parametrize("k", [2, 3])
 # input dimension
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
index b3d695469b7a4fa1f4235feee29e7fc3dece0df5..8440ac1fe46a0d1ea4db3d76489dfc4ce68ff642 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
@@ -144,8 +144,8 @@ def prepare_inputs(input_tensor):
 
 
 # input datatype
-# @pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT8])
-@pytest.mark.parametrize("idt", [DataType.INT8])
+# @pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT8"]])
+@pytest.mark.parametrize("idt", [DataType["INT8"]])
 # kernel size
 @pytest.mark.parametrize("k", [[4, 1]])
 # input dimension
diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
index 6b776e8827d8e76102bd069ae8567051ed0580ba..73bf1165afa9418be0c89f77797de538275fd220 100644
--- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
@@ -85,7 +85,7 @@ def prepare_inputs(input_tensor, idt):
 
 
 # data type
-@pytest.mark.parametrize("idt", [DataType.INT4, DataType.UINT16])
+@pytest.mark.parametrize("idt", [DataType["INT4"], DataType["UINT16"]])
 # channels
 @pytest.mark.parametrize("ch", [64])
 # folding
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py
index b0af4382383d8935c69e362b1a43db536979c784..248b591eb48d7cfd6f121738a9bca525c38a45f8 100644
--- a/tests/fpgadataflow/test_fpgadataflow_dwc.py
+++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py
@@ -82,7 +82,7 @@ def prepare_inputs(input_tensor, dt):
 # outWidth
 @pytest.mark.parametrize("OUTWidth", [2, 4])
 # finn_dtype
-@pytest.mark.parametrize("finn_dtype", [DataType.BIPOLAR, DataType.INT2])
+@pytest.mark.parametrize("finn_dtype", [DataType["BIPOLAR"], DataType["INT2"]])
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 49c326d2a34e7262826505ae32f2509b42ae0a35..02c3a3dc9506152fe999873df0612e76a5c9cefd 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -59,11 +59,11 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
     # StreamingFC:
     # - specify their datatypes as such
     # - specify their datatypes as BINARY and use binaryXnorMode
-    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+    if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
         # we'll internally convert weights/inputs to binary and specify the
         # datatypes as such, and also set the binaryXnorMode attribute to 1
-        export_wdt = DataType.BINARY
-        export_idt = DataType.BINARY
+        export_wdt = DataType["BINARY"]
+        export_idt = DataType["BINARY"]
         binary_xnor_mode = 1
     else:
         export_wdt = wdt
@@ -75,7 +75,7 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
     if T is not None:
         no_act = 0
         node_inp_list = ["inp", "weights", "thresh"]
-        if odt == DataType.BIPOLAR:
+        if odt == DataType["BIPOLAR"]:
             actval = 0
         else:
             actval = odt.min()
@@ -123,7 +123,7 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
 
 
 def prepare_inputs(input_tensor, idt, wdt):
-    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+    if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
         # convert bipolar to binary
         return {"inp": (input_tensor + 1) / 2}
     else:
@@ -133,11 +133,11 @@ def prepare_inputs(input_tensor, idt, wdt):
 # mem_mode: const, decoupled or external
 @pytest.mark.parametrize("mem_mode", ["const", "decoupled", "external"])
 # activation: None or DataType
-@pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("act", [None, DataType["BIPOLAR"], DataType["INT4"]])
 # weight datatype
-@pytest.mark.parametrize("wdt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"], DataType["INT4"]])
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
 # neuron folding, -1 is maximum possible
 @pytest.mark.parametrize("nf", [-1, 2, 1])
 # synapse folding, -1 is maximum possible
@@ -165,10 +165,10 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
         # no activation, produce accumulators
         T = None
         tdt = None
-        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-            odt = DataType.UINT32
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            odt = DataType["UINT32"]
         else:
-            odt = DataType.INT32
+            odt = DataType["INT32"]
     else:
         odt = act
         (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -177,13 +177,13 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
         # provide non-decreasing thresholds
         T = np.sort(T, axis=1)
         # generate thresholds for activation
-        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-            tdt = DataType.UINT32
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            tdt = DataType["UINT32"]
             # bias thresholds to be positive
             T = np.ceil((T + mw) / 2)
             assert (T >= 0).all()
         else:
-            tdt = DataType.INT32
+            tdt = DataType["INT32"]
     model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
     for node in model.graph.node:
         # lookup op_type in registry of CustomOps
@@ -194,14 +194,14 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
     model = model.transform(CompileCppSim())
     # prepare input data
     input_dict = prepare_inputs(x, idt, wdt)
-    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+    if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
         # convert inputs to binary and use xnorpopcountmatmul
         y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
     else:
         y = np.matmul(x, W)
     if T is not None:
         y = multithreshold(y, T)
-        if act == DataType.BIPOLAR:
+        if act == DataType["BIPOLAR"]:
             # binary to bipolar
             y = 2 * y - 1
         else:
@@ -220,11 +220,11 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
 # mem_mode: const, decoupled or external
 @pytest.mark.parametrize("mem_mode", ["const", "decoupled", "external"])
 # activation: None or DataType
-@pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("act", [None, DataType["BIPOLAR"], DataType["INT4"]])
 # weight datatype
-@pytest.mark.parametrize("wdt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"], DataType["INT4"]])
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
 # neuron folding, -1 is maximum possible
 @pytest.mark.parametrize("nf", [-1, 2, 1])
 # synapse folding, -1 is maximum possible
@@ -252,10 +252,10 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
         # no activation, produce accumulators
         T = None
         tdt = None
-        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-            odt = DataType.UINT32
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            odt = DataType["UINT32"]
         else:
-            odt = DataType.INT32
+            odt = DataType["INT32"]
     else:
         odt = act
         (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -264,13 +264,13 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
         # provide non-decreasing thresholds
         T = np.sort(T, axis=1)
         # generate thresholds for activation
-        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-            tdt = DataType.UINT32
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            tdt = DataType["UINT32"]
             # bias thresholds to be positive
             T = np.ceil((T + mw) / 2)
             assert (T >= 0).all()
         else:
-            tdt = DataType.INT32
+            tdt = DataType["INT32"]
     model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
     for node in model.graph.node:
         # lookup op_type in registry of CustomOps
@@ -279,14 +279,14 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
 
     # prepare input data
     input_dict = prepare_inputs(x, idt, wdt)
-    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+    if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
         # convert inputs to binary and use xnorpopcountmatmul
         y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
     else:
         y = np.matmul(x, W)
     if T is not None:
         y = multithreshold(y, T)
-        if act == DataType.BIPOLAR:
+        if act == DataType["BIPOLAR"]:
             # binary to bipolar
             y = 2 * y - 1
         else:
@@ -319,11 +319,11 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
 # mem_mode: decoupled
 @pytest.mark.parametrize("mem_mode", ["decoupled"])
 # activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.INT4])
+@pytest.mark.parametrize("act", [DataType["INT4"]])
 # weight datatype
-@pytest.mark.parametrize("wdt", [DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["INT4"]])
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["INT4"]])
 # neuron folding, -1 is maximum possible
 @pytest.mark.parametrize("nf", [-1])
 # synapse folding, -1 is maximum possible
@@ -352,10 +352,10 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
         # no activation, produce accumulators
         T = None
         tdt = None
-        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-            odt = DataType.UINT32
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            odt = DataType["UINT32"]
         else:
-            odt = DataType.INT32
+            odt = DataType["INT32"]
     else:
         odt = act
         (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw)
@@ -364,13 +364,13 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
         # provide non-decreasing thresholds
         T = np.sort(T, axis=1)
         # generate thresholds for activation
-        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
-            tdt = DataType.UINT32
+        if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
+            tdt = DataType["UINT32"]
             # bias thresholds to be positive
             T = np.ceil((T + mw) / 2)
             assert (T >= 0).all()
         else:
-            tdt = DataType.INT32
+            tdt = DataType["INT32"]
     model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
     for node in model.graph.node:
         # lookup op_type in registry of CustomOps
@@ -379,14 +379,14 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
 
     # prepare input data
     input_dict = prepare_inputs(x, idt, wdt)
-    if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+    if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]:
         # convert inputs to binary and use xnorpopcountmatmul
         y = xp.xnorpopcountmatmul((x + 1) / 2, (W + 1) / 2)
     else:
         y = np.matmul(x, W)
     if T is not None:
         y = multithreshold(y, T)
-        if act == DataType.BIPOLAR:
+        if act == DataType["BIPOLAR"]:
             # binary to bipolar
             y = 2 * y - 1
         else:
diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py
index 81f66c42ca76d42fe8ee50576d72007f6ca6c12f..4d3074fe14617df4386f060b6a476734931fb4ca 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fifo.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py
@@ -86,7 +86,7 @@ def prepare_inputs(input_tensor, dt):
 # outWidth
 @pytest.mark.parametrize("depth", [16])
 # finn_dtype
-@pytest.mark.parametrize("finn_dtype", [DataType.BIPOLAR])  # , DataType.INT2])
+@pytest.mark.parametrize("finn_dtype", [DataType["BIPOLAR"]])  # , DataType["INT2"]])
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
index 5db12ee22828e43e276ed85f04f985653fe0a2dd..b564273c0927938859dc438dce619e7067a7ad74 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
@@ -108,7 +108,7 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty
 # PaddingStyle: selects behavior when (odim-idim)%2 != 0
 @pytest.mark.parametrize("pad_style", [2])
 # FINN input datatype
-@pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4])
+@pytest.mark.parametrize("idt", [DataType["INT2"], DataType["INT4"]])
 # execution mode
 @pytest.mark.parametrize("mode", ["cppsim", "rtlsim"])
 @pytest.mark.slow
diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
index f1373123a69f4c3d02b191c0f0560b59d2c9a7b2..2299cc6e8f397df718d2fd65be8a562c2457e42d 100644
--- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
+++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
@@ -78,7 +78,7 @@ def prepare_inputs(input_tensor, idt):
 
 
 # data type
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.UINT16])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["UINT16"]])
 # channels
 @pytest.mark.parametrize("ch", [64])
 # folding
@@ -127,7 +127,7 @@ def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
         # commented out, needs performance debug:
-        # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
+        # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType["UINT4"]]
         # assert False where False =
         # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103))
         # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index 9a6050a55dd86ca5064b293f87304cbb1365edea..a4e75f5254b3bfd96871dbf32b8400edc2d55379 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -44,7 +44,6 @@ from finn.transformation.fpgadataflow.floorplan import Floorplan
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
-from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
 from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
@@ -69,9 +68,9 @@ ip_stitch_model_dir = os.environ["FINN_BUILD_DIR"]
 def create_one_fc_model(mem_mode="const"):
     # create a model with a StreamingFCLayer instance with no activation
     # the wider range of the full accumulator makes debugging a bit easier
-    wdt = DataType.INT2
-    idt = DataType.INT32
-    odt = DataType.INT32
+    wdt = DataType["INT2"]
+    idt = DataType["INT32"]
+    odt = DataType["INT32"]
     m = 4
     no_act = 1
     binary_xnor_mode = 0
@@ -122,9 +121,9 @@ def create_one_fc_model(mem_mode="const"):
 
 def create_two_fc_model(mem_mode="decoupled"):
     # create a model with two StreamingFCLayer instances
-    wdt = DataType.INT2
-    idt = DataType.INT32
-    odt = DataType.INT32
+    wdt = DataType["INT2"]
+    idt = DataType["INT32"]
+    odt = DataType["INT32"]
     m = 4
     actval = 0
     no_act = 1
@@ -363,11 +362,6 @@ def test_fpgadataflow_ipstitch_zynqbuild(board):
         assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
         assert os.path.isfile(sdp_node.get_nodeattr("model"))
         model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
-    # generate inputs for remote exec
-    iname = "inp"
-    idt = model.get_tensor_datatype(iname)
-    ishape = model.get_tensor_shape(iname)
-    x = gen_finn_dt_tensor(idt, ishape)
     # bitfile using ZynqBuild
     model = model.transform(ZynqBuild(board, 10))
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_customzynq.onnx")
@@ -375,22 +369,3 @@ def test_fpgadataflow_ipstitch_zynqbuild(board):
     bitfile_name = model.get_metadata_prop("bitfile")
     assert bitfile_name is not None
     assert os.path.isfile(bitfile_name)
-    # deployment
-    try:
-        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
-        if ip == "":
-            pytest.skip("PYNQ board IP address not specified")
-        username = os.getenv("PYNQ_USERNAME", "xilinx")
-        password = os.getenv("PYNQ_PASSWORD", "xilinx")
-        port = os.getenv("PYNQ_PORT", 22)
-        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
-        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
-        deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
-        assert deployment_dir is not None
-        assert os.path.isdir(deployment_dir)
-        # remote exec
-        input_dict = {"global_in": x}
-        outp = execute_onnx(model, input_dict)
-        assert np.isclose(outp["global_out"], x).all()
-    except KeyError:
-        pytest.skip("PYNQ board IP address not specified")
diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
index 8997208a648fa79439a882de23865496ba527858..8ed06c8bdf1c0dbfab2f8141bf724132f4a24705 100644
--- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py
+++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
@@ -81,7 +81,9 @@ def prepare_inputs(input_tensor, idt):
     return {"inp": input_tensor}
 
 
-@pytest.mark.parametrize("idt", [DataType.UINT8, DataType.UINT16, DataType.INT16])
+@pytest.mark.parametrize(
+    "idt", [DataType["UINT8"], DataType["UINT16"], DataType["INT16"]]
+)
 # labels
 @pytest.mark.parametrize("labels", [10, 100])
 # folding
diff --git a/tests/fpgadataflow/test_fpgadataflow_lookup.py b/tests/fpgadataflow/test_fpgadataflow_lookup.py
new file mode 100644
index 0000000000000000000000000000000000000000..45678bbdf22c21d794777aba27d9070b42238267
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_lookup.py
@@ -0,0 +1,132 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+import torch
+from brevitas.export import FINNManager
+from torch import nn
+
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.onnx_exec import execute_onnx
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.convert_to_hls_layers import InferLookupLayer
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import gen_finn_dt_tensor
+
+
+def make_lookup_model(embeddings, ishape, idt, edt):
+    num_embeddings, embedding_dim = embeddings.shape
+
+    class LookupModel(nn.Module):
+        def __init__(self, num_embeddings, embedding_dim):
+            super().__init__()
+            self.lookup = nn.Embedding(
+                num_embeddings=num_embeddings, embedding_dim=embedding_dim
+            )
+
+        def forward(self, x):
+            x = self.lookup(x)
+            return x
+
+    torch_model = LookupModel(num_embeddings, embedding_dim)
+    input_t = torch.zeros(ishape, dtype=torch.int64)
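+    # export to FINN-ONNX via Brevitas' FINNManager; nn.Embedding lowers to an ONNX Gather node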
+    ret = FINNManager.export(torch_model, input_t=input_t, opset_version=11)
+    model = ModelWrapper(ret)
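+    # annotate FINN datatypes and plug the embedding table in as an initializer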
+    iname = model.graph.input[0].name
+    ename = model.graph.node[0].input[0]
+    model.set_tensor_datatype(iname, idt)
+    eshape = model.get_tensor_shape(ename)
+    assert tuple(eshape) == embeddings.shape
+    model.set_initializer(ename, embeddings)
+    model.set_tensor_datatype(ename, edt)
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    return model
+
+
+# embedding DataType
+@pytest.mark.parametrize("edt", [DataType["FIXED<8,2>"]])
+# other embedding config
+@pytest.mark.parametrize(
+    "embedding_cfg", [(130, DataType["UINT8"], 25), (5145, DataType["UINT16"], 20)]
+)
+# execution mode
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+@pytest.mark.vivado
+@pytest.mark.slow
+def test_fpgadataflow_lookup(edt, embedding_cfg, exec_mode):
+    ishape = (1, 10)
+    num_embeddings, idt, embedding_dim = embedding_cfg
+    eshape = (num_embeddings, embedding_dim)
+    exp_oshape = tuple(list(ishape) + [embedding_dim])
+    embeddings = gen_finn_dt_tensor(edt, eshape)
+    model = make_lookup_model(embeddings, ishape, idt, edt)
+    assert len(model.graph.node) == 1
+    assert model.graph.node[0].op_type == "Gather"
+    iname = model.graph.input[0].name
+    ename = model.graph.node[0].input[0]
+    oname = model.graph.output[0].name
+    assert model.get_tensor_datatype(iname) == idt
+    assert model.get_tensor_datatype(ename) == edt
+    assert model.get_tensor_datatype(oname) == edt
+    assert tuple(model.get_tensor_shape(ename)) == eshape
+    assert tuple(model.get_tensor_shape(oname)) == exp_oshape
+    assert (model.get_initializer(ename) == embeddings).all()
+    itensor = gen_finn_dt_tensor(idt, ishape).astype(np.int64)
+    itensor = np.clip(itensor, 0, num_embeddings - 1)
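+    # golden reference: executing the ONNX graph must match a plain numpy gather along axis 0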
+    ret = execute_onnx(model, {iname: itensor})
+    exp_out = np.take(embeddings, itensor, axis=0)
+    assert (exp_out == ret[oname]).all()
+    # call transformation to convert to HLS and verify conversion
+    model = model.transform(InferLookupLayer())
+    assert model.graph.node[0].op_type == "Lookup"
+    assert model.graph.node[0].input[0] == iname
+    assert model.graph.node[0].input[1] == ename
+    assert model.graph.node[0].output[0] == oname
+    if exec_mode == "cppsim":
+        model = model.transform(PrepareCppSim())
+        model = model.transform(CompileCppSim())
+        model = model.transform(SetExecMode("cppsim"))
+    elif exec_mode == "rtlsim":
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
+        model = model.transform(HLSSynthIP())
+        model = model.transform(SetExecMode("rtlsim"))
+        model = model.transform(PrepareRTLSim())
+    ret_sim = execute_onnx(model, {iname: itensor})
+    assert (exp_out == ret_sim[oname]).all()
diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
index 9def746c1c872a8b99b5bab48e8d0bd20798cedd..fe52a73fc07df8551442e975c5eb378c132a56d7 100644
--- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
+++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
@@ -54,9 +54,9 @@ def test_res_estimate():
     mw = mh = 4
     simd = 1
     pe = 1
-    idt = DataType.INT2
-    wdt = DataType.INT2
-    odt = DataType.INT2
+    idt = DataType["INT2"]
+    wdt = DataType["INT2"]
+    odt = DataType["INT2"]
     actval = odt.min()
 
     inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
index b87241de56870cad70d08583b24292e0da91109e..341bd3f37041c9b5a1526e99b2c4bad4d3dd3029 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
@@ -43,6 +43,7 @@ from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
@@ -97,9 +98,9 @@ def make_single_thresholding_modelwrapper(T, pe, idt, odt, actval, mem_mode):
 
 
 # activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.INT4, DataType.BIPOLAR])
+@pytest.mark.parametrize("act", [DataType["INT4"], DataType["BIPOLAR"]])
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.INT16, DataType.UINT16])
+@pytest.mark.parametrize("idt", [DataType["INT16"], DataType["UINT16"]])
 # folding, -1 is maximum possible
 @pytest.mark.parametrize("nf", [-1, 2, 1])
 # number of input features
@@ -124,12 +125,12 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
     T = np.random.randint(idt.min(), idt.max() + 1, (ich, n_steps)).astype(np.float32)
     # make the vivado_hls threshold bug appear (incorrect rtlsim result when first
     # threshold of first channel is zero, while using BIPOLAR output)
-    if act == DataType.BIPOLAR:
+    if act == DataType["BIPOLAR"]:
         T[0][0] = 0
     # provide non-decreasing thresholds
     T = np.sort(T, axis=1)
 
-    if odt == DataType.BIPOLAR:
+    if odt == DataType["BIPOLAR"]:
         actval = 0
     else:
         actval = odt.min()
@@ -153,7 +154,7 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
     input_dict = {"inp": x}
 
     y = multithreshold(x, T)
-    if act == DataType.BIPOLAR:
+    if act == DataType["BIPOLAR"]:
         # binary to bipolar
         y = 2 * y - 1
     else:
@@ -185,8 +186,8 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode, mem_mode):
 @pytest.mark.vivado
 def test_runtime_thresholds_single_layer():
     mem_mode = "decoupled"
-    act = DataType.INT4
-    idt = DataType.INT16
+    act = DataType["INT4"]
+    idt = DataType["INT16"]
     nf = 8
     ich = 16
     pe = ich // nf
@@ -201,7 +202,7 @@ def test_runtime_thresholds_single_layer():
     # provide non-decreasing thresholds
     T = np.sort(T, axis=1)
 
-    if odt == DataType.BIPOLAR:
+    if odt == DataType["BIPOLAR"]:
         actval = 0
     else:
         actval = odt.min()
@@ -216,6 +217,7 @@ def test_runtime_thresholds_single_layer():
     old_weight_stream = map(lambda x: int(x, 16), old_weight_stream.split("\n"))
     old_weight_stream = list(old_weight_stream)
     # need to create stitched IP for runtime weight testing
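+    # insert FIFOs into the graph before stitching the IP together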
+    model = model.transform(InsertFIFO(True))
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
@@ -243,7 +245,7 @@ def test_runtime_thresholds_single_layer():
     # old weights (see above)
     y = exec_ctx["outp"][1]
     expected = multithreshold(in_tensor, T)[1]
-    if act == DataType.BIPOLAR:
+    if act == DataType["BIPOLAR"]:
         # binary to bipolar
         expected = 2 * expected - 1
     else:
@@ -272,7 +274,7 @@ def test_runtime_thresholds_single_layer():
     rtlsim_exec(model, exec_ctx, pre_hook=write_weights)
     y = exec_ctx["outp"][1]
     expected = multithreshold(in_tensor, new_weights)[1]
-    if act == DataType.BIPOLAR:
+    if act == DataType["BIPOLAR"]:
         # binary to bipolar
         expected = 2 * expected - 1
     else:
diff --git a/tests/fpgadataflow/test_fpgadataflow_upsampler.py b/tests/fpgadataflow/test_fpgadataflow_upsampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..1709cfe32904a5ed369f8399150a8a1d05f4b781
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_upsampler.py
@@ -0,0 +1,201 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+import os
+import torch
+from brevitas.export import FINNManager
+from torch import nn
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.base import Transformation
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.convert_to_hls_layers import InferUpsample
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.make_input_chanlast import MakeInputChannelsLast
+
+tmpdir = os.environ["FINN_BUILD_DIR"]
+
+
+class ForceDataTypeForTensors(Transformation):
+    """
+    Forces a certain datatype for all tensors in a model.
+    """
+
+    def __init__(self, dType=DataType["INT8"]):
+        super().__init__()
+        self._dType = dType
+
+    def apply(self, model):
+        graph = model.graph
+        for n in graph.node:
+            for inp in n.input:
+                model.set_tensor_datatype(inp, self._dType)
+            for inp in n.output:
+                model.set_tensor_datatype(inp, self._dType)
+
+        return model, False
+
+
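+# transpose permutations for converting between NCHW and NHWC tensor layouts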
+_to_chan_last_args = (0, 2, 3, 1)
+_to_chan_first_args = (0, 3, 1, 2)
+
+
+class TransposeUpsampleIO(Transformation):
+    """
+    Converts the inputs and outputs of all Upsample and Resize nodes
+    from NCHW to NHWC.
+    """
+
+    def apply(self, model):
+        graph = model.graph
+        for n in graph.node:
+            if n.op_type == "Upsample" or n.op_type == "Resize":
+                # Set input shape
+                inp = n.input[0]
+                NCHW_shape = model.get_tensor_shape(inp)
+                NHWC_shape = [NCHW_shape[idx] for idx in _to_chan_last_args]
+                model.set_tensor_shape(inp, NHWC_shape)
+                # Set output shape
+                out = n.output[0]
+                NCHW_shape = model.get_tensor_shape(out)
+                NHWC_shape = [NCHW_shape[idx] for idx in _to_chan_last_args]
+                model.set_tensor_shape(out, NHWC_shape)
+        return model, False
+
+
+class PyTorchTestModel(nn.Module):
+    def __init__(self, upscale_factor=2):
+        super(PyTorchTestModel, self).__init__()
+        self.m = nn.Upsample(
+            scale_factor=upscale_factor,
+            mode="nearest",
+        )
+
+    def forward(self, x):
+        x = self.m(x)
+        return x
+
+
+# tensor datatype
+@pytest.mark.parametrize("dt", [DataType["INT8"]])
+# Width/height of square input feature map
+@pytest.mark.parametrize("IFMDim", [3, 5])
+# upscaling factor
+@pytest.mark.parametrize("scale", [2, 3])
+# Number of input/output channels
+@pytest.mark.parametrize("NumChannels", [4])
+# execution mode
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+@pytest.mark.vivado
+@pytest.mark.slow
+def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode):
+    atol = 1e-3
+    # Create the test model and inputs for it
+    torch_model = PyTorchTestModel(upscale_factor=scale)
+    input_shape = (1, NumChannels, IFMDim, IFMDim)
+    test_in = torch.arange(0, np.prod(np.asarray(input_shape)))
+    # Limit the input to values valid for the given datatype
+    test_in %= dt.max() - dt.min() + 1
+    test_in += dt.min()
+    # Additionally, make sure we always start with 0 for convenience.
+    test_in = torch.roll(test_in, dt.min())
+    test_in = test_in.view(*input_shape).type(torch.float32)
+
+    # Get golden PyTorch and ONNX outputs
+    golden_torch_float = torch_model(test_in)
+    export_path = f"{tmpdir}/Upsample_exported.onnx"
+    FINNManager.export(
+        torch_model, input_shape=input_shape, export_path=export_path, opset_version=11
+    )
+    model = ModelWrapper(export_path)
+    input_dict = {model.graph.input[0].name: test_in.numpy()}
+    golden_output_dict = oxe.execute_onnx(model, input_dict, True)
+    golden_result = golden_output_dict[model.graph.output[0].name]
+
+    # Make sure PyTorch and ONNX match
+    pyTorch_onnx_match = np.isclose(golden_result, golden_torch_float).all()
+    assert pyTorch_onnx_match, "ONNX and PyTorch upsampling output don't match."
+
+    # Prep model for execution
+    model = ModelWrapper(export_path)
+    # model = model.transform(TransposeUpsampleIO())
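+    # convert to channels-last layout, absorb the resulting transpose into the Resize node,
+    # then let InferUpsample map it to the HLS upsampling layer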
+    model = model.transform(MakeInputChannelsLast())
+    model = model.transform(InferDataLayouts())
+    model = model.transform(absorb.AbsorbTransposeIntoResize())
+    model = model.transform(InferShapes())
+    model = model.transform(ForceDataTypeForTensors(dType=dt))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(InferUpsample())
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+
+    # Check that all nodes are UpsampleNearestNeighbour_Batch nodes
+    for n in model.get_finn_nodes():
+        node_check = n.op_type == "UpsampleNearestNeighbour_Batch"
+        assert node_check, "All nodes should be UpsampleNearestNeighbour_Batch nodes."
+
+    # Prep sim
+    if exec_mode == "cppsim":
+        model = model.transform(PrepareCppSim())
+        model = model.transform(CompileCppSim())
+        model = model.transform(SetExecMode("cppsim"))
+    elif exec_mode == "rtlsim":
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(PrepareIP("xc7z020clg400-1", 10))
+        model = model.transform(HLSSynthIP())
+        model = model.transform(SetExecMode("rtlsim"))
+        model = model.transform(PrepareRTLSim())
+    else:
+        raise Exception("Unknown exec_mode")
+
+    # Run sim
+    test_in_transposed = test_in.numpy().transpose(_to_chan_last_args)
+    input_dict = {model.graph.input[0].name: test_in_transposed}
+    output_dict = oxe.execute_onnx(model, input_dict, True)
+    test_result = output_dict[model.graph.output[0].name]
+    output_matches = np.isclose(golden_result, test_result, atol=atol).all()
+
+    if exec_mode == "cppsim":
+        assert output_matches, "Cppsim output doesn't match ONNX/PyTorch."
+    elif exec_mode == "rtlsim":
+        assert output_matches, "Rtlsim output doesn't match ONNX/PyTorch."
diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py
index 36b844deab4e28ff35290a170f713a64be839e8a..6f39994bf27594a063a1e66c5bba7867eaabef6e 100644
--- a/tests/fpgadataflow/test_fpgadataflow_vvau.py
+++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py
@@ -141,11 +141,11 @@ def prepare_inputs(input_tensor):
 
 
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.UINT8])
+@pytest.mark.parametrize("idt", [DataType["UINT4"], DataType["UINT8"]])
 # weight datatype
-@pytest.mark.parametrize("wdt", [DataType.INT4])
+@pytest.mark.parametrize("wdt", [DataType["INT4"]])
 # activation: None or DataType
-@pytest.mark.parametrize("act", [DataType.UINT4, None])
+@pytest.mark.parametrize("act", [DataType["UINT4"], None])
 # PE
 @pytest.mark.parametrize("pe", [1, "channels"])
 # Input image shape
@@ -187,14 +187,14 @@ def test_fpgadataflow_vvau(
     if act is None:
         T = None
         tdt = None
-        odt = DataType.INT32
+        odt = DataType["INT32"]
     else:
         odt = act
         (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w * channels)
         n_steps = act.get_num_possible_values() - 1
         T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32)
         T = np.sort(T, axis=1)
-        tdt = DataType.INT32
+        tdt = DataType["INT32"]
 
     model = _make_single_vvau_modelwrapper(
         W, pe, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index 11ca79471d4eb2642a141ecdda9b4c55714ec76c..236eb2a0342a2782f106761f4cd356888a2f8630 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -47,12 +47,15 @@ from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
+    k_h, k_w = k
+    ifm_dim_h, ifm_dim_w = ifm_dim
+    ofm_dim_h, ofm_dim_w = ofm_dim
     odt = idt
     inp = helper.make_tensor_value_info(
-        "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
+        "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
     )
     outp = helper.make_tensor_value_info(
-        "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ifm_ch]
+        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch]
     )
 
     mp_node = helper.make_node(
@@ -60,8 +63,8 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
         ["inp"],
         ["outp"],
         domain="finn.custom_op.general",
-        kernel_shape=[k, k],
-        strides=[k, k],
+        kernel_shape=[k_h, k_w],
+        strides=[k_h, k_w],
         pads=[0, 0, 0, 0],
     )
     graph = helper.make_graph(
@@ -78,12 +81,15 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
 
 
 def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
+    k_h, k_w = k
+    ifm_dim_h, ifm_dim_w = ifm_dim
+    ofm_dim_h, ofm_dim_w = ofm_dim
     odt = idt
     inp = helper.make_tensor_value_info(
-        "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
+        "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
     )
     outp = helper.make_tensor_value_info(
-        "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, ifm_ch]
+        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch]
     )
 
     smp_node = helper.make_node(
@@ -92,9 +98,9 @@ def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        PoolDim=k,
+        PoolDim=[k_h, k_w],
         NumChannels=ifm_ch,
-        ImgDim=ifm_dim,
+        ImgDim=[ifm_dim_h, ifm_dim_w],
         dataType=idt.name,
     )
     graph = helper.make_graph(
@@ -115,24 +121,42 @@ def prepare_inputs(input_tensor):
 
 
 # input datatype
-@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2])
+@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
+# 1d maxpool
+@pytest.mark.parametrize("dim_1d", [False, True])
 # kernel size
 @pytest.mark.parametrize("k", [2, 4])
 # input dimension
-@pytest.mark.parametrize("ifm_dim", [4, 6, 8])
+@pytest.mark.parametrize("ifm_dim", [4, 8])
 # input channels
-@pytest.mark.parametrize("ifm_ch", [1, 2])  # , 2, 3, 4])
+@pytest.mark.parametrize("ifm_ch", [1, 3])  # 1,3
 # execution mode
 @pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
-    stride = k
-    ofm_dim = int(((ifm_dim - k) / stride) + 1)
-    if ifm_dim % k != 0:
+def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
+    ifm_dim_h = ifm_dim
+    k_h = k
+    if dim_1d:
+        ifm_dim_w = 1
+        k_w = 1
+    else:
+        ifm_dim_w = ifm_dim_h
+        k_w = k_h
+    ifm_dim = (ifm_dim_h, ifm_dim_w)
+    k = (k_h, k_w)
+
+    stride_h = k_h
+    stride_w = k_w
+    ofm_dim_h = int(((ifm_dim_h - k_h) / stride_h) + 1)
+    ofm_dim_w = int(((ifm_dim_w - k_w) / stride_w) + 1)
+    ofm_dim = (ofm_dim_h, ofm_dim_w)
+    if idt == DataType["BIPOLAR"] and dim_1d:
+        pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
+    if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
         pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")
 
-    x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
+    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
     # prepare input data
     input_dict = prepare_inputs(x)
 
@@ -152,7 +176,7 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
         model = model.transform(HLSSynthIP())
         model = model.transform(PrepareRTLSim())
     else:
-        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")
+        raise Exception("Unknown exec_mode in test_layer_streaming_maxpool_batch")
 
     # execute model
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py
index 73b1315592af79145e1b7c6f147b3ede7e066bce..0196a78d5c4254d7cb116641f946bcccb9e1ebc9 100644
--- a/tests/fpgadataflow/test_runtime_weights.py
+++ b/tests/fpgadataflow/test_runtime_weights.py
@@ -36,8 +36,8 @@ from finn.core.rtlsim_exec import rtlsim_exec
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor
 from finn.util.create import hls_random_mlp_maker
@@ -49,8 +49,8 @@ target_clk_ns = 5
 
 @pytest.mark.vivado
 def test_runtime_weights_single_layer():
-    idt = DataType.UINT32
-    wdt = DataType.UINT4
+    idt = DataType["UINT32"]
+    wdt = DataType["UINT4"]
     act = None
     mw = 64
     mh = 32
@@ -78,11 +78,11 @@ def test_runtime_weights_single_layer():
     os.remove("old_weights.dat")
     old_weight_stream = map(lambda x: int(x, 16), old_weight_stream.split("\n"))
     old_weight_stream = list(old_weight_stream)
+    model = model.transform(InsertFIFO(True))
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
-    model = model.transform(PrepareRTLSim())
     model.set_metadata_prop("exec_mode", "rtlsim")
     in_tensor = np.asarray(range(mw), dtype=np.float32)
     # add two copies of the input tensor as the first one is just used to
diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py
index f268611c296687987fffe32293b0454109bc7db4..66fd5b43a1b8b8c8986bf9c9b9d0e9efd7a744a6 100644
--- a/tests/fpgadataflow/test_set_folding.py
+++ b/tests/fpgadataflow/test_set_folding.py
@@ -98,7 +98,8 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes):
     model.set_tensor_datatype("outp", adt)
 
     for i in range(1, nnodes + 1):
-        model.graph.value_info.append(tensors[i])
+        if tensors[i].name != "outp":
+            model.graph.value_info.append(tensors[i])
         model.set_initializer("weights_" + str(i - 1), W)
         model.set_initializer("thresh_" + str(i - 1), T)
         model.set_tensor_datatype("weights_" + str(i - 1), wdt)
@@ -114,7 +115,7 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes):
 def test_set_folding(target_fps, platform):
 
     model = make_multi_fclayer_model(
-        128, DataType.INT4, DataType.INT2, DataType.INT16, 5
+        128, DataType["INT4"], DataType["INT2"], DataType["INT16"], 5
     )
 
     model = model.transform(GiveUniqueNodeNames())
diff --git a/tests/transformation/streamline/test_move_flatten_past_affine.py b/tests/transformation/streamline/test_move_flatten_past_affine.py
index 1971ecfaa181d6ee799a9191b63d2482629b1e1c..ef01436dc9435676b562e2b635a8cf12e901046b 100644
--- a/tests/transformation/streamline/test_move_flatten_past_affine.py
+++ b/tests/transformation/streamline/test_move_flatten_past_affine.py
@@ -77,14 +77,14 @@ def test_move_flatten_past_affine(data_layout, batch_size):
     model = ModelWrapper(model)
 
     # initialize values
-    a0_values = gen_finn_dt_tensor(DataType.TERNARY, [1024, 1000])
+    a0_values = gen_finn_dt_tensor(DataType["TERNARY"], [1024, 1000])
     model.set_initializer("a0", a0_values)
     a1_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
     model.set_initializer("a1", a1_values)
     a2_values = np.random.uniform(low=-1, high=1, size=(1000)).astype(np.float32)
     model.set_initializer("a2", a2_values)
 
-    model.set_tensor_datatype("inp", DataType.INT2)
+    model.set_tensor_datatype("inp", DataType["INT2"])
     model.set_tensor_layout("inp", data_layout)
     model = model.transform(InferShapes())
     model = model.transform(InferDataTypes())
@@ -93,7 +93,7 @@ def test_move_flatten_past_affine(data_layout, batch_size):
     model = model.transform(GiveReadableTensorNames())
 
     # compare execution before and after transformation
-    inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
+    inp_values = gen_finn_dt_tensor(DataType["INT2"], ishape)
     idict = {model.graph.input[0].name: inp_values}
     model_transformed = model.transform(MoveFlattenPastAffine())
     assert oxe.compare_execution(model, model_transformed, idict)
diff --git a/tests/transformation/streamline/test_move_flatten_past_topk.py b/tests/transformation/streamline/test_move_flatten_past_topk.py
index 5e0211ad8857653ce75af2f5a7de0c6439770108..6086f7804eda4447de8f5948f521f0b003f65020 100644
--- a/tests/transformation/streamline/test_move_flatten_past_topk.py
+++ b/tests/transformation/streamline/test_move_flatten_past_topk.py
@@ -69,7 +69,7 @@ def test_move_flatten_past_affine(data_layout, batch_size):
     model = helper.make_model(graph, producer_name="move_flatten_model")
     model = ModelWrapper(model)
 
-    model.set_tensor_datatype("inp", DataType.INT2)
+    model.set_tensor_datatype("inp", DataType["INT2"])
     model.set_tensor_layout("inp", data_layout)
     model = model.transform(InsertTopK())
     model = model.transform(InferShapes())
@@ -79,7 +79,7 @@ def test_move_flatten_past_affine(data_layout, batch_size):
     model = model.transform(GiveReadableTensorNames())
 
     # compare execution before and after transformation
-    inp_values = gen_finn_dt_tensor(DataType.INT2, ishape)
+    inp_values = gen_finn_dt_tensor(DataType["INT2"], ishape)
     idict = {model.graph.input[0].name: inp_values}
     model_transformed = model.transform(MoveFlattenPastTopK())
     assert oxe.compare_execution(model, model_transformed, idict)
diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
index cb9beed713eb448b49015a7de601a4d15edc035b..e9e956d845ef8e56d2078bcd738ad3bb0ff72bfa 100644
--- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py
+++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
@@ -68,9 +68,9 @@ def test_move_mul_past_dw_conv(ifm_dim, ifm_ch, k, stride, pad_amt, dw):
 
     model = helper.make_model(graph, producer_name="mulpastconv-model")
     model = ModelWrapper(model)
-    inp_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, ifm_dim, ifm_dim])
-    mul_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, 1, 1])
-    W_values = gen_finn_dt_tensor(DataType.INT2, W_shape)
+    inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, ifm_dim, ifm_dim])
+    mul_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, 1, 1])
+    W_values = gen_finn_dt_tensor(DataType["INT2"], W_shape)
     model.set_initializer("W", W_values)
     model.set_initializer("mul", mul_values)
     model = model.transform(InferShapes())
diff --git a/tests/transformation/streamline/test_move_mul_past_maxpool.py b/tests/transformation/streamline/test_move_mul_past_maxpool.py
index 81f18842ed8ba2b5230f3a853076244d0a0ab8d9..2c51aaf36a79591fd0fd0cea368d5e23da0d07c3 100755
--- a/tests/transformation/streamline/test_move_mul_past_maxpool.py
+++ b/tests/transformation/streamline/test_move_mul_past_maxpool.py
@@ -66,7 +66,7 @@ def test_move_mul_past_maxpool(ifm_dim, ifm_ch, k, stride, pad, cw, negative):
 
     model = helper.make_model(graph, producer_name="mulpastmaxpool-model")
     model = ModelWrapper(model)
-    inp_values = gen_finn_dt_tensor(DataType.INT2, [1, ifm_ch, ifm_dim, ifm_dim])
+    inp_values = gen_finn_dt_tensor(DataType["INT2"], [1, ifm_ch, ifm_dim, ifm_dim])
     mul_values = np.random.random_sample(mul_shape).astype(np.float32)
     if negative == 1:
         mul_values = mul_values * (-1)
diff --git a/tests/transformation/streamline/test_remove_identity_ops.py b/tests/transformation/streamline/test_remove_identity_ops.py
deleted file mode 100644
index ad7c20fb51902f22c20896bdfb3321dc74d0572d..0000000000000000000000000000000000000000
--- a/tests/transformation/streamline/test_remove_identity_ops.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import pytest
-
-import numpy as np
-from onnx import TensorProto, helper
-
-import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline.remove import RemoveIdentityOps
-from finn.util.basic import gen_finn_dt_tensor
-
-
-def insert_identity_op(model, op, as_first_node, approx):
-    if approx:
-        zero_val = 0.000001
-        one_val = 0.999999
-    else:
-        zero_val = 0.0
-        one_val = 1.0
-    if op in ["Add", "Sub"]:
-        val = np.asarray([zero_val], dtype=np.float32)
-    elif op in ["Mul", "Div"]:
-        val = np.asarray([one_val], dtype=np.float32)
-    else:
-        return
-
-    graph = model.graph
-    if as_first_node:
-        identity_node = helper.make_node(op, ["inp", "value"], ["ident_out"])
-        graph.node.insert(0, identity_node)
-        graph.node[1].input[0] = "ident_out"
-    else:
-        identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"])
-        graph.node.insert(3, identity_node)
-        graph.node[-1].input[0] = "ident_out"
-    model.set_initializer("value", val)
-
-    return model
-
-
-# identity operations to be inserted
-@pytest.mark.parametrize("op", ["Add", "Sub", "Mul", "Div"])
-@pytest.mark.parametrize("approx", [False, True])
-@pytest.mark.parametrize("as_first_node", [False, True])
-def test_remove_identity_ops(op, as_first_node, approx):
-
-    # set up onnx model
-    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
-    mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, [])
-    shape = helper.make_tensor_value_info("shape", TensorProto.FLOAT, [2])
-    div = helper.make_tensor_value_info("div", TensorProto.FLOAT, [])
-    matmul = helper.make_tensor_value_info("matmul", TensorProto.FLOAT, [4, 2])
-    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, 2])
-
-    mul_node = helper.make_node("Mul", ["inp", "mul"], ["mul_out"])
-    reshape_node = helper.make_node("Reshape", ["mul_out", "shape"], ["reshape_out"])
-    div_node = helper.make_node("Div", ["reshape_out", "div"], ["div_out"])
-    matmul_node = helper.make_node("MatMul", ["div_out", "matmul"], ["outp"])
-
-    graph = helper.make_graph(
-        nodes=[mul_node, reshape_node, div_node, matmul_node],
-        name="identity-graph",
-        inputs=[inp],
-        outputs=[outp],
-        value_info=[mul, shape, div, matmul],
-    )
-
-    model = helper.make_model(graph, producer_name="mulpastconv-model")
-    model = ModelWrapper(model)
-    inp_values = gen_finn_dt_tensor(DataType.INT2, [1, 4, 1, 1])
-    mul_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
-    shape_values = np.asarray([1, -1], dtype=np.int64)
-    div_values = np.random.uniform(low=0.1, high=0.99, size=(1)).astype(np.float32)
-    matmul_values = gen_finn_dt_tensor(DataType.INT2, [4, 2])
-    model.set_initializer("mul", mul_values)
-    model.set_initializer("shape", shape_values)
-    model.set_initializer("div", div_values)
-    model.set_initializer("matmul", matmul_values)
-    insert_identity_op(model, op, as_first_node, approx)
-    model = model.transform(InferShapes())
-    model = model.transform(InferDataTypes())
-    idict = {"inp": inp_values}
-    odict = oxe.execute_onnx(model, idict)
-    out_before = odict["outp"]
-    num_of_nodes_before = len(model.graph.node)
-
-    model = model.transform(RemoveIdentityOps())
-    num_of_nodes_after = len(model.graph.node)
-    assert num_of_nodes_before - 1 == num_of_nodes_after
-
-    odict = oxe.execute_onnx(model, idict)
-    out_after = odict["outp"]
-    assert np.isclose(out_before, out_after, atol=1e-3).all()
diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py
index f9259908a2b4e4d716e3fb9ae7ec28cd9ec85d03..2e57f1c85f6ac197ca7a4cf15e595c34cc0fb564 100644
--- a/tests/transformation/streamline/test_round_thresholds.py
+++ b/tests/transformation/streamline/test_round_thresholds.py
@@ -47,17 +47,17 @@ def test_round_thresholds():
     model = ModelWrapper(model_def)
     threshold_val = np.asarray([[-1.1], [0.7], [2.3], [5.1]], dtype=np.float32)
     model.set_initializer("thresholds", threshold_val)
-    model.set_tensor_datatype("v", DataType.INT8)
+    model.set_tensor_datatype("v", DataType["INT8"])
     inp_dict_f = {"v": np.floor(threshold_val).T}
     inp_dict_n = {"v": np.round(threshold_val).T}
     inp_dict_c = {"v": np.ceil(threshold_val).T}
     orig_f = oxe.execute_onnx(model, inp_dict_f)["out"]
     orig_n = oxe.execute_onnx(model, inp_dict_n)["out"]
     orig_c = oxe.execute_onnx(model, inp_dict_c)["out"]
-    assert model.get_tensor_datatype("thresholds") == DataType.FLOAT32
+    assert model.get_tensor_datatype("thresholds") == DataType["FLOAT32"]
     new_model = model.transform(RoundAndClipThresholds())
     # rounded up thresholds should have same dtype as input
-    assert new_model.get_tensor_datatype("thresholds") == DataType.INT8
+    assert new_model.get_tensor_datatype("thresholds") == DataType["INT8"]
     new_f = oxe.execute_onnx(new_model, inp_dict_f)["out"]
     new_n = oxe.execute_onnx(new_model, inp_dict_n)["out"]
     new_c = oxe.execute_onnx(new_model, inp_dict_c)["out"]
diff --git a/tests/transformation/test_infer_datatypes_lfc.py b/tests/transformation/test_infer_datatypes_lfc.py
index 00715e3e3ca3626e1b76bf3b23bae4dc1d65b053..8883dac7a54eafaaa768c8ae991b2030e385b318 100644
--- a/tests/transformation/test_infer_datatypes_lfc.py
+++ b/tests/transformation/test_infer_datatypes_lfc.py
@@ -49,12 +49,12 @@ def test_infer_datatypes_lfc():
     model = model.transform(GiveUniqueNodeNames())
     model = model.transform(GiveReadableTensorNames())
     model = model.transform(InferDataTypes())
-    assert model.get_tensor_datatype("MatMul_0_out0") == DataType.INT32
-    assert model.get_tensor_datatype("MatMul_1_out0") == DataType.INT32
-    assert model.get_tensor_datatype("MatMul_2_out0") == DataType.INT32
-    assert model.get_tensor_datatype("MatMul_3_out0") == DataType.INT32
-    assert model.get_tensor_datatype("MultiThreshold_0_out0") == DataType.BIPOLAR
-    assert model.get_tensor_datatype("MultiThreshold_1_out0") == DataType.BIPOLAR
-    assert model.get_tensor_datatype("MultiThreshold_2_out0") == DataType.BIPOLAR
-    assert model.get_tensor_datatype("MultiThreshold_3_out0") == DataType.BIPOLAR
+    assert model.get_tensor_datatype("MatMul_0_out0") == DataType["INT32"]
+    assert model.get_tensor_datatype("MatMul_1_out0") == DataType["INT32"]
+    assert model.get_tensor_datatype("MatMul_2_out0") == DataType["INT32"]
+    assert model.get_tensor_datatype("MatMul_3_out0") == DataType["INT32"]
+    assert model.get_tensor_datatype("MultiThreshold_0_out0") == DataType["BIPOLAR"]
+    assert model.get_tensor_datatype("MultiThreshold_1_out0") == DataType["BIPOLAR"]
+    assert model.get_tensor_datatype("MultiThreshold_2_out0") == DataType["BIPOLAR"]
+    assert model.get_tensor_datatype("MultiThreshold_3_out0") == DataType["BIPOLAR"]
     os.remove(export_onnx_path)
diff --git a/tests/transformation/test_qonnx_to_finn.py b/tests/transformation/test_qonnx_to_finn.py
new file mode 100644
index 0000000000000000000000000000000000000000..df7d63e3d2e139077f0fa20b10714c0a43a24e47
--- /dev/null
+++ b/tests/transformation/test_qonnx_to_finn.py
@@ -0,0 +1,175 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import pkg_resources as pk
+
+import pytest
+
+import brevitas.export.onnx.generic as b_onnx
+import brevitas.onnx as bo
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
+import torch
+from pkgutil import get_data
+from qonnx.util.cleanup import cleanup
+from tempfile import TemporaryDirectory
+
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.util.test import get_test_model_trained
+
+
+def get_brev_model_and_sample_inputs(model_name, wbits, abits):
+    if "FC" in model_name:
+        in_shape = (1, 1, 28, 28)
+        raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+        input_tensor = onnx.load_tensor_from_string(raw_i)
+        input_tensor = nph.to_array(input_tensor)
+        brev_model = get_test_model_trained(model_name, wbits, abits)
+    elif model_name == "CNV":
+        in_shape = (1, 3, 32, 32)
+        fn = pk.resource_filename(
+            "finn.qnn-data", "cifar10/cifar10-test-data-class3.npz"
+        )
+        input_tensor = np.load(fn)["arr_0"].astype(np.float32)
+        input_tensor = input_tensor / 255
+        brev_model = get_test_model_trained(model_name, wbits, abits)
+    elif model_name == "mobilenet":
+        in_shape = (1, 3, 224, 224)
+        np.random.seed(42)
+        input_tensor = np.random.normal(size=in_shape).astype(dtype=np.float32)
+        brev_model = get_test_model_trained(model_name, 4, 4)
+    else:
+        raise RuntimeError(f"The model with the name {model_name} is not supported.")
+
+    return brev_model, in_shape, input_tensor
+
+
+def analysis_testing_for_no_quant_nodes(model):
+    # Test that all Quant nodes have been converted to MultiThreshold nodes
+    # or folded into tensor initializers.
+
+    for op_type in ["BinaryQuant", "Quant", "Trunc"]:
+        q_count = len(model.get_nodes_by_op_type(op_type))
+        if q_count > 0:
+            raise ValueError(f"There should be no {op_type} nodes left in the graph.")
+
+    return dict()
+
+
+# This test currently takes about 4 min and 20 seconds
+@pytest.mark.parametrize("abits", [1, 2])
+@pytest.mark.parametrize("wbits", [1, 2])
+@pytest.mark.parametrize("model_name", ["TFC", "SFC", "LFC", "CNV", "mobilenet"])
+def test_QONNX_to_FINN(model_name, wbits, abits):
+    if wbits > abits:
+        pytest.skip("No wbits > abits cases at the moment")
+    if model_name == "LFC" and wbits == 2 and abits == 2:
+        pytest.skip("No LFC-w2a2 present at the moment")
+    if model_name == "mobilenet" and (wbits != 2 or abits != 2):
+        pytest.skip("Mobilenet only runs at W2A2, though it's technically W4A4.")
+
+    # Get test config and model
+    ATOL = 1e-7
+    brev_model, in_shape, input_tensor = get_brev_model_and_sample_inputs(
+        model_name, wbits, abits
+    )
+    temp_dir = TemporaryDirectory()
+    qonnx_base_path = temp_dir.name + "/qonnx_{}.onnx"
+    finn_base_path = temp_dir.name + "/finn_{}.onnx"
+
+    # Get Brevitas output
+    torch_input_tensor = torch.from_numpy(input_tensor).float()
+    brev_output = brev_model.forward(torch_input_tensor).detach().numpy()
+
+    # Get "clean" FINN model and it's output
+    _ = bo.export_finn_onnx(brev_model, in_shape, finn_base_path.format("raw"))
+    model = ModelWrapper(finn_base_path.format("raw"))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(RemoveStaticGraphInputs())
+    model.save(finn_base_path.format("clean"))
+
+    model = ModelWrapper(finn_base_path.format("clean"))
+    input_dict = {model.graph.input[0].name: input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict, False)
+    finn_export_output = output_dict[model.graph.output[0].name]
+    # This comparison is known to fail for MobileNet (root cause not yet identified), so skip it there
+    if model_name != "mobilenet":
+        assert np.isclose(
+            brev_output, finn_export_output, atol=ATOL
+        ).all(), "The output of the Brevitas model and the FINN model should match."
+
+    # Get the equivalent QONNX model
+    b_onnx.function.DOMAIN_STRING = "finn.custom_op.general"
+    _ = b_onnx.manager.BrevitasONNXManager.export(
+        brev_model, in_shape, qonnx_base_path.format("raw")
+    )
+    cleanup(qonnx_base_path.format("raw"), out_file=qonnx_base_path.format("clean"))
+
+    # Compare output
+    model = ModelWrapper(qonnx_base_path.format("clean"))
+    input_dict = {model.graph.input[0].name: input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict, False)
+    qonnx_export_output = output_dict[model.graph.output[0].name]
+    assert np.isclose(
+        brev_output, qonnx_export_output, atol=ATOL
+    ).all(), "The output of the Brevitas model and the QONNX model should match."
+    # This comparison is known to fail for MobileNet (root cause not yet identified), so skip it there
+    if model_name != "mobilenet":
+        assert np.isclose(
+            qonnx_export_output, finn_export_output, atol=ATOL
+        ).all(), "The output of the FINN model and the QONNX model should match."
+
+    # Run QONNX to FINN conversion
+    model = ModelWrapper(qonnx_base_path.format("clean"))
+    model = model.transform(ConvertQONNXtoFINN())
+    model.save(qonnx_base_path.format("whole_trafo"))
+
+    # Compare output
+    model = ModelWrapper(qonnx_base_path.format("whole_trafo"))
+    input_dict = {model.graph.input[0].name: input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict, False)
+    test_output = output_dict[model.graph.output[0].name]
+    assert np.isclose(test_output, finn_export_output, atol=ATOL).all(), (
+        "The output of the FINN model "
+        "and the QONNX -> FINN converted model should match."
+    )
+
+    # Run analysis passes on the converted model
+    model = ModelWrapper(qonnx_base_path.format("whole_trafo"))
+    _ = model.analysis(analysis_testing_for_no_quant_nodes)
+
+    temp_dir.cleanup()
diff --git a/tests/util/test_build_dataflow.py b/tests/util/test_build_dataflow.py
index 770553201eb86f448dcd9e22afd8e827c338c7f9..de1b3abcc314c0c1451bd86bab8a7b93600ca697 100644
--- a/tests/util/test_build_dataflow.py
+++ b/tests/util/test_build_dataflow.py
@@ -48,11 +48,14 @@ def test_build_dataflow_directory():
     # check the generated files
     output_dir = target_dir + "/output_tfc_w1a1_Pynq-Z1"
     assert os.path.isfile(output_dir + "/time_per_step.json")
+    assert os.path.isfile(output_dir + "/auto_folding_config.json")
     assert os.path.isfile(output_dir + "/final_hw_config.json")
     assert os.path.isfile(output_dir + "/stitched_ip/ip/component.xml")
     assert os.path.isfile(output_dir + "/driver/driver.py")
     assert os.path.isfile(output_dir + "/report/estimate_layer_cycles.json")
     assert os.path.isfile(output_dir + "/report/estimate_layer_resources.json")
+    assert os.path.isfile(output_dir + "/report/verify_rtlsim.vcd")
+    assert os.path.isfile(output_dir + "/report/rtlsim_perf_batch_1.vcd")
     assert os.path.isfile(
         output_dir + "/report/estimate_layer_config_alternatives.json"
     )
diff --git a/tests/util/test_create.py b/tests/util/test_create.py
index 42a288b74ecda9746296519b1b86563c75b2752e..c11e60175ea3ac94b6686ec5f8401a7c134fe53e 100644
--- a/tests/util/test_create.py
+++ b/tests/util/test_create.py
@@ -32,7 +32,9 @@ import finn.util.create as create
 from finn.core.datatype import DataType
 
 
-@pytest.mark.parametrize("bitwidth", [DataType.BIPOLAR, DataType.INT2, DataType.INT4])
+@pytest.mark.parametrize(
+    "bitwidth", [DataType["BIPOLAR"], DataType["INT2"], DataType["INT4"]]
+)
 def test_hls_random_mlp_maker(bitwidth):
     w = bitwidth
     a = bitwidth
@@ -42,7 +44,7 @@ def test_hls_random_mlp_maker(bitwidth):
             "mh": 100,
             "simd": 185,
             "pe": 100,
-            "idt": DataType.BIPOLAR,
+            "idt": DataType["BIPOLAR"],
             "wdt": w,
             "act": a,
         },
@@ -56,7 +58,7 @@ def test_hls_random_mlp_maker(bitwidth):
             "pe": 1,
             "idt": a,
             "wdt": w,
-            "act": DataType.BIPOLAR,
+            "act": DataType["BIPOLAR"],
         },
     ]
 
diff --git a/tests/util/test_data_packing_hls.py b/tests/util/test_data_packing_hls.py
index 3221eda34c85ed9d65b258b6489699cda8400517..7113a3051bffb568e36b01af59945f0956658f76 100644
--- a/tests/util/test_data_packing_hls.py
+++ b/tests/util/test_data_packing_hls.py
@@ -38,7 +38,16 @@ from finn.core.datatype import DataType
 from finn.util.data_packing import numpy_to_hls_code
 
 
-@pytest.mark.parametrize("dtype", [DataType.BINARY, DataType.INT2, DataType.INT32])
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        DataType["BINARY"],
+        DataType["INT2"],
+        DataType["INT32"],
+        DataType["FIXED<9,6>"],
+        DataType["FLOAT32"],
+    ],
+)
 @pytest.mark.parametrize("test_shape", [(1, 2, 4), (1, 1, 64), (2, 64)])
 @pytest.mark.vivado
 def test_npy2apintstream(test_shape, dtype):
@@ -119,17 +128,17 @@ def test_numpy_to_hls_code():
         return "".join(s.split())
 
     A = [[1, 1, 1, 0], [0, 1, 1, 0]]
-    ret = numpy_to_hls_code(A, DataType.BINARY, "test", True)
+    ret = numpy_to_hls_code(A, DataType["BINARY"], "test", True)
     eA = """ap_uint<4> test[2] =
     {ap_uint<4>("0xe", 16), ap_uint<4>("0x6", 16)};"""
     assert remove_all_whitespace(ret) == remove_all_whitespace(eA)
     B = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]]
-    ret = numpy_to_hls_code(B, DataType.UINT2, "test", True)
+    ret = numpy_to_hls_code(B, DataType["UINT2"], "test", True)
     eB = """ap_uint<4> test[2][2] =
     {{ap_uint<4>("0xf", 16), ap_uint<4>("0xf", 16)},
      {ap_uint<4>("0x7", 16), ap_uint<4>("0xd", 16)}};"""
     assert remove_all_whitespace(ret) == remove_all_whitespace(eB)
-    ret = numpy_to_hls_code(B, DataType.UINT2, "test", True, True)
+    ret = numpy_to_hls_code(B, DataType["UINT2"], "test", True, True)
     eB = """{{ap_uint<4>("0xf", 16), ap_uint<4>("0xf", 16)},
      {ap_uint<4>("0x7", 16), ap_uint<4>("0xd", 16)}};"""
     assert remove_all_whitespace(ret) == remove_all_whitespace(eB)