diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f5998e98d00f7ea2e89ae3f0fcddd5862454f876..dfc83ba618eb905fe5579231542d14d529503ac2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -68,11 +68,3 @@ repos:
     # black-compatible flake-8 config
     args: ['--max-line-length=88',  # black default
            '--extend-ignore=E203']  # E203 is not PEP8 compliant
-
-- repo: local
-  hooks:
-  - id: jupyter-nb-clear-output
-    name: jupyter-nb-clear-output
-    files: \.ipynb$
-    language: system
-    entry: jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace
diff --git a/custom_hls/checksum.cpp b/custom_hls/checksum.cpp
index 071d9bfbe3a93bf822ec1f0d64605941a4248e85..3ea3870d354a494b2d840688b8762cbc3cca9c44 100644
--- a/custom_hls/checksum.cpp
+++ b/custom_hls/checksum.cpp
@@ -1,5 +1,5 @@
 /******************************************************************************
- *  Copyright (c) 2022, Advanced Micro Devices, Inc.
+ *  Copyright (c) 2022, Xilinx, Inc.
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
diff --git a/custom_hls/checksum.hpp b/custom_hls/checksum.hpp
index bf580f31a6228ffd446221ff5c7cd5f29e439837..77fc14694ff011f9252b1d182e2054b7545b890f 100644
--- a/custom_hls/checksum.hpp
+++ b/custom_hls/checksum.hpp
@@ -1,5 +1,5 @@
 /******************************************************************************
- *  Copyright (c) 2022, Advanced Micro Devices, Inc.
+ *  Copyright (c) 2022, Xilinx, Inc.
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -74,6 +74,7 @@ void checksum(
 	hls::stream<T> &src,
 	hls::stream<T> &dst,
 	ap_uint<32>    &chk,
+	ap_uint<1>     drain,	// drain data after checksumming without forwarding to `dst`
 	F&& f = F()
 ) {
 	ap_uint<2>  coeff[3] = { 1, 2, 3 };
@@ -84,7 +85,7 @@ void checksum(
 		T const  x = src.read();
 
 		// Pass-thru copy
-		dst.write(x);
+		if(!drain)  dst.write(x);
 
 		// Actual checksum update
 		for(unsigned  j = 0; j < K; j++) {
@@ -118,14 +119,16 @@ void checksum(
 	void checksum_ ## WORDS_PER_FRAME ## _ ## WORD_SIZE ## _ ## ITEMS_PER_WORD ( \
 		hls::stream<T> &src, \
 		hls::stream<T> &dst, \
-		ap_uint<32>    &chk \
+		ap_uint<32>    &chk, \
+		ap_uint< 1>    drain \
 	) { \
 	_Pragma("HLS interface port=src axis") \
 	_Pragma("HLS interface port=dst axis") \
 	_Pragma("HLS interface port=chk s_axilite") \
+	_Pragma("HLS interface port=drain s_axilite") \
 	_Pragma("HLS interface port=return ap_ctrl_none") \
-	_Pragma("HLS dataflow") \
-		checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(src, dst, chk); \
+	_Pragma("HLS dataflow disable_start_propagation") \
+		checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(src, dst, chk, drain); \
 	}
 #define CHECKSUM_TOP(WORDS_PER_FRAME, WORD_SIZE, ITEMS_PER_WORD) \
 	CHECKSUM_TOP_(WORDS_PER_FRAME, WORD_SIZE, ITEMS_PER_WORD)
diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 5cd2b962ce92afe7d7954ab962694d5c5e70e21e..556e6d040db3140916d75632b9bdead3c1d38747 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -54,10 +54,8 @@ recho () {
   echo -e "${RED}ERROR: $1${NC}"
 }
 
-# finn-base
-pip install --user -e ${FINN_ROOT}/deps/finn-base
-# Install qonnx without dependencies, currently its only dependency is finn-base
-pip install --user --no-dependencies -e ${FINN_ROOT}/deps/qonnx
+# qonnx
+pip install --user -e ${FINN_ROOT}/deps/qonnx
 # finn-experimental
 pip install --user -e ${FINN_ROOT}/deps/finn-experimental
 # brevitas
diff --git a/docker/jenkins/Jenkinsfile b/docker/jenkins/Jenkinsfile
index dab0833166234fc8ec9f123adf8c6157acdf5d5d..ad533efa5d8bbab68837e6092f91c4767cde60f7 100644
--- a/docker/jenkins/Jenkinsfile
+++ b/docker/jenkins/Jenkinsfile
@@ -22,7 +22,7 @@ node {
                 dir("${env.WORKSPACE}") {
                 sh("bash run-docker.sh python setup.py test --addopts -mstreamline")
                 }
-            } 
+            }
         }, thirdBranch: {
             stage('Util functions') {
                 dir("${env.WORKSPACE}") {
@@ -41,6 +41,6 @@ node {
                 sh("bash run-docker.sh python setup.py test --addopts -mfpgadataflow")
                 }
             }
-        }        
+        }
     }
 }
diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst
index 356b5613fe35d3020c3e024e4d77e5d4f2e3d469..e28874145d6d61232b0d63b0e53e4dd5dcdc4cfc 100644
--- a/docs/finn/internals.rst
+++ b/docs/finn/internals.rst
@@ -14,10 +14,10 @@ FINN uses `ONNX <https://github.com/onnx/onnx>`_ as an intermediate representati
 Custom Quantization Annotations
 ===============================
 
-ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we are interested in smaller integers down to ternary and bipolar. To make this work, FINN uses the quantization_annotation field in ONNX to annotate tensors with their FINN DataType (:py:mod:`finn.core.datatype.DataType`) information. However, all tensors are expected to use single-precision floating point (float32) storage in FINN. This means we store even a 1-bit value as floating point for the purposes of representation. The FINN compiler flow is responsible for eventually producing a packed representation for the target hardware, where the 1-bit is actually stored as 1-bit.
+ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we are interested in smaller integers down to ternary and bipolar. To make this work, FINN uses the quantization_annotation field in ONNX to annotate tensors with their FINN DataType (:py:mod:`qonnx.core.datatype.DataType`) information. However, all tensors are expected to use single-precision floating point (float32) storage in FINN. This means we store even a 1-bit value as floating point for the purposes of representation. The FINN compiler flow is responsible for eventually producing a packed representation for the target hardware, where the 1-bit is actually stored as 1-bit.
 
 Note that FINN uses floating point tensors as a carrier data type to represent integers. Floating point arithmetic can introduce rounding errors, e.g. (int_num * float_scale) / float_scale is not always equal to int_num.
-When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`finn.util.basic.sanitize_quant_values`) for more information.
+When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`qonnx.util.basic.sanitize_quant_values`) for more information.
 This behavior can be disabled (not recommended!) by setting the environment variable SANITIZE_QUANT_TENSORS=0.
 
 Custom Operations/Nodes
@@ -39,7 +39,7 @@ To verify correct operation of FINN-ONNX graphs, FINN provides its own ONNX exec
 ModelWrapper
 ============
 
-FINN provides a ModelWrapper class (:py:mod:`finn.core.modelwrapper.ModelWrapper`) as a thin wrapper around ONNX to make it easier to analyze and manipulate ONNX graphs. This wrapper provides many helper functions, while still giving full access to the ONNX protobuf representation.
+FINN provides a ModelWrapper class (:py:mod:`qonnx.core.modelwrapper.ModelWrapper`) as a thin wrapper around ONNX to make it easier to analyze and manipulate ONNX graphs. This wrapper provides many helper functions, while still giving full access to the ONNX protobuf representation.
 
 Some of the helper functions are described in more detail below.
 
@@ -48,7 +48,7 @@ Create a ModelWrapper instance
 The ModelWrapper instance can be created using a model in .onnx format or by directly passing a ModelProto instance to the wrapper. The code block below gives an example of how to use the wrapper on a model in .onnx format.
 ::
 
-  from finn.core.modelwrapper import ModelWrapper
+  from qonnx.core.modelwrapper import ModelWrapper
   model = ModelWrapper("model.onnx")
 
 Access the ONNX GraphProto through ModelWrapper
@@ -116,7 +116,7 @@ As mentioned above there are FINN DataTypes additional to the container datatype
   model.get_tensor_datatype(tensor_list[2])
 
   # set tensor datatype of third tensor in model tensor list
-  from finn.core.datatype import DataType
+  from qonnx.core.datatype import DataType
 
   finn_dtype = DataType.BIPOLAR
   model.set_tensor_datatype(tensor_list[2], finn_dtype)
@@ -127,7 +127,7 @@ ModelWrapper contains two helper functions for tensor initializers, one to deter
   # get tensor initializer of third tensor in model tensor list
   model.get_initializer(tensor_list[2])
 
-ModelWrapper contains more useful functions, if you are interested please have a look at the ModelWrapper module (:py:mod:`finn.core.modelwrapper.ModelWrapper`) directly.
+ModelWrapper contains more useful functions; if you are interested, please have a look at the ModelWrapper module (:py:mod:`qonnx.core.modelwrapper.ModelWrapper`) directly.
 
 
 .. _analysis_pass:
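
The annotation mechanics described above fit in a few lines; a minimal sketch, assuming a model file named model.onnx with at least one graph input (both names are placeholders):
::

  from qonnx.core.datatype import DataType
  from qonnx.core.modelwrapper import ModelWrapper

  model = ModelWrapper("model.onnx")
  inp_name = model.graph.input[0].name
  # storage stays float32; the FINN DataType is carried as an annotation
  model.set_tensor_datatype(inp_name, DataType["BIPOLAR"])
  print(model.get_tensor_datatype(inp_name))  # -> DataType["BIPOLAR"]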
diff --git a/docs/finn/nw_prep.rst b/docs/finn/nw_prep.rst
index 9a10895dd8e5e74d0f047bea30d4fbddd4215af1..8d0403fc9bb6a45fae60f14c0fb0acf862792abb 100644
--- a/docs/finn/nw_prep.rst
+++ b/docs/finn/nw_prep.rst
@@ -19,11 +19,11 @@ Tidy-up transformations
 
 These transformations do not appear in the diagram above, but are applied in many steps in the FINN flow to postprocess the model after a transformation and/or prepare it for the next transformation. They ensure that all information is set and behave like a "tidy-up". These transformations are the following:
 
-* :py:mod:`finn.transformation.general.GiveReadableTensorNames` and :py:mod:`finn.transformation.general.GiveUniqueNodeNames`
+* :py:mod:`qonnx.transformation.general.GiveReadableTensorNames` and :py:mod:`qonnx.transformation.general.GiveUniqueNodeNames`
 
-* :py:mod:`finn.transformation.infer_datatypes.InferDataTypes` and :py:mod:`finn.transformation.infer_shapes.InferShapes`
+* :py:mod:`qonnx.transformation.infer_datatypes.InferDataTypes` and :py:mod:`qonnx.transformation.infer_shapes.InferShapes`
 
-* :py:mod:`finn.transformation.fold_constants.FoldConstants`
+* :py:mod:`qonnx.transformation.fold_constants.FoldConstants`
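
A sketch of how the tidy-up passes listed above are typically chained (model.onnx is a placeholder; transform() returns a new, deep-copied ModelWrapper on every call):
::

  from qonnx.core.modelwrapper import ModelWrapper
  from qonnx.transformation.fold_constants import FoldConstants
  from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
  from qonnx.transformation.infer_datatypes import InferDataTypes
  from qonnx.transformation.infer_shapes import InferShapes

  model = ModelWrapper("model.onnx")
  model = model.transform(InferShapes())
  model = model.transform(FoldConstants())
  model = model.transform(GiveUniqueNodeNames())
  model = model.transform(GiveReadableTensorNames())
  model = model.transform(InferDataTypes())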
 
 Streamlining Transformations
 ============================
@@ -43,7 +43,7 @@ Dataflow Partitioning
 In the next step the graph is split and the part consisting of HLS layers is further processed in the FINN flow. The parent graph containing the non-HLS layers remains. The PE and SIMD are set to 1 by default, so the result is a network of only HLS layers with maximum folding. The model can be verified using the *cppsim* simulation. It is a simulation using C++ and is described in more detail in chapter :ref:`verification`.
 
 Folding
-=======
+=========
 
 To adjust the folding, the values for PE and SIMD can be increased to achieve also an increase in the performance. The result can be verified using the same simulation flow as for the network with maximum folding (*cppsim* using C++), for details please have a look at chapter :ref:`verification`.
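
A minimal sketch of that PE/SIMD adjustment, assuming the op type name StreamingFCLayer_Batch used elsewhere in this changeset (the chosen values must divide the layer's dimensions):
::

  from qonnx.core.modelwrapper import ModelWrapper
  from qonnx.custom_op.registry import getCustomOp

  model = ModelWrapper("dataflow_model.onnx")  # placeholder filename
  for node in model.get_nodes_by_op_type("StreamingFCLayer_Batch"):
      inst = getCustomOp(node)
      # PE and SIMD default to 1 (maximum folding); raising them trades
      # more hardware for higher throughput
      inst.set_nodeattr("PE", 2)
      inst.set_nodeattr("SIMD", 2)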
 
diff --git a/docs/finn/source_code/finn.core.rst b/docs/finn/source_code/finn.core.rst
index 86afd5a1063db37bb212f5ceb07cfa69bbbcbc0b..2e2a8532c6419198c5075a08bef5207b39d4658b 100644
--- a/docs/finn/source_code/finn.core.rst
+++ b/docs/finn/source_code/finn.core.rst
@@ -8,15 +8,15 @@ Modules
 finn.core.data\_layout
 -------------------------
 
-.. automodule:: finn.core.data_layout
+.. automodule:: qonnx.core.data_layout
    :members:
    :undoc-members:
    :show-inheritance:
 
-finn.core.datatype
+qonnx.core.datatype
 -------------------------
 
-.. automodule:: finn.core.datatype
+.. automodule:: qonnx.core.datatype
    :members:
    :undoc-members:
    :show-inheritance:
@@ -29,10 +29,10 @@ finn.core.execute\_custom\_node
    :undoc-members:
    :show-inheritance:
 
-finn.core.modelwrapper
+qonnx.core.modelwrapper
 -----------------------------
 
-.. automodule:: finn.core.modelwrapper
+.. automodule:: qonnx.core.modelwrapper
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/finn/source_code/finn.custom_op.general.rst b/docs/finn/source_code/finn.custom_op.general.rst
index 87749fd69e541e628436aa904c180338418addc1..dfca29a8f3b6836e2af3fb566e0394eb920c2f6e 100644
--- a/docs/finn/source_code/finn.custom_op.general.rst
+++ b/docs/finn/source_code/finn.custom_op.general.rst
@@ -8,7 +8,7 @@ General Custom Ops
 finn.custom\_op.general.bipolar_quant
 --------------------------------------
 
-.. automodule:: finn.custom_op.general.bipolar_quant
+.. automodule:: qonnx.custom_op.general.bipolar_quant
    :members:
    :undoc-members:
    :show-inheritance:
@@ -16,7 +16,7 @@ finn.custom\_op.general.bipolar_quant
 finn.custom\_op.general.debugmarker
 -----------------------------------
 
-.. automodule:: finn.custom_op.general.debugmarker
+.. automodule:: qonnx.custom_op.general.debugmarker
    :members:
    :undoc-members:
    :show-inheritance:
@@ -24,7 +24,7 @@ finn.custom\_op.general.debugmarker
 finn.custom\_op.general.genericpartition
 -----------------------------------------
 
-.. automodule:: finn.custom_op.general.genericpartition
+.. automodule:: qonnx.custom_op.general.genericpartition
    :members:
    :undoc-members:
    :show-inheritance:
@@ -32,7 +32,7 @@ finn.custom\_op.general.genericpartition
 finn.custom\_op.general.im2col
 ------------------------------
 
-.. automodule:: finn.custom_op.general.im2col
+.. automodule:: qonnx.custom_op.general.im2col
    :members:
    :undoc-members:
    :show-inheritance:
@@ -40,7 +40,7 @@ finn.custom\_op.general.im2col
 finn.custom\_op.general.maxpoolnhwc
 ------------------------------------
 
-.. automodule:: finn.custom_op.general.maxpoolnhwc
+.. automodule:: qonnx.custom_op.general.maxpoolnhwc
    :members:
    :undoc-members:
    :show-inheritance:
@@ -48,7 +48,7 @@ finn.custom\_op.general.maxpoolnhwc
 finn.custom\_op.general.multithreshold
 ---------------------------------------
 
-.. automodule:: finn.custom_op.general.multithreshold
+.. automodule:: qonnx.custom_op.general.multithreshold
    :members:
    :undoc-members:
    :show-inheritance:
@@ -56,7 +56,7 @@ finn.custom\_op.general.multithreshold
 finn.custom\_op.general.quant
 ------------------------------
 
-.. automodule:: finn.custom_op.general.quant
+.. automodule:: qonnx.custom_op.general.quant
   :members:
   :undoc-members:
   :show-inheritance:
@@ -64,7 +64,7 @@ finn.custom\_op.general.quant
 finn.custom\_op.general.quantavgpool2d
 --------------------------------------
 
-.. automodule:: finn.custom_op.general.quantavgpool2d
+.. automodule:: qonnx.custom_op.general.quantavgpool2d
   :members:
   :undoc-members:
   :show-inheritance:
@@ -72,7 +72,7 @@ finn.custom\_op.general.quantavgpool2d
 finn.custom\_op.general.trunc
 ------------------------------
 
-.. automodule:: finn.custom_op.general.trunc
+.. automodule:: qonnx.custom_op.general.trunc
   :members:
   :undoc-members:
   :show-inheritance:
@@ -80,7 +80,7 @@ finn.custom\_op.general.trunc
 finn.custom\_op.general.xnorpopcount
 -------------------------------------
 
-.. automodule:: finn.custom_op.general.xnorpopcount
+.. automodule:: qonnx.custom_op.general.xnorpopcount
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/finn/source_code/finn.custom_op.rst b/docs/finn/source_code/finn.custom_op.rst
index 1ee3e1dce1898b06605c89202ee841489b817942..3e91eff9a16b3dedf0e1682c79d6f8022ebe0db8 100644
--- a/docs/finn/source_code/finn.custom_op.rst
+++ b/docs/finn/source_code/finn.custom_op.rst
@@ -9,7 +9,7 @@ Submodules
    :maxdepth: 2
 
    finn.custom_op.fpgadataflow
-   finn.custom_op.general
+   qonnx.custom_op.general
 
 Custom Op Nodes
 ===============
@@ -25,7 +25,7 @@ Base Class
 finn.custom\_op.registry
 -------------------------
 
-.. automodule:: finn.custom_op.registry
+.. automodule:: qonnx.custom_op.registry
   :members:
   :undoc-members:
   :show-inheritance:
diff --git a/docs/finn/source_code/finn.transformation.rst b/docs/finn/source_code/finn.transformation.rst
index cffb0fd0f9e963c02a6986e47e1654951ad3bab0..acd09993472d56bc3b9c4db49042601e4cef7547 100644
--- a/docs/finn/source_code/finn.transformation.rst
+++ b/docs/finn/source_code/finn.transformation.rst
@@ -28,7 +28,7 @@ Base Class
 finn.transformation.batchnorm\_to\_affine
 ------------------------------------------------
 
-.. automodule:: finn.transformation.batchnorm_to_affine
+.. automodule:: qonnx.transformation.batchnorm_to_affine
    :members:
    :undoc-members:
    :show-inheritance:
@@ -84,7 +84,7 @@ finn.transformation.extend\_partition
 finn.transformation.extract\_conv\_bias
 ------------------------------------------
 
-.. automodule:: finn.transformation.extract_conv_bias
+.. automodule:: qonnx.transformation.extract_conv_bias
    :members:
    :undoc-members:
    :show-inheritance:
@@ -93,7 +93,7 @@ finn.transformation.extract\_conv\_bias
 finn.transformation.fold\_constants
 ------------------------------------------
 
-.. automodule:: finn.transformation.fold_constants
+.. automodule:: qonnx.transformation.fold_constants
    :members:
    :undoc-members:
    :show-inheritance:
@@ -101,15 +101,15 @@ finn.transformation.fold\_constants
 finn.transformation.gemm\_to\_matmul
 ------------------------------------------
 
-.. automodule:: finn.transformation.gemm_to_matmul
+.. automodule:: qonnx.transformation.gemm_to_matmul
    :members:
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.general
+qonnx.transformation.general
 ----------------------------------
 
-.. automodule:: finn.transformation.general
+.. automodule:: qonnx.transformation.general
    :members:
    :undoc-members:
    :show-inheritance:
@@ -117,7 +117,7 @@ finn.transformation.general
 finn.transformation.infer\_data\_layouts
 -------------------------------------------
 
-.. automodule:: finn.transformation.infer_data_layouts
+.. automodule:: qonnx.transformation.infer_data_layouts
   :members:
   :undoc-members:
   :show-inheritance:
@@ -125,7 +125,7 @@ finn.transformation.infer\_data\_layouts
 finn.transformation.infer\_datatypes
 -------------------------------------------
 
-.. automodule:: finn.transformation.infer_datatypes
+.. automodule:: qonnx.transformation.infer_datatypes
    :members:
    :undoc-members:
    :show-inheritance:
@@ -133,7 +133,7 @@ finn.transformation.infer\_datatypes
 finn.transformation.infer\_shapes
 ----------------------------------------
 
-.. automodule:: finn.transformation.infer_shapes
+.. automodule:: qonnx.transformation.infer_shapes
    :members:
    :undoc-members:
    :show-inheritance:
@@ -141,7 +141,7 @@ finn.transformation.infer\_shapes
 finn.transformation.insert\_topk
 ---------------------------------------
 
-.. automodule:: finn.transformation.insert_topk
+.. automodule:: qonnx.transformation.insert_topk
    :members:
    :undoc-members:
    :show-inheritance:
@@ -157,7 +157,7 @@ finn.transformation.lower\_convs\_to\_matmul
 finn.transformation.make\_input\_chanlast
 ------------------------------------------
 
-.. automodule:: finn.transformation.make_input_chanlast
+.. automodule:: qonnx.transformation.make_input_chanlast
   :members:
   :undoc-members:
   :show-inheritance:
@@ -165,7 +165,7 @@ finn.transformation.make\_input\_chanlast
 finn.transformation.merge\_onnx\_models
 ----------------------------------------
 
-.. automodule:: finn.transformation.merge_onnx_models
+.. automodule:: qonnx.transformation.merge_onnx_models
   :members:
   :undoc-members:
   :show-inheritance:
@@ -179,10 +179,10 @@ finn.transformation.move\_reshape
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.remove
+qonnx.transformation.remove
 -------------------------------------
 
-.. automodule:: finn.transformation.remove
+.. automodule:: qonnx.transformation.remove
   :members:
   :undoc-members:
   :show-inheritance:
diff --git a/docs/finn/source_code/finn.util.rst b/docs/finn/source_code/finn.util.rst
index 62b72c2ac84567b20fee73a16e82b5857d698c9d..aec42ae905445947a59cb256f55eda2070347edf 100644
--- a/docs/finn/source_code/finn.util.rst
+++ b/docs/finn/source_code/finn.util.rst
@@ -13,10 +13,10 @@ finn.util.basic
    :undoc-members:
    :show-inheritance:
 
-finn.util.config
+qonnx.util.config
 ----------------
 
-.. automodule:: finn.util.config
+.. automodule:: qonnx.util.config
   :members:
   :undoc-members:
   :show-inheritance:
diff --git a/fetch-repos.sh b/fetch-repos.sh
index f8c136a32340d427c3f742261847079ed7b645a5..33bd8f940eed40ed2233c1893ec91cf7e4774450 100755
--- a/fetch-repos.sh
+++ b/fetch-repos.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2022, Advanced Micro Devices
+# Copyright (c) 2020-2022, Xilinx, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,18 +27,16 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FINN_BASE_COMMIT="fde240556165bbbce27bb7c0c894839877186d52"
-QONNX_COMMIT="9f9eff95227cc57aadc6eafcbd44b7acda89f067"
+QONNX_COMMIT="4a4826641db8d34619d31eac155fe95af11692eb"
 FINN_EXP_COMMIT="9cbd2787b5160e2b44e0e8164a0df1457dbd5366"
 BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03"
-PYVERILATOR_COMMIT="0c3eb9343500fc1352a02c020a736c8c2db47e8e"
+PYVERILATOR_COMMIT="64b8294ff1afebb47be76fcad6ae87027e0402c2"
 CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
-HLSLIB_COMMIT="5db5c8d480ae82bbbd05dd216b85272b6c6af091"
+HLSLIB_COMMIT="bea971285a506cd4c2032f133a8ec23a15f935e1"
 OMX_COMMIT="a97f0bf145a2f7e57ca416ea76c9e45df4e9aa37"
 AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
 EXP_BOARD_FILES_MD5="ac1811ae93b03f5f09a505283ff989a3"
 
-FINN_BASE_URL="https://github.com/Xilinx/finn-base.git"
 QONNX_URL="https://github.com/fastmachinelearning/qonnx.git"
 FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git"
 BREVITAS_URL="https://github.com/Xilinx/brevitas.git"
@@ -48,7 +46,6 @@ HLSLIB_URL="https://github.com/Xilinx/finn-hlslib.git"
 OMX_URL="https://github.com/maltanar/oh-my-xilinx.git"
 AVNET_BDF_URL="https://github.com/Avnet/bdf.git"
 
-FINN_BASE_DIR="finn-base"
 QONNX_DIR="qonnx"
 FINN_EXP_DIR="finn-experimental"
 BREVITAS_DIR="brevitas"
@@ -106,7 +103,6 @@ fetch_board_files() {
     cd $OLD_PWD
 }
 
-fetch_repo $FINN_BASE_URL $FINN_BASE_COMMIT $FINN_BASE_DIR
 fetch_repo $QONNX_URL $QONNX_COMMIT $QONNX_DIR
 fetch_repo $FINN_EXP_URL $FINN_EXP_COMMIT $FINN_EXP_DIR
 fetch_repo $BREVITAS_URL $BREVITAS_COMMIT $BREVITAS_DIR
diff --git a/finn-rtllib/axi_info/hdl/axi_info.sv b/finn-rtllib/axi_info/hdl/axi_info.sv
index 293563293651162e55df4f1886d1e2a17e0b3996..c0f35730c798d54c36fd1edd2afc6693caa76595 100644
--- a/finn-rtllib/axi_info/hdl/axi_info.sv
+++ b/finn-rtllib/axi_info/hdl/axi_info.sv
@@ -1,5 +1,5 @@
 /******************************************************************************
- *  Copyright (c) 2022, Advanced Micro Devices, Inc.
+ *  Copyright (c) 2022, Xilinx, Inc.
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
diff --git a/finn-rtllib/axi_info/hdl/axi_info_top.sv b/finn-rtllib/axi_info/hdl/axi_info_top.sv
index 2032e1105ce2c352c7ff3d5c2292dee338fa0bb1..ab2cfc8bed3fcb2308c85a81cd687f56780150cb 100644
--- a/finn-rtllib/axi_info/hdl/axi_info_top.sv
+++ b/finn-rtllib/axi_info/hdl/axi_info_top.sv
@@ -1,5 +1,5 @@
 /******************************************************************************
- *  Copyright (c) 2022, Advanced Micro Devices, Inc.
+ *  Copyright (c) 2022, Xilinx, Inc.
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -78,7 +78,7 @@ module axi_info_top #(
 			32'h0,
 			CHECKSUM_COUNT
 		})
-	)(
+	) inst (
 		//- Global Control ------------------
 		.ap_clk, .ap_rst_n,
 
diff --git a/finn-rtllib/axi_info/xgui/axi_info_top_v1_0.tcl b/finn-rtllib/axi_info/xgui/axi_info_top_v1_0.tcl
index 22ae5a71538ca0f5983ec0adf6f75d1bdfbd4f72..76ab1a5c5be0ac6530cd9ebc23e75697f36c0797 100644
--- a/finn-rtllib/axi_info/xgui/axi_info_top_v1_0.tcl
+++ b/finn-rtllib/axi_info/xgui/axi_info_top_v1_0.tcl
@@ -67,4 +67,3 @@ proc update_MODELPARAM_VALUE.CHECKSUM_COUNT { MODELPARAM_VALUE.CHECKSUM_COUNT PA
 	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
 	set_property value [get_property value ${PARAM_VALUE.CHECKSUM_COUNT}] ${MODELPARAM_VALUE.CHECKSUM_COUNT}
 }
-
diff --git a/notebooks/advanced/0_custom_analysis_pass.ipynb b/notebooks/advanced/0_custom_analysis_pass.ipynb
index 684b3fea792de55c7f7fb87a4764e94e10f32964..a6e06921516fd624ad9e8e1884677c7791f5734a 100644
--- a/notebooks/advanced/0_custom_analysis_pass.ipynb
+++ b/notebooks/advanced/0_custom_analysis_pass.ipynb
@@ -68,7 +68,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "model = ModelWrapper('../LFCW1A1.onnx')"
    ]
   },
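
The contract this notebook demonstrates is small: an analysis pass is any function that takes a ModelWrapper and returns a dictionary. A sketch under that assumption (count_node_types is an illustrative name):
::

  from qonnx.core.modelwrapper import ModelWrapper

  def count_node_types(model):
      # analysis pass: inspect the graph, change nothing, return a dict
      counts = {}
      for node in model.graph.node:
          counts[node.op_type] = counts.get(node.op_type, 0) + 1
      return counts

  model = ModelWrapper('../LFCW1A1.onnx')
  print(model.analysis(count_node_types))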
diff --git a/notebooks/advanced/1_custom_transformation_pass.ipynb b/notebooks/advanced/1_custom_transformation_pass.ipynb
index f0c5f80d826a41c429ecbb465844e738faa62c9b..7f78bea9e57e7145a75cd8c9f822ac5f57bcdf5f 100644
--- a/notebooks/advanced/1_custom_transformation_pass.ipynb
+++ b/notebooks/advanced/1_custom_transformation_pass.ipynb
@@ -46,7 +46,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "showSrc(ModelWrapper.transform)"
    ]
   },
@@ -79,7 +79,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.base import Transformation\n",
+    "from qonnx.transformation.base import Transformation\n",
     "\n",
     "showSrc(Transformation)"
    ]
@@ -110,7 +110,7 @@
    "source": [
     "import onnx\n",
     "onnx_model = onnx.load('../LFCW1A1.onnx')\n",
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "onnx_model = ModelWrapper(onnx_model)"
    ]
   },
@@ -129,7 +129,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.base import Transformation\n",
+    "from qonnx.transformation.base import Transformation\n",
     "\n",
     "class ConvertSubToAdd(Transformation):\n",
     "    def apply(self, model):\n",
@@ -194,7 +194,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.base import NodeLocalTransformation\n",
+    "from qonnx.transformation.base import NodeLocalTransformation\n",
     "\n",
     "showSrc(NodeLocalTransformation)"
    ]
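
A plausible body for the ConvertSubToAdd pass this notebook builds, assuming the subtrahend is a constant initializer (a sketch, not the notebook's exact code):
::

  from qonnx.core.modelwrapper import ModelWrapper
  from qonnx.transformation.base import Transformation

  class ConvertSubToAdd(Transformation):
      """Rewrite x - c as x + (-c) when c is a constant initializer."""
      def apply(self, model):
          graph_modified = False
          for node in model.graph.node:
              if node.op_type == "Sub":
                  init = model.get_initializer(node.input[1])
                  if init is not None:
                      model.set_initializer(node.input[1], -init)
                      node.op_type = "Add"
                      graph_modified = True
          # True asks transform() to re-apply until a fixed point is reached
          return (model, graph_modified)

  model = ModelWrapper('../LFCW1A1.onnx')
  model = model.transform(ConvertSubToAdd())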
diff --git a/notebooks/advanced/2_custom_op.ipynb b/notebooks/advanced/2_custom_op.ipynb
index 5f2bdc4bf4dac1b3471a75f2af7c786b69ed6cd0..e3b5d8cf0bd01bf2588331d346e706b3a36fed10 100644
--- a/notebooks/advanced/2_custom_op.ipynb
+++ b/notebooks/advanced/2_custom_op.ipynb
@@ -23,7 +23,7 @@
     "\n",
     "2. `CustomOp` subclasses need to implement the methods below (those not starting with underscore).\n",
     "\n",
-    "3. To be discoverable in the custom op register, `CustomOp` subclasses must set the `domain` field to the name of the Python module they appear in. For instance, to use the custom `Im2Col` op type from [here](https://github.com/Xilinx/finn-base/blob/dev/src/finn/custom_op/general/im2col.py), the ONNX node must use `domain=finn.custom_op.general` since its module is located at `finn/custom_op/general/im2col.py`."
+    "3. To be discoverable in the custom op register, `CustomOp` subclasses must set the `domain` field to the name of the Python module they appear in. For instance, to use the custom `Im2Col` op type from [here](https://github.com/Xilinx/finn-base/blob/dev/src/finn/custom_op/general/im2col.py), the ONNX node must use `domain=qonnx.custom_op.general` since its module is located at `finn/custom_op/general/im2col.py`."
    ]
   },
   {
@@ -139,7 +139,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import finn.custom_op.general as general\n",
+    "import qonnx.custom_op.general as general\n",
     "general.custom_op[\"MyPythonPowerOp\"] = MyPythonPowerOp"
    ]
   },
@@ -176,7 +176,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "from onnx import TensorProto\n",
     "\n",
     "def make_graph(ishape, exp, op_type = \"MyPythonPowerOp\"):\n",
@@ -195,7 +195,7 @@
     "        # name of output tensor\n",
     "        [\"outp\"],\n",
     "        # specify domain s.t. FINN can find our op under this submodule\n",
-    "        domain=\"finn.custom_op.general\",\n",
+    "        domain=\"qonnx.custom_op.general\",\n",
     "        # set up attributes\n",
     "        exponent = int(exp),\n",
     "        exec_mode = \"python\"\n",
@@ -240,8 +240,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.datatype import DataType\n",
-    "from finn.util.basic import gen_finn_dt_tensor\n",
+    "from qonnx.core.datatype import DataType\n",
+    "from qonnx.util.basic import gen_finn_dt_tensor\n",
     "\n",
     "# generate a random input of e.g signed 4-bit values\n",
     "random_input = gen_finn_dt_tensor(DataType[\"INT4\"], input_shape)\n",
@@ -431,7 +431,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.custom_op.registry import getCustomOp\n",
+    "from qonnx.custom_op.registry import getCustomOp\n",
     "\n",
     "# get FINN wrapper for this node, with all the functionality\n",
     "op_inst = getCustomOp(mixedop_graph.model.graph.node[0])\n",
@@ -456,9 +456,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#from finn.transformation.base import Transformation\n",
+    "#from qonnx.transformation.base import Transformation\n",
     "# can derive from NodeLocalTransformation for faster (parallel) execution\n",
-    "from finn.transformation.base import NodeLocalTransformation\n",
+    "from qonnx.transformation.base import NodeLocalTransformation\n",
     "import os\n",
     "\n",
     "class MyNodeLocalCodeGen(NodeLocalTransformation):\n",
diff --git a/notebooks/basics/0_how_to_work_with_onnx.ipynb b/notebooks/basics/0_how_to_work_with_onnx.ipynb
index aae98ec771c1b38d16e593241f16f6dccfe142d7..a4ea75fe38aac6720671a9b51de0ef31951cccb0 100644
--- a/notebooks/basics/0_how_to_work_with_onnx.ipynb
+++ b/notebooks/basics/0_how_to_work_with_onnx.ipynb
@@ -322,7 +322,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "finn_model = ModelWrapper(onnx_model)"
    ]
   },
diff --git a/notebooks/basics/1_brevitas_network_import.ipynb b/notebooks/basics/1_brevitas_network_import.ipynb
index b6d6c3bdfd2962987a63c62a5532e7969a33982f..ecd3c89c1afb12593ec68bef9016f9e2bf083dde 100644
--- a/notebooks/basics/1_brevitas_network_import.ipynb
+++ b/notebooks/basics/1_brevitas_network_import.ipynb
@@ -181,7 +181,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "model = ModelWrapper(export_onnx_path)\n",
     "model.graph.node[8]"
    ]
@@ -240,8 +240,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.fold_constants import FoldConstants\n",
-    "from finn.transformation.infer_shapes import InferShapes\n",
+    "from qonnx.transformation.fold_constants import FoldConstants\n",
+    "from qonnx.transformation.infer_shapes import InferShapes\n",
     "model = model.transform(InferShapes())\n",
     "model = model.transform(FoldConstants())\n",
     "export_onnx_path_transformed = \"/tmp/LFCW1A1-clean.onnx\"\n",
diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
index e2762024a751ef573ebaf8dadf64d63e6abb6d83..b628fa455a27649791c2b6f72409b85f71f7c704 100644
--- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
@@ -84,10 +84,10 @@
     "import onnx\n",
     "from finn.util.test import get_test_model_trained\n",
     "import brevitas.onnx as bo\n",
-    "from finn.core.modelwrapper import ModelWrapper\n",
-    "from finn.transformation.infer_shapes import InferShapes\n",
-    "from finn.transformation.fold_constants import FoldConstants\n",
-    "from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.transformation.infer_shapes import InferShapes\n",
+    "from qonnx.transformation.fold_constants import FoldConstants\n",
+    "from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n",
     "\n",
     "cnv = get_test_model_trained(\"CNV\", 1, 1)\n",
     "bo.export_finn_onnx(cnv, (1, 3, 32, 32), build_dir + \"/end2end_cnv_w1a1_export.onnx\")\n",
@@ -139,8 +139,8 @@
    "outputs": [],
    "source": [
     "from finn.util.pytorch import ToTensor\n",
-    "from finn.transformation.merge_onnx_models import MergeONNXModels\n",
-    "from finn.core.datatype import DataType\n",
+    "from qonnx.transformation.merge_onnx_models import MergeONNXModels\n",
+    "from qonnx.core.datatype import DataType\n",
     "\n",
     "model = ModelWrapper(build_dir+\"/end2end_cnv_w1a1_tidy.onnx\")\n",
     "global_inp_name = model.graph.input[0].name\n",
@@ -164,8 +164,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.insert_topk import InsertTopK\n",
-    "from finn.transformation.infer_datatypes import InferDataTypes\n",
+    "from qonnx.transformation.insert_topk import InsertTopK\n",
+    "from qonnx.transformation.infer_datatypes import InferDataTypes\n",
     "\n",
     "# postprocessing: insert Top-1 node at the end\n",
     "model = model.transform(InsertTopK(k=1))\n",
@@ -212,12 +212,12 @@
    "outputs": [],
    "source": [
     "from finn.transformation.streamline import Streamline\n",
-    "from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul\n",
-    "from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount\n",
+    "from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul\n",
+    "from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount\n",
     "import finn.transformation.streamline.absorb as absorb\n",
     "from finn.transformation.streamline.reorder import MakeMaxPoolNHWC, MoveScalarLinearPastInvariants\n",
-    "from finn.transformation.infer_data_layouts import InferDataLayouts\n",
-    "from finn.transformation.general import RemoveUnusedTensors\n",
+    "from qonnx.transformation.infer_data_layouts import InferDataLayouts\n",
+    "from qonnx.transformation.general import RemoveUnusedTensors\n",
     "\n",
     "model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_pre_post.onnx\")\n",
     "model = model.transform(MoveScalarLinearPastInvariants())\n",
@@ -277,8 +277,8 @@
     "    CreateDataflowPartition,\n",
     ")\n",
     "from finn.transformation.move_reshape import RemoveCNVtoFCFlatten\n",
-    "from finn.custom_op.registry import getCustomOp\n",
-    "from finn.transformation.infer_data_layouts import InferDataLayouts\n",
+    "from qonnx.custom_op.registry import getCustomOp\n",
+    "from qonnx.transformation.infer_data_layouts import InferDataLayouts\n",
     "\n",
     "# choose the memory mode for the MVTU units, decoupled or const\n",
     "mem_mode = \"decoupled\"\n",
diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
index 2e19cda3dce3366ee729aa0c4640e5221886f457..5501e030e28e3cbd52d226d7d9b8014974ca38a9 100644
--- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
@@ -119,7 +119,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "model = ModelWrapper(build_dir+\"/tfc_w1_a1.onnx\")"
    ]
   },
@@ -207,10 +207,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n",
-    "from finn.transformation.infer_shapes import InferShapes\n",
-    "from finn.transformation.infer_datatypes import InferDataTypes\n",
-    "from finn.transformation.fold_constants import FoldConstants\n",
+    "from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n",
+    "from qonnx.transformation.infer_shapes import InferShapes\n",
+    "from qonnx.transformation.infer_datatypes import InferDataTypes\n",
+    "from qonnx.transformation.fold_constants import FoldConstants\n",
     "\n",
     "model = model.transform(InferShapes())\n",
     "model = model.transform(FoldConstants())\n",
@@ -258,8 +258,8 @@
    "outputs": [],
    "source": [
     "from finn.util.pytorch import ToTensor\n",
-    "from finn.transformation.merge_onnx_models import MergeONNXModels\n",
-    "from finn.core.datatype import DataType\n",
+    "from qonnx.transformation.merge_onnx_models import MergeONNXModels\n",
+    "from qonnx.core.datatype import DataType\n",
     "\n",
     "model = ModelWrapper(build_dir+\"/tfc_w1_a1_tidy.onnx\")\n",
     "global_inp_name = model.graph.input[0].name\n",
@@ -295,7 +295,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.insert_topk import InsertTopK\n",
+    "from qonnx.transformation.insert_topk import InsertTopK\n",
     "\n",
     "# postprocessing: insert Top-1 node at the end\n",
     "model = model.transform(InsertTopK(k=1))\n",
@@ -383,10 +383,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount\n",
+    "from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount\n",
     "from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds\n",
-    "from finn.transformation.infer_data_layouts import InferDataLayouts\n",
-    "from finn.transformation.general import RemoveUnusedTensors\n",
+    "from qonnx.transformation.infer_data_layouts import InferDataLayouts\n",
+    "from qonnx.transformation.general import RemoveUnusedTensors\n",
     "\n",
     "model = model.transform(ConvertBipolarMatMulToXnorPopcount())\n",
     "model = model.transform(absorb.AbsorbAddIntoMultiThreshold())\n",
@@ -487,7 +487,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.custom_op.registry import getCustomOp\n",
+    "from qonnx.custom_op.registry import getCustomOp\n",
     "sdp_node = parent_model.get_nodes_by_op_type(\"StreamingDataflowPartition\")[0]\n",
     "sdp_node = getCustomOp(sdp_node)\n",
     "dataflow_model_filename = sdp_node.get_nodeattr(\"model\")\n",
diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb
index dbb98bc304b3fffc0dad524070e67859726ff406..1e07781b66a8eaa816921a5ff721756bf418a26c 100644
--- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb
@@ -83,7 +83,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.custom_op.general.xnorpopcount import xnorpopcountmatmul\n",
+    "from qonnx.custom_op.general.xnorpopcount import xnorpopcountmatmul\n",
     "showSrc(xnorpopcountmatmul)"
    ]
   },
@@ -105,7 +105,7 @@
    "outputs": [],
    "source": [
     "import numpy as np\n",
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "input_dict = {\"global_in\": nph.to_array(input_tensor)}\n",
     "\n",
     "model_for_sim = ModelWrapper(build_dir+\"/tfc_w1a1_ready_for_hls_conversion.onnx\")"
@@ -171,7 +171,7 @@
    "source": [
     "from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim\n",
     "from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim\n",
-    "from finn.transformation.general import GiveUniqueNodeNames\n",
+    "from qonnx.transformation.general import GiveUniqueNodeNames\n",
     "\n",
     "model_for_cppsim = model_for_cppsim.transform(GiveUniqueNodeNames())\n",
     "model_for_cppsim = model_for_cppsim.transform(PrepareCppSim())\n",
@@ -212,7 +212,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.custom_op.registry import getCustomOp\n",
+    "from qonnx.custom_op.registry import getCustomOp\n",
     "\n",
     "fc0 = model_for_cppsim.graph.node[1]\n",
     "fc0w = getCustomOp(fc0)\n",
diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
index 85a4e9556b71ad913080a302c665edf23146faa0..68b345ed348f7a3f6fff507e1a4e45f6942a6a60 100644
--- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
+++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
@@ -714,7 +714,7 @@
     "* The input preprocessing (x + 1) / 2 is exported as part of the network (initial `Add` and `Div` layers)\n",
     "* Brevitas `QuantLinear` layers are exported to ONNX as `MatMul`. We've exported the padded version; shape of the first MatMul node's weight parameter is 600x64\n",
     "* The weight parameters (second inputs) for MatMul nodes are annotated with `quantization: finn_datatype: INT2`\n",
-    "* The quantized activations are exported as `MultiThreshold` nodes with `domain=finn.custom_op.general`\n",
+    "* The quantized activations are exported as `MultiThreshold` nodes with `domain=qonnx.custom_op.general`\n",
     "* There's a final `MultiThreshold` node with threshold=0 to produce the final bipolar output (this is the `qnt_output` from `CybSecMLPForExport`"
    ]
   },
diff --git a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
index 07c8dbb1b9a4fd356aaf6a5bc5679e21a3152c1f..3e116b1adbcfddcd3cf61d8ad11130988fc4e2d4 100644
--- a/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
+++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
@@ -53,7 +53,7 @@
    "source": [
     "# 1. Import model into FINN with ModelWrapper <a id=\"brevitas_import_visualization\"></a>\n",
     "\n",
-    "Now that we have the model in .onnx format, we can work with it using FINN. To import it into FINN, we'll use the [`ModelWrapper`](https://finn.readthedocs.io/en/latest/source_code/finn.core.html#finn.core.modelwrapper.ModelWrapper). It is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model."
+    "Now that we have the model in .onnx format, we can work with it using FINN. To import it into FINN, we'll use the [`ModelWrapper`](https://finn.readthedocs.io/en/latest/source_code/finn.core.html#qonnx.core.modelwrapper.ModelWrapper). It is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model."
    ]
   },
   {
@@ -62,7 +62,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
+    "from qonnx.core.modelwrapper import ModelWrapper\n",
     "\n",
     "ready_model_filename = \"cybsec-mlp-ready.onnx\"\n",
     "model_for_sim = ModelWrapper(ready_model_filename)"
@@ -97,7 +97,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.core.datatype import DataType\n",
+    "from qonnx.core.datatype import DataType\n",
     "\n",
     "finnonnx_in_tensor_name = model_for_sim.graph.input[0].name\n",
     "finnonnx_out_tensor_name = model_for_sim.graph.output[0].name\n",
@@ -139,10 +139,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n",
-    "from finn.transformation.infer_shapes import InferShapes\n",
-    "from finn.transformation.infer_datatypes import InferDataTypes\n",
-    "from finn.transformation.fold_constants import FoldConstants\n",
+    "from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n",
+    "from qonnx.transformation.infer_shapes import InferShapes\n",
+    "from qonnx.transformation.infer_datatypes import InferDataTypes\n",
+    "from qonnx.transformation.fold_constants import FoldConstants\n",
     "\n",
     "model_for_sim = model_for_sim.transform(InferShapes())\n",
     "model_for_sim = model_for_sim.transform(FoldConstants())\n",
diff --git a/requirements.txt b/requirements.txt
index 0e51d7ae6d53703e2b485be85956127ca3430a7c..3bab23fb7d6c6cc80155b9f4b42c5db48ab0723e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,14 +5,14 @@ docrep==0.2.7
 future==0.18.2
 gspread==3.6.0
 numpy==1.18.0
-onnx==1.7.0
+onnx==1.11.0
 onnxoptimizer
-onnxruntime==1.4.0
+onnxruntime==1.11.1
 pre-commit==2.9.2
+protobuf==3.20.1
 pyscaffold==3.2.1
 scipy==1.5.2
 setupext-janitor>=1.1.2
 toposort==1.5
 vcdvcd==1.0.5
 wget==3.2
-protobuf==3.20.1
\ No newline at end of file
diff --git a/run-docker.sh b/run-docker.sh
index 6b7d597cb68c58c28ac568cb39f8e760c94bb885..ff4161ce06d8d922fd153ad37cddcdcad50effcc 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2022, Advanced Micro Devices
+# Copyright (c) 2020-2022, Xilinx, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/setup.cfg b/setup.cfg
index bcf5364b782447d21eea553ddcc2a6fc9b2636c0..94d2cb2b8dd5b9a43931c165ef998b4af2f50192 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -124,6 +124,7 @@ markers =
     util: mark tests that test util functions
     transform: mark tests that test transformations (before hls layers)
     fpgadataflow: mark tests related to hls layers
+    end2end: mark tests that run the end2end flow
 norecursedirs =
     dist
     build
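
With the new marker in place, the end2end suite can be selected the same way the Jenkinsfile above selects the other suites, e.g. bash run-docker.sh python setup.py test --addopts -mend2end (only the marker name is new; the command pattern is taken from the Jenkinsfile).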
diff --git a/src/finn/analysis/__init__.py b/src/finn/analysis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/analysis/fpgadataflow/__init__.py b/src/finn/analysis/fpgadataflow/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/analysis/fpgadataflow/dataflow_performance.py b/src/finn/analysis/fpgadataflow/dataflow_performance.py
index dafe8a9f89468d9ccba926562a4729a793c2fbf0..57267026667788d757a80261a07e5524b0036c36 100644
--- a/src/finn/analysis/fpgadataflow/dataflow_performance.py
+++ b/src/finn/analysis/fpgadataflow/dataflow_performance.py
@@ -26,7 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.custom_op.registry import getCustomOp
+from qonnx.custom_op.registry import getCustomOp
+
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py b/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py
index bb1cad56da5a2d3a8690566dd6f3f220af9c12a6..e1517ec636a0ab3129d6049bbc83a796c871f124 100644
--- a/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py
+++ b/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py
@@ -26,7 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
+import qonnx.custom_op.registry as registry
+
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/analysis/fpgadataflow/floorplan_params.py b/src/finn/analysis/fpgadataflow/floorplan_params.py
index 9ba99fb546587ba3c2f385c958ecb172f8903bf7..d57b660bce8abe89bfe73f82a518a43918ddb2b1 100644
--- a/src/finn/analysis/fpgadataflow/floorplan_params.py
+++ b/src/finn/analysis/fpgadataflow/floorplan_params.py
@@ -26,7 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.custom_op.registry import getCustomOp
+from qonnx.custom_op.registry import getCustomOp
+
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
index aff99efd807d8b04dc6490b299d66c0be8d8fc44..4d921438f692353572a7613e21df64b60706e62d 100644
--- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
@@ -26,10 +26,10 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import os
+import qonnx.custom_op.registry as registry
 import warnings
 import xml.etree.ElementTree as ET
 
-import finn.custom_op.registry as registry
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/analysis/fpgadataflow/op_and_param_counts.py b/src/finn/analysis/fpgadataflow/op_and_param_counts.py
index 27c6dfd997a14ab8b213baaa469d402bed1cf3a8..0bc9655c0d701158082ae41fdbe0c0849ed0e0bf 100644
--- a/src/finn/analysis/fpgadataflow/op_and_param_counts.py
+++ b/src/finn/analysis/fpgadataflow/op_and_param_counts.py
@@ -26,8 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
-from finn.util.basic import is_finn_op
+import qonnx.custom_op.registry as registry
+from qonnx.util.basic import is_finn_op
 
 
 def aggregate_dict_keys(res_dict):
diff --git a/src/finn/analysis/fpgadataflow/post_synth_res.py b/src/finn/analysis/fpgadataflow/post_synth_res.py
index 4b817910949fa750f34a53592413bb38c7557c08..8b9c5d2a04fd4740979f9f67d15935f3e0d6a788 100644
--- a/src/finn/analysis/fpgadataflow/post_synth_res.py
+++ b/src/finn/analysis/fpgadataflow/post_synth_res.py
@@ -28,9 +28,9 @@
 
 import os
 import xml.etree.ElementTree as ET
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.move_reshape import _is_fpgadataflow_node
 
 
diff --git a/src/finn/analysis/fpgadataflow/res_estimation.py b/src/finn/analysis/fpgadataflow/res_estimation.py
index c543361f5dae373c5c581088fa3fdb5be1b5a39d..406496bc0e873de0df484f457c0fe8d97b94e434 100644
--- a/src/finn/analysis/fpgadataflow/res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/res_estimation.py
@@ -26,7 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
+import qonnx.custom_op.registry as registry
+
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/analysis/verify_custom_nodes.py b/src/finn/analysis/verify_custom_nodes.py
index 62dac2827f11d290c5a50137e12684eb93326297..83a985e71f2d61a5e7505edeba9329269531792b 100644
--- a/src/finn/analysis/verify_custom_nodes.py
+++ b/src/finn/analysis/verify_custom_nodes.py
@@ -26,8 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
-from finn.util.basic import is_finn_op
+import qonnx.custom_op.registry as registry
+from qonnx.util.basic import is_finn_op
 
 
 def verify_nodes(model):
diff --git a/src/finn/builder/__init__.py b/src/finn/builder/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index c4664a5471984e1f88a70f1d9bb6ce674e38c782..238083f653d410772a81115ff12dd987835d1f32 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -34,13 +34,13 @@ import pdb  # NOQA
 import sys
 import time
 import traceback
+from qonnx.core.modelwrapper import ModelWrapper
 
 from finn.builder.build_dataflow_config import (
     DataflowBuildConfig,
     default_build_dataflow_steps,
 )
 from finn.builder.build_dataflow_steps import build_dataflow_step_lookup
-from finn.core.modelwrapper import ModelWrapper
 
 
 # adapted from https://stackoverflow.com/a/39215961
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index cc8e6187624e8931ca31c0c78bdab166d5a3bdf5..381dfe91a22a95ac056ea61f7de1d1b9d176ab17 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -166,7 +166,7 @@ class DataflowBuildConfig:
     #: If the parallelization attributes (PE, SIMD) are part of the config,
     #: this will override the automatically generated parallelization
     #: attributes inferred from target_fps (if any)
-    #: Will be applied with :py:mod:`finn.transformation.general.ApplyConfig`
+    #: Will be applied with :py:mod:`qonnx.transformation.general.ApplyConfig`
     folding_config_file: Optional[str] = None
 
     #: (Optional) Target inference performance in frames per second.
@@ -267,7 +267,7 @@ class DataflowBuildConfig:
 
     #: Path to JSON config file assigning each layer to an SLR.
     #: Only relevant when `shell_flow_type = ShellFlowType.VITIS_ALVEO`
-    #: Will be applied with :py:mod:`finn.transformation.general.ApplyConfig`
+    #: Will be applied with :py:mod:`qonnx.transformation.general.ApplyConfig`
     vitis_floorplan_file: Optional[str] = None
 
     #: Vitis optimization strategy
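
A sketch of how folding_config_file is consumed, assuming ApplyConfig accepts a JSON filename and the usual Defaults-plus-per-node layout (node and file names below are illustrative):
::

  import json
  from qonnx.core.modelwrapper import ModelWrapper
  from qonnx.transformation.general import ApplyConfig

  folding = {
      "Defaults": {},
      # per-node attribute overrides, keyed by node name
      "StreamingFCLayer_Batch_0": {"PE": 16, "SIMD": 40, "ram_style": "block"},
  }
  with open("folding_config.json", "w") as f:
      json.dump(folding, f)

  model = ModelWrapper("dataflow_model.onnx")
  model = model.transform(ApplyConfig("folding_config.json"))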
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index f2f768b3c3caf7747627d0c0043d3955b417ea67..59f77650da5c3c3f9db0ea65e2288544b376bec3 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -31,7 +31,23 @@ import numpy as np
 import os
 from copy import deepcopy
 from distutils.dir_util import copy_tree
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
+    ApplyConfig,
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+    RemoveStaticGraphInputs,
+    RemoveUnusedTensors,
+)
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from qonnx.util.cleanup import cleanup_model
+from qonnx.util.config import extract_model_config_to_json
 from shutil import copy
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
@@ -53,13 +69,9 @@ from finn.builder.build_dataflow_config import (
     ShellFlowType,
     VerificationStepType,
 )
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.core.rtlsim_exec import rtlsim_exec
 from finn.core.throughput_test import throughput_test_rtlsim
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
@@ -84,17 +96,6 @@ from finn.transformation.fpgadataflow.set_fifo_depths import (
 from finn.transformation.fpgadataflow.set_folding import SetFolding
 from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
 from finn.transformation.fpgadataflow.vitis_build import VitisBuild
-from finn.transformation.general import (
-    ApplyConfig,
-    GiveReadableTensorNames,
-    GiveUniqueNodeNames,
-    RemoveStaticGraphInputs,
-    RemoveUnusedTensors,
-)
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.transformation.qonnx.quant_act_to_multithreshold import (
@@ -102,9 +103,10 @@ from finn.transformation.qonnx.quant_act_to_multithreshold import (
 )
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
-from finn.util.basic import get_rtlsim_trace_depth
-from finn.util.config import extract_model_config_to_json
-from finn.util.pyverilator import pyverilate_get_liveness_threshold_cycles
+from finn.util.basic import (
+    get_rtlsim_trace_depth,
+    pyverilate_get_liveness_threshold_cycles,
+)
 from finn.util.test import execute_parent
 
 
diff --git a/src/finn/core/__init__.py b/src/finn/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py
new file mode 100644
index 0000000000000000000000000000000000000000..2695113661ed286c94ae9cb5f20ca99cc1fced7f
--- /dev/null
+++ b/src/finn/core/onnx_exec.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2022, Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import copy
+import numpy as np
+import qonnx.analysis.topology as ta
+from qonnx.core.onnx_exec import execute_onnx as execute_onnx_base
+
+from finn.core.remote_exec import remote_exec
+from finn.core.rtlsim_exec import rtlsim_exec
+
+
+def execute_onnx(
+    model, input_dict, return_full_exec_context=False, start_node=None, end_node=None
+):
+    """Executes given ONNX ModelWrapper with given named inputs.
+    If return_full_exec_context is False, a dict of named outputs is returned
+    as indicated by the model.graph.output.
+    If return_full_exec_context is True, the full set of tensors used by
+    the execution (including inputs, weights, activations and final outputs)
+    will be returned as a dict.
+    When start_node and end_node are set to None, the whole graph is executed.
+    If they are set to particular ONNX nodes, only the subgraph between (and
+    including) those nodes is executed.
+    """
+
+    # check if model has an execution mode set
+    # if unset, execute the model using the QONNX-provided execute_onnx impl
+    # if set to "remote_pynq" execute model on PYNQ board
+    # if set to "rtlsim" execute model using pyverilator
+    model_exec_mode = model.get_metadata_prop("exec_mode")
+    if (model_exec_mode is None) or (model_exec_mode == ""):
+        return execute_onnx_base(
+            model, input_dict, return_full_exec_context, start_node, end_node
+        )
+
+    if not model.check_all_tensor_shapes_specified():
+        raise Exception("Found unspecified tensor shapes, try infer_shapes")
+    ret = model.analysis(ta.nodes_topologically_sorted)
+    assert (
+        ret["nodes_topologically_sorted"] is True
+    ), """Nodes must be
+    topologically sorted."""
+
+    graph = model.graph
+    # first, we need to make sure that every variable required by the graph has
+    # some buffer associated with it. this includes graph inputs (which includes
+    # the input data as well as the trained parameters) and the graph ValueInfo
+    # (intermediate tensors between layers)
+    # this is provided by the execution_context, which is a dict of np.ndarray
+    execution_context = model.make_empty_exec_context()
+    # fill in any inputs provided to this function
+    for inp_name in input_dict.keys():
+        if inp_name in execution_context:
+            if execution_context[inp_name].shape == input_dict[inp_name].shape:
+                execution_context[inp_name] = input_dict[inp_name]
+            else:
+                raise Exception(
+                    "Shape mismatch for provided input %s: found %s expected %s "
+                    % (
+                        inp_name,
+                        str(execution_context[inp_name].shape),
+                        str(input_dict[inp_name].shape),
+                    )
+                )
+
+    # dispatch on the execution mode determined above
+    # (the unset case has already returned via QONNX's execute_onnx)
+    if model_exec_mode == "remote_pynq":
+        # use remote exec metadata built into model to execute on a remote PYNQ
+        remote_exec(model, execution_context)
+    elif model_exec_mode == "rtlsim":
+        # use stitched IP for rtlsim
+        rtlsim_exec(model, execution_context)
+    else:
+        raise Exception(
+            """Metadata property "exec_mode" is set to an unknown value.
+        Can be left unset or has to be set to "remote_pynq" for remote execution
+        on PYNQ board or "rtlsim" for execution using pyverilator!"""
+        )
+
+    if return_full_exec_context:
+        return execution_context
+    else:
+        # provide outputs as dict
+        output_dict = dict()
+        for out_tensor in graph.output:
+            out_name = out_tensor.name
+            output_dict[out_name] = execution_context[out_name]
+        return output_dict
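+
+# Minimal usage sketch (hypothetical file and shapes; with "exec_mode" unset
+# this falls through to the QONNX node-by-node interpreter above):
+#
+#   import numpy as np
+#   from qonnx.core.modelwrapper import ModelWrapper
+#   model = ModelWrapper("finn_model.onnx")
+#   iname = model.graph.input[0].name
+#   out = execute_onnx(model, {iname: np.zeros((1, 784), dtype=np.float32)})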
+
+
+def execute_onnx_and_make_model(model, input_dict):
+    """Executes given ONNX ModelWrapper with given named inputs and return a new
+    ModelWrapper where an initializer is provided for each tensor as taken from
+    the execution. This new model is useful for debugging, since it contains
+    all the intermediate activation values."""
+
+    # retrieve the full execution context
+    execution_context = execute_onnx(model, input_dict, True)
+    new_model = copy.deepcopy(model)
+    # create value_info entries and initializers for everything
+    for i in execution_context.keys():
+        new_model.set_initializer(i, execution_context[i])
+    for vi in new_model.graph.value_info:
+        new_model.graph.output.append(vi)
+    return new_model
+
+
+def compare_execution(
+    model_a,
+    model_b,
+    input_dict,
+    compare_fxn=lambda x, y: np.isclose(x, y, atol=1e-3).all(),
+):
+    """Executes two ONNX models and compare their outputs using given function.
+
+    compare_fxn should take in two tensors and return a Boolean"""
+    # compare values from first output tensors produced
+    res_a = list(execute_onnx(model_a, input_dict).items())[0][1]
+    res_b = list(execute_onnx(model_b, input_dict).items())[0][1]
+    return compare_fxn(res_a, res_b)
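+
+# Sketch: check a transformed model against its reference (hypothetical names);
+# only the first listed graph output of each model is compared, with the
+# default tolerance of atol=1e-3:
+#
+#   assert compare_execution(model_ref, model_opt, {iname: x})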
diff --git a/src/finn/core/remote_exec.py b/src/finn/core/remote_exec.py
new file mode 100644
index 0000000000000000000000000000000000000000..f487b48f86f1ef0440ed4a8bf371083369dd096c
--- /dev/null
+++ b/src/finn/core/remote_exec.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import os
+import subprocess
+import warnings
+
+
+def remote_exec(model, execution_context):
+    """Executes the given model remotely on the pynq board. The metadata properties
+    related to the pynq board have to be set. The execution context contains the
+    input values."""
+    # TODO fix for multi input-output
+    pynq_ip = model.get_metadata_prop("pynq_ip")
+    pynq_port = int(model.get_metadata_prop("pynq_port"))
+    pynq_username = model.get_metadata_prop("pynq_username")
+    pynq_password = model.get_metadata_prop("pynq_password")
+    pynq_target_dir = model.get_metadata_prop("pynq_target_dir")
+    deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
+    platform = model.get_metadata_prop("platform")
+    assert platform in ["alveo", "zynq-iodma"]
+    bitfile = model.get_metadata_prop("bitfile")
+    bitfile = os.path.basename(bitfile)
+    if pynq_password == "":
+        if "zynq" in platform:
+            raise Exception("PYNQ board remote exec needs password for sudo")
+        else:
+            local_prefix = ""  # assume we are using an ssh key
+            warnings.warn("Empty password, make sure you've set up an ssh key")
+    else:
+        local_prefix = "sshpass -p %s " % pynq_password
+
+    if platform == "alveo":
+        # Alveo can run without sudo
+        remote_prefix = ""
+    elif "zynq" in platform:
+        # PYNQ Zynq boards need to execute with sudo
+        remote_prefix = "echo %s | sudo -S " % pynq_password
+
+    inp = execution_context[model.graph.input[0].name]
+    # make copy of array before saving it
+    inp = inp.copy()
+    batchsize = inp.shape[0]
+    np.save(os.path.join(deployment_dir, "input.npy"), inp)
+    # extracting last folder of absolute path (deployment_dir)
+    deployment_folder = os.path.basename(os.path.normpath(deployment_dir))
+    # copy input to PYNQ board
+    cmd = local_prefix + "scp -P{} -r {}/input.npy {}@{}:{}/{}".format(
+        pynq_port,
+        deployment_dir,
+        pynq_username,
+        pynq_ip,
+        pynq_target_dir,
+        deployment_folder,
+    )
+    bash_command = ["/bin/bash", "-c", cmd]
+    process_scp_in = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_scp_in.communicate()
+
+    # use platform attribute for correct remote execution
+    if platform == "alveo":
+        remote_cmd = "bash -ic 'bash alveo_run.sh execute %d' \"" % batchsize
+    else:
+        remote_cmd = (
+            "python3.6 driver.py --exec_mode=execute --batchsize={} "
+            "--bitfile={} --inputfile=input.npy --outputfile=output.npy "
+            '--platform={} "'
+        ).format(batchsize, bitfile, platform)
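+    # note: both remote_cmd variants end with a double-quote, which closes the
+    # `"cd ...; ..."` string opened in the ssh command constructed below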
+    cmd = (
+        local_prefix + 'ssh {}@{} -p {} "cd {}/{}; ' + remote_prefix + remote_cmd
+    ).format(pynq_username, pynq_ip, pynq_port, pynq_target_dir, deployment_folder)
+    bash_command = ["/bin/bash", "-c", cmd]
+    process_exec_accel = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_exec_accel.communicate()
+    # remove stale output file from local dir, if any
+    try:
+        os.remove("{}/output.npy".format(deployment_dir))
+    except FileNotFoundError:
+        pass
+    # copy generated output to local
+    cmd = local_prefix + "scp -P{} {}@{}:{}/{}/output.npy {}".format(
+        pynq_port,
+        pynq_username,
+        pynq_ip,
+        pynq_target_dir,
+        deployment_folder,
+        deployment_dir,
+    )
+    bash_command = ["/bin/bash", "-c", cmd]
+    process_scp_out = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_scp_out.communicate()
+    outp = np.load("{}/output.npy".format(deployment_dir))
+    execution_context[model.graph.output[0].name] = outp
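+
+# The metadata properties read above (pynq_ip, pynq_port, pynq_username,
+# pynq_password, pynq_target_dir, pynq_deploy_dir, platform, bitfile) are
+# expected to be set beforehand, e.g. by the FINN deployment build steps.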
diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py
new file mode 100644
index 0000000000000000000000000000000000000000..d45c972928b15e08f72dfccecec96fe057176ae1
--- /dev/null
+++ b/src/finn/core/rtlsim_exec.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+from pyverilator.util.axi_utils import reset_rtlsim, rtlsim_multi_io
+from qonnx.custom_op.registry import getCustomOp
+
+from finn.util.basic import pyverilate_get_liveness_threshold_cycles
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
+from finn.util.pyverilator import pyverilate_stitched_ip
+
+try:
+    from pyverilator import PyVerilator
+except ModuleNotFoundError:
+    PyVerilator = None
+
+
+def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None):
+    """Use PyVerilator to execute given model with stitched IP. The execution
+    context contains the input values. Hook functions can be optionally
+    specified to observe/alter the state of the circuit, receiving the
+    PyVerilator sim object as their first argument:
+    - pre_hook : hook function to be called before sim start (after reset)
+    - post_hook : hook function to be called after sim end
+    """
+    if PyVerilator is None:
+        raise ImportError("Installation of PyVerilator is required.")
+    # ensure stitched ip project already exists
+    assert os.path.isfile(
+        model.get_metadata_prop("wrapper_filename")
+    ), """The
+    file given by metadata property "wrapper_filename" doesn't exist."""
+    assert os.path.isdir(
+        model.get_metadata_prop("vivado_stitch_proj")
+    ), """The
+    directory given by metadata property "vivado_stitch_proj" doesn't exist."""
+    trace_file = model.get_metadata_prop("rtlsim_trace")
+    if trace_file is None:
+        trace_file = ""
+    extra_verilator_args = model.get_metadata_prop("extra_verilator_args")
+    if extra_verilator_args is None:
+        extra_verilator_args = []
+    else:
+        extra_verilator_args = eval(extra_verilator_args)
+
+    # extract i/o info to prepare io_dict
+    io_dict = {"inputs": {}, "outputs": {}}
+    if_dict = eval(model.get_metadata_prop("vivado_stitch_ifnames"))
+    # go over and prepare inputs
+    for i, i_vi in enumerate(model.graph.input):
+        i_name = i_vi.name
+        i_tensor = execution_context[i_name]
+        i_dt = model.get_tensor_datatype(i_name)
+        first_node_onnx = model.find_consumer(i_name)
+        first_node = getCustomOp(first_node_onnx)
+        node_inp_ind = list(first_node_onnx.input).index(i_name)
+        if node_inp_ind == 0:
+            # default node input (input 0)
+            i_stream_w = first_node.get_instream_width()
+            i_folded_shape = first_node.get_folded_input_shape()
+        else:
+            # not input 0; node must support specifying inp index
+            # for these functions
+            i_stream_w = first_node.get_instream_width(node_inp_ind)
+            i_folded_shape = first_node.get_folded_input_shape(node_inp_ind)
+        batchsize = i_tensor.shape[0]
+        # override batch size for input
+        i_folded_shape = list(i_folded_shape)
+        i_folded_shape[0] = batchsize
+        i_folded_shape = tuple(i_folded_shape)
+        # TODO any other layout transformations need to happen here!
+        i_tensor = i_tensor.reshape(i_folded_shape)
+        # pack input for rtlsim
+        packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w)
+        # add to io_dict
+        if_name = if_dict["s_axis"][i][0]
+        io_dict["inputs"][if_name] = packed_input
+    # go over outputs to determine how many values will be produced
+    num_out_values = 0
+    o_tensor_info = []
+    for o, o_vi in enumerate(model.graph.output):
+        # output in io_dict just needs an empty list
+        if_name = if_dict["m_axis"][o][0]
+        io_dict["outputs"][if_name] = []
+        # extract output shape
+        o_name = o_vi.name
+        o_shape = model.get_tensor_shape(o_name)
+        o_dt = model.get_tensor_datatype(o_name)
+        last_node = getCustomOp(model.find_producer(o_name))
+        o_folded_shape = last_node.get_folded_output_shape()
+        # override batch size from actual input
+        o_shape = list(o_shape)
+        o_shape[0] = batchsize
+        o_shape = tuple(o_shape)
+        o_folded_shape = list(o_folded_shape)
+        o_folded_shape[0] = batchsize
+        o_folded_shape = tuple(o_folded_shape)
+        o_stream_w = last_node.get_outstream_width()
+        o_tensor_info.append((o_stream_w, o_dt, o_folded_shape, o_shape))
+        num_out_values += batchsize * last_node.get_number_output_values()
+
+    # prepare pyverilator model
+    rtlsim_so = model.get_metadata_prop("rtlsim_so")
+    if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)):
+        sim = pyverilate_stitched_ip(model, extra_verilator_args=extra_verilator_args)
+        model.set_metadata_prop("rtlsim_so", sim.lib._name)
+    else:
+        sim = PyVerilator(rtlsim_so, auto_eval=False)
+
+    # reset and call rtlsim, including any pre/post hooks
+    reset_rtlsim(sim)
+    if pre_hook is not None:
+        pre_hook(sim)
+    n_cycles = rtlsim_multi_io(
+        sim,
+        io_dict,
+        num_out_values,
+        trace_file=trace_file,
+        sname="_",
+        liveness_threshold=pyverilate_get_liveness_threshold_cycles(),
+    )
+    if post_hook is not None:
+        post_hook(sim)
+
+    # unpack outputs and put back into execution context
+    for o, o_vi in enumerate(model.graph.output):
+        o_name = o_vi.name
+        if_name = if_dict["m_axis"][o][0]
+        o_stream_w, o_dt, o_folded_shape, o_shape = o_tensor_info[o]
+        packed_output = io_dict["outputs"][if_name]
+        o_folded_tensor = rtlsim_output_to_npy(
+            packed_output, None, o_dt, o_folded_shape, o_stream_w, o_dt.bitwidth()
+        )
+        execution_context[o_name] = o_folded_tensor.reshape(o_shape)
+
+    model.set_metadata_prop("cycles_rtlsim", str(n_cycles))
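+
+# Usage sketch (assumes the stitched-IP metadata properties are already set):
+#   rtlsim_exec(model, ctx)
+#   n_cycles = int(model.get_metadata_prop("cycles_rtlsim"))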
diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..07eda6aa1d82df0a9f9a01d4f17f7880a8cf8b26
--- /dev/null
+++ b/src/finn/core/throughput_test.py
@@ -0,0 +1,165 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import os
+import subprocess
+import warnings
+from qonnx.util.basic import gen_finn_dt_tensor
+
+from finn.core.rtlsim_exec import rtlsim_exec
+
+
+def throughput_test_remote(model, batchsize=1000, timeout=None):
+    """Runs the throughput test for the given model remotely on the pynq board.
+    The metadata properties related to the pynq board have to be set.
+    Additionally a timeout for the SSH communication can be set.
+    Returns a dictionary with results of the throughput test. Returns None
+    if the test fails."""
+
+    pynq_ip = model.get_metadata_prop("pynq_ip")
+    pynq_port = int(model.get_metadata_prop("pynq_port"))
+    pynq_username = model.get_metadata_prop("pynq_username")
+    pynq_password = model.get_metadata_prop("pynq_password")
+    pynq_target_dir = model.get_metadata_prop("pynq_target_dir")
+    deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
+    # extracting last folder of absolute path (deployment_dir)
+    deployment_folder = os.path.basename(os.path.normpath(deployment_dir))
+    platform = model.get_metadata_prop("platform")
+    assert platform in ["alveo", "zynq-iodma"]
+    bitfile = model.get_metadata_prop("bitfile")
+    bitfile = os.path.basename(bitfile)
+    if pynq_password == "":
+        if "zynq" in platform:
+            raise Exception("PYNQ board remote exec needs password for sudo")
+        else:
+            local_prefix = ""  # assume we are using an ssh key
+            warnings.warn("Empty password, make sure you've set up an ssh key")
+    else:
+        local_prefix = "sshpass -p %s " % pynq_password
+
+    if platform == "alveo":
+        # Alveo can run without sudo but needs correct environment
+        remote_prefix = "conda activate finn-pynq-alveo; "
+    elif "zynq" in platform:
+        # PYNQ Zynq boards need to execute with sudo
+        remote_prefix = "echo %s | sudo -S " % pynq_password
+
+    # use platform attribute for correct remote execution
+    if platform == "alveo":
+        remote_cmd = "bash -ic 'bash alveo_run.sh throughput_test %d' \"" % batchsize
+    else:
+        remote_cmd = (
+            "python3.6 driver.py --exec_mode=throughput_test --batchsize={} "
+            "--bitfile={} --inputfile=input.npy --outputfile=output.npy "
+            '--platform={} "'
+        ).format(batchsize, bitfile, platform)
+    cmd = (
+        local_prefix + 'ssh {}@{} -p {} "cd {}/{}; ' + remote_prefix + remote_cmd
+    ).format(pynq_username, pynq_ip, pynq_port, pynq_target_dir, deployment_folder)
+    bash_command = ["/bin/bash", "-c", cmd]
+    process_throughput_test = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_throughput_test.communicate(timeout=timeout)
+
+    # remove any pre-existing metrics file
+    try:
+        os.remove("{}/nw_metrics.txt".format(deployment_dir))
+    except FileNotFoundError:
+        pass
+
+    cmd = local_prefix + "scp -P{} {}@{}:{}/{}/nw_metrics.txt {}".format(
+        pynq_port,
+        pynq_username,
+        pynq_ip,
+        pynq_target_dir,
+        deployment_folder,
+        deployment_dir,
+    )
+    bash_command = ["/bin/bash", "-c", cmd]
+    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_compile.communicate(timeout=timeout)
+
+    try:
+        with open("{}/nw_metrics.txt".format(deployment_dir), "r") as file:
+            res = eval(file.read())
+        return res
+    except FileNotFoundError:
+        return None
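+
+# nw_metrics.txt is written by the remote driver and parsed with eval(), so it
+# is expected to contain a single Python dict literal of measured metrics.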
+
+
+def throughput_test_rtlsim(model, batchsize=100):
+    """Runs a throughput test for the given IP-stitched model. When combined
+    with tracing, useful to determine bottlenecks and required FIFO sizes."""
+
+    assert (
+        model.get_metadata_prop("exec_mode") == "rtlsim"
+    ), """Top-level exec_mode
+    metadata_prop must be set to rtlsim"""
+
+    # make empty exec context and insert random inputs
+    ctx = model.make_empty_exec_context()
+    i_bytes = 0
+    for i_vi in model.graph.input:
+        # create random input
+        iname = i_vi.name
+        ishape = model.get_tensor_shape(iname)
+        ishape_batch = ishape
+        ishape_batch[0] = batchsize
+        idt = model.get_tensor_datatype(iname)
+        dummy_input = gen_finn_dt_tensor(idt, ishape_batch)
+        ctx[iname] = dummy_input
+        i_bytes += (np.prod(ishape_batch) * idt.bitwidth()) / 8
+
+    # compute total output size as well
+    o_bytes = 0
+    for o_vi in model.graph.output:
+        oname = o_vi.name
+        oshape = model.get_tensor_shape(oname)
+        oshape_batch = oshape
+        oshape_batch[0] = batchsize
+        odt = model.get_tensor_datatype(oname)
+        o_bytes += (np.prod(oshape_batch) * odt.bitwidth()) / 8
+
+    # remove liveness threshold, launch rtlsim
+    os.environ["LIVENESS_THRESHOLD"] = "-1"
+    rtlsim_exec(model, ctx)
+    # extract metrics
+    cycles = int(model.get_metadata_prop("cycles_rtlsim"))
+    clk_ns = float(model.get_metadata_prop("clk_ns"))
+    fclk_mhz = 1 / (clk_ns * 0.001)
+    runtime_s = (cycles * clk_ns) * (10**-9)
+    res = dict()
+    res["cycles"] = cycles
+    res["runtime[ms]"] = runtime_s * 1000
+    res["throughput[images/s]"] = batchsize / runtime_s
+    res["DRAM_in_bandwidth[Mb/s]"] = i_bytes * 0.000001 / runtime_s
+    res["DRAM_out_bandwidth[Mb/s]"] = o_bytes * 0.000001 / runtime_s
+    res["fclk[mhz]"] = fclk_mhz
+    res["N"] = batchsize
+
+    return res
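+
+# Worked example with assumed numbers: clk_ns = 5.0 (fclk = 200 MHz) and
+# cycles = 100000 give runtime_s = 100000 * 5.0 * 1e-9 s = 0.5 ms, so
+# batchsize = 100 yields 100 / 5e-4 = 200000 images/s. Note that the
+# bandwidth entries are computed from byte counts (i.e. megabytes/s),
+# despite the "[Mb/s]" key names.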
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 2437f48588275f33d1bf258c973ed28aeba800fa..49577fbf1b5774e33b63674242aed69c1d12a53e 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -28,7 +28,7 @@
 
 from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
 from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch
-from finn.custom_op.fpgadataflow.checksum import checksum
+from finn.custom_op.fpgadataflow.checksum import CheckSum
 from finn.custom_op.fpgadataflow.concat import StreamingConcat
 from finn.custom_op.fpgadataflow.convolutioninputgenerator import (
     ConvolutionInputGenerator,
@@ -88,4 +88,4 @@ custom_op["StreamingDataflowPartition"] = StreamingDataflowPartition
 custom_op["UpsampleNearestNeighbour_Batch"] = UpsampleNearestNeighbour_Batch
 custom_op["Lookup"] = Lookup
 custom_op["StreamingConcat"] = StreamingConcat
-custom_op["checksum"] = checksum
+custom_op["CheckSum"] = CheckSum
diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
index d1da1e0e524986332429079f79d36ae62f7cfd1e..13a4c5892c8f82c37e1794057a06217981a6a580 100644
--- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
@@ -29,8 +29,8 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index 462b8b6e6ec845b75e3594460807ccc7f37bbe9e..3ed76db2982e411b711be5bd78e39dd866332714 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -30,8 +30,8 @@ import numpy as np
 import os
 import warnings
 from math import ceil
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
@@ -51,7 +51,7 @@ from . import templates
 def get_smallest_possible(vals):
     """Returns smallest (fewest bits) possible DataType that can represent
     values. Prefers unsigned integers where possible."""
-    vals = np.array(vals)
+    vals = np.array(vals, dtype=np.float64)
     for v in vals:
         assert int(v) == v, "Error float value"
 
diff --git a/src/finn/custom_op/fpgadataflow/checksum.py b/src/finn/custom_op/fpgadataflow/checksum.py
index 59d26fdce83d7a3009606da0fd00c84f03110622..bde285eb0dd1b3818926c1feb7ac8d5de69a4be6 100644
--- a/src/finn/custom_op/fpgadataflow/checksum.py
+++ b/src/finn/custom_op/fpgadataflow/checksum.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, Advanced Micro Devices, Inc.
+# Copyright (c) 2022, Xilinx, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -29,13 +29,13 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
-class checksum(HLSCustomOp):
+class CheckSum(HLSCustomOp):
     """Class that corresponds to custom_hls checksum function."""
 
     def __init__(self, onnx_node):
@@ -254,10 +254,12 @@ class checksum(HLSCustomOp):
             'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
         )
         self.code_gen_dict["$STREAMDECLARATIONS$"].append("ap_uint<32> chk;")
+        # set drain = false for cppsim
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append("ap_uint<1> drain = false;")
 
     def docompute(self):
         self.code_gen_dict["$DOCOMPUTE$"] = [
-            """checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(in0, out, chk);"""
+            """checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(in0, out, chk, drain);"""
         ]
 
     def dataoutstrm(self):
@@ -298,7 +300,7 @@ class checksum(HLSCustomOp):
     def blackboxfunction(self):
         self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
             """using T = ap_uint<WORD_SIZE>;\n void {}(hls::stream<T> &in0,
-            hls::stream<T> &out, ap_uint<32> &chk)""".format(
+            hls::stream<T> &out, ap_uint<32> &chk, ap_uint<1> &drain)""".format(
                 self.onnx_node.name
             )
         ]
@@ -313,10 +315,16 @@ class checksum(HLSCustomOp):
         self.code_gen_dict["$PRAGMAS$"].append(
             "#pragma HLS interface s_axilite port=chk bundle=checksum"
         )
+        self.code_gen_dict["$PRAGMAS$"].append(
+            "#pragma HLS interface s_axilite port=drain bundle=checksum"
+        )
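+        # `drain` shares the AXI-lite "checksum" bundle with `chk`, so it can
+        # be toggled at runtime through the same register interface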
         self.code_gen_dict["$PRAGMAS$"].append(
             "#pragma HLS interface ap_ctrl_none port=return"
         )
-        self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS dataflow")
+        self.code_gen_dict["$PRAGMAS$"].append(
+            "#pragma HLS dataflow disable_start_propagation"
+        )
 
     def get_verilog_top_module_intf_names(self):
         intf_names = super().get_verilog_top_module_intf_names()
diff --git a/src/finn/custom_op/fpgadataflow/concat.py b/src/finn/custom_op/fpgadataflow/concat.py
index ee8a2c323238c4e4f91b76c91d1445c69e3cdaa0..5fcf9cf96cbacd4e444af0b90618a19eefb9bfe2 100644
--- a/src/finn/custom_op/fpgadataflow/concat.py
+++ b/src/finn/custom_op/fpgadataflow/concat.py
@@ -28,10 +28,10 @@
 
 import numpy as np
 import os
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import roundup_to_integer_multiple
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 150c3b7198d139c29a342460bab499c73bb84196..251a9882c58a3cf94449701795b72c8a6adab318 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -29,10 +29,10 @@
 import math
 import numpy as np
 import os
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.custom_op.general.im2col import compute_conv_output_dim
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # ONNX i/o tensor shape assumptions for ConvolutionInputGenerator:
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
index b25246f1eaf73e14836bb6d00a5704f8bd3ce892..aba74baecc0f40571fa288459a04ad42e167ccf6 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
@@ -30,10 +30,10 @@ import math
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.custom_op.general.im2col import compute_conv_output_dim
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # This operation should only be used for 1D convolutions. Either the
diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py
index aa3bad9e41f78c3d6ae4bcd23d99bb7c4c72800c..da29a524b6bba7ce0c7a71bc64a44ae128d91709 100644
--- a/src/finn/custom_op/fpgadataflow/downsampler.py
+++ b/src/finn/custom_op/fpgadataflow/downsampler.py
@@ -29,8 +29,8 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
index fb15b260e6bdfd57f42e0e4659a1536bb716b526..04ca45e7f1c1844a9976d46392be46f6cffc2167 100644
--- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
@@ -29,8 +29,8 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
index 177ca2acbd60b49658a61741ec042e651b560b27..d69ea471ea8ae1d58f97d056936b505cc2a2806b 100644
--- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
@@ -29,8 +29,8 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
index 43a7dc211c0fe0689629cb9bb4d4b0664ac9eef9..adafa7dcf36111e63fa49e0d184594fff54be99d 100644
--- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
@@ -29,8 +29,8 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py
index 402f2cce19efe05620c0fcaee761a88c919f822a..9978ab0c7138aa6846a1427cd346c5257e4f8728 100644
--- a/src/finn/custom_op/fpgadataflow/hlscustomop.py
+++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py
@@ -25,26 +25,23 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# namespace package, extend path
 
 import numpy as np
 import os
 import subprocess
 from abc import abstractmethod
+from pyverilator.util.axi_utils import rtlsim_multi_io
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.base import CustomOp
+from qonnx.util.basic import roundup_to_integer_multiple
 
-from finn.core.datatype import DataType
-from finn.custom_op.base import CustomOp
 from finn.util.basic import (
     CppBuilder,
     get_rtlsim_trace_depth,
     make_build_dir,
-    roundup_to_integer_multiple,
-)
-from finn.util.hls import CallHLS
-from finn.util.pyverilator import (
     pyverilate_get_liveness_threshold_cycles,
-    rtlsim_multi_io,
 )
+from finn.util.hls import CallHLS
 
 from . import templates
 
@@ -114,21 +111,13 @@ class HLSCustomOp(CustomOp):
             "inFIFODepth": ("i", False, 2),
             "outFIFODepth": ("i", False, 2),
             "output_hook": ("s", False, ""),
-            # HLS version to be used for IP synthesis
-            "hls_version": ("s", False, "vitis_hls", {"vivado_hls", "vitis_hls"}),
         }
 
     def get_verilog_top_module_name(self):
         "Return the Verilog top module name for this node."
 
         node = self.onnx_node
-        hls_version = self.get_nodeattr("hls_version")
-        if hls_version == "vivado_hls":
-            prefixed_top_name = "%s_%s" % (node.name, node.name)
-        elif hls_version == "vitis_hls":
-            prefixed_top_name = node.name
-        else:
-            raise Exception("Unknown hls_version: %s" % hls_version)
+        prefixed_top_name = node.name
 
         return prefixed_top_name
 
@@ -320,25 +309,16 @@ class HLSCustomOp(CustomOp):
         self.code_gen_dict.clear()
 
     def ipgen_default_directives(self):
-        """Return list of default HLS synthesis directives, which differ
-        slightly between vivado_hls and vitis_hls"""
-
-        hls_version = self.get_nodeattr("hls_version")
-        default_directives = {
-            "vivado_hls": [
-                "config_compile -ignore_long_run_time -disable_unroll_code_size_check",
-                "config_interface -m_axi_addr64",
-                "config_rtl -auto_prefix",
-            ],
-            "vitis_hls": [
-                "set_param hls.enable_hidden_option_error false",
-                "config_compile -disable_unroll_code_size_check -pipeline_style flp",
-                "config_interface -m_axi_addr64",
-                "config_rtl -module_auto_prefix",
-                "config_rtl -deadlock_detection none",
-            ],
-        }
-        return default_directives[hls_version]
+        """Return list of default HLS synthesis directives"""
+
+        default_directives = [
+            "set_param hls.enable_hidden_option_error false",
+            "config_compile -disable_unroll_code_size_check -pipeline_style flp",
+            "config_interface -m_axi_addr64",
+            "config_rtl -module_auto_prefix",
+            "config_rtl -deadlock_detection none",
+        ]
+        return default_directives
 
     def ipgen_extra_directives(self):
         "Return a list of extra tcl directives for HLS synthesis."
@@ -348,8 +328,7 @@ class HLSCustomOp(CustomOp):
         """Builds the bash script for IP generation using the CallHLS utility."""
         node = self.onnx_node
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
-        hls_version = self.get_nodeattr("hls_version")
-        builder = CallHLS(backend=hls_version)
+        builder = CallHLS()
         builder.append_tcl(code_gen_dir + "/hls_syn_{}.tcl".format(node.name))
         builder.set_ipgen_path(code_gen_dir + "/project_{}".format(node.name))
         builder.build(code_gen_dir)
@@ -503,15 +482,10 @@ compilation transformations?
         sim.io.ap_clk = 0
 
     def hls_sname(self):
-        """Get the naming convention used by chosen HLS version for stream signals,
-        decided by the hls_version node attribute.
-        Example: the TDATA for a stream called "out" would be out_V_V_TDATA
-        in vivado_hls and out_V_TDATA in vitis_hls.
+        """Get the naming convention used by Vitis HLS for stream signals
+        Example: the TDATA for a stream called "out" would be out_V_TDATA.
         """
-        hls_version = self.get_nodeattr("hls_version")
-        sname_dict = {"vivado_hls": "V_V", "vitis_hls": "V"}
-        sname = sname_dict[hls_version]
-        return sname
+        return "V"
 
     def rtlsim(self, sim, inp, inp2=None):
         """Runs the pyverilator simulation by passing the input values to the simulation,
@@ -596,7 +570,7 @@ compilation transformations?
     def rtlsim_multi_io(self, sim, io_dict):
         "Run rtlsim for this node, supports multiple i/o streams."
 
-        # signal naming differs slightly between vivado_hls/vitis_hls
+        # stream signal name infix used by Vitis HLS
         sname = "_" + self.hls_sname() + "_"
 
         trace_file = self.get_nodeattr("rtlsim_trace")
@@ -604,7 +578,12 @@ compilation transformations?
             trace_file = self.onnx_node.name + ".vcd"
         num_out_values = self.get_number_output_values()
         total_cycle_count = rtlsim_multi_io(
-            sim, io_dict, num_out_values, trace_file, sname=sname
+            sim,
+            io_dict,
+            num_out_values,
+            trace_file=trace_file,
+            sname=sname,
+            liveness_threshold=pyverilate_get_liveness_threshold_cycles(),
         )
         self.set_nodeattr("cycles_rtlsim", total_cycle_count)
 
diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py
index a331caee0193e101dd108299c159dfd97c893cfa..33ee1d359c7b82494e1b5ce1b83aa5d0199f8153 100644
--- a/src/finn/custom_op/fpgadataflow/iodma.py
+++ b/src/finn/custom_op/fpgadataflow/iodma.py
@@ -29,8 +29,8 @@
 import math
 import numpy as np
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 
 # the IODMA interfaces between a memory-mapped AXI interface and an AXI stream
diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
index bb83311dab44a4942d6bc1b581c21abb1e993493..3e27ee01113392174c1206fc10e1c9abe82fdfe7 100644
--- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py
+++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
@@ -29,10 +29,10 @@
 import numpy as np
 import os
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import roundup_to_integer_multiple
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import roundup_to_integer_multiple
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
diff --git a/src/finn/custom_op/fpgadataflow/lookup.py b/src/finn/custom_op/fpgadataflow/lookup.py
index dcf67e4c4338b8a903fefd7a83a96331d0a5c8e9..d90fa0f05ab2a92391f610ae1c4516a95a881ce4 100644
--- a/src/finn/custom_op/fpgadataflow/lookup.py
+++ b/src/finn/custom_op/fpgadataflow/lookup.py
@@ -30,8 +30,8 @@ import numpy as np
 import os
 import warnings
 from math import ceil, log2
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
@@ -471,6 +471,8 @@ class Lookup(HLSCustomOp):
 
     def get_verilog_top_module_intf_names(self):
         intf_names = super().get_verilog_top_module_intf_names()
-        intf_names["axilite"] = ["s_axi_control"]
-        intf_names["aximm"] = [("m_axi_gmem", self.get_nodeattr("ext_mem_width"))]
+        mem_mode = self.get_nodeattr("mem_mode")
+        if mem_mode == "external":
+            intf_names["axilite"] = ["s_axi_control"]
+            intf_names["aximm"] = [("m_axi_gmem", self.get_nodeattr("ext_mem_width"))]
         return intf_names
diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index 4198486b94fe79638081e183ea48375a767b2477..9d2717dc8c65ddb5329816880067b81b10db2c02 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -31,14 +31,14 @@ import numpy as np
 import os
 import textwrap
 import warnings
-
-from finn.core.datatype import DataType
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import (
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import (
     calculate_matvec_accumulator_range,
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
 )
+
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
@@ -836,7 +836,7 @@ class MatrixVectorActivation(HLSCustomOp):
                     # UltraRAM must have no memory initializer, or only zeroes
                     # otherwise BRAM will be inferred instead of URAM
                     # as a workaround we provide a zero-weight init here
-                    synth_weights = np.zeros_like(weights)
+                    synth_weights = np.zeros_like(weights, dtype=np.float32)
                 else:
                     synth_weights = weights
                 self.make_weight_file(
diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py
index 09d707ae238a90b596f18400b58a6508f0413692..3bf187fa9a78ed2c812f042a29079ee1e3163d74 100644
--- a/src/finn/custom_op/fpgadataflow/pool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/pool_batch.py
@@ -28,8 +28,8 @@
 
 import numpy as np
 import os
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py b/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
index cf065cf156abed591e579b3f257e8f442eb3a976..2ae6d92b88a4154abc3a50c2b39071b7d25a89e8 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdataflowpartition.py
@@ -26,7 +26,10 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.custom_op.base import CustomOp
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.base import CustomOp
+
+from finn.core.onnx_exec import execute_onnx
 
 # TODO move StreamingDataflowPartition to HLSCustomOp base class
 
@@ -48,6 +51,7 @@ class StreamingDataflowPartition(CustomOp):
             "device_id": ("i", False, 0),
             "mem_port": ("s", False, ""),
             "instance_name": ("s", False, ""),
+            "return_full_exec_context": ("i", False, 0),
         }
 
     def make_shape_compatible_op(self, model):
@@ -57,8 +61,26 @@ class StreamingDataflowPartition(CustomOp):
         pass
 
     def execute_node(self, context, graph):
-        # TODO add RPC execution with synthesized bitfile?
-        # whole-design rtlsim with PyVerilator may also be an alternative
+        model = ModelWrapper(self.get_nodeattr("model"))
+        return_full_exec_context = self.get_nodeattr("return_full_exec_context") == 1
+        node = self.onnx_node
+        inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items()))
+        # inputs may have been renamed in partition
+        for i, old_iname in enumerate(node.input):
+            new_iname = model.graph.input[i].name
+            if old_iname != new_iname:
+                inp_ctx[new_iname] = inp_ctx[old_iname]
+                del inp_ctx[old_iname]
+        ret = execute_onnx(model, inp_ctx, return_full_exec_context)
+        # outputs may have been renamed in partition
+        for i, node_oname in enumerate(node.output):
+            model_oname = model.graph.output[i].name
+            context[node_oname] = ret[model_oname]
+        # prefix and insert exec context entries
+        if return_full_exec_context:
+            for tname in ret.keys():
+                if tname not in [x.name for x in model.graph.output]:
+                    context[node.name + "_" + tname] = ret[tname]
-        pass
 
     def verify_node(self):
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index 5fabef57be3675c38fcfd74c0db99f50d98340f4..1e6b72e4d54ede639e797f32f51fb7705ec8ce4b 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -30,8 +30,8 @@ import math
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py
index ad5300eec1b0b74d9ae4bcc898983a5c429a660d..a7c3cd0be59db4ba8665f8fba5be72282339b8c8 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfifo.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py
@@ -30,9 +30,9 @@ import numpy as np
 import os
 import subprocess
 import warnings
+from qonnx.core.datatype import DataType
 from shutil import copy
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.basic import get_finn_root
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index b9c2350c0c20035358780e90ddb6f2923d171af5..d9ffea4d9cd8895fdf55a497e8c7d0e49808ac95 100755
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -29,10 +29,10 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # TODO: consider splitting this into separate implementations for 1D and 2D
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 3acfc7d8b004733131ee997f69aa4ac2aac88577..5383cc1f4bdf9eb88c7d7bd69c25231282f11c6f 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -31,13 +31,13 @@ import os
 import textwrap
 import warnings
 from math import ceil, log2
-
-from finn.core.datatype import DataType
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import (
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
 )
+
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
@@ -480,7 +480,7 @@ class Thresholding_Batch(HLSCustomOp):
                 # UltraRAM must have no memory initializer, or only zeroes
                 # otherwise BRAM will be inferred instead of URAM
                 # as a workaround we provide a zero-weight init here
-                synth_thresholds = np.zeros_like(thresholds)
+                synth_thresholds = np.zeros_like(thresholds, dtype=np.float32)
             else:
                 synth_thresholds = thresholds
             self.make_weight_file(
diff --git a/src/finn/custom_op/fpgadataflow/upsampler.py b/src/finn/custom_op/fpgadataflow/upsampler.py
index 221725d49440653c5e56287f0d910848ec0b24c5..b62e4f2f6784e8964232efcc9971f0b8bc35ac5d 100644
--- a/src/finn/custom_op/fpgadataflow/upsampler.py
+++ b/src/finn/custom_op/fpgadataflow/upsampler.py
@@ -29,8 +29,8 @@
 import numpy as np
 import os
 import warnings
+from qonnx.core.datatype import DataType
 
-from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index f1f3f5b5027678982e5b79b05b1dc47e90a69e3d..27b23dd32835c265759a8cabfd2a3412844077ca 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -30,14 +30,14 @@ import math
 import numpy as np
 import os
 import warnings
-
-from finn.core.datatype import DataType
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.util.basic import (
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import (
     calculate_matvec_accumulator_range,
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
 )
+
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
diff --git a/src/finn/qnn-data/mdd-data/finn_design.mdd b/src/finn/qnn-data/mdd-data/finn_design.mdd
index 517180fa94079ad3e04d3a45776f165fd82cc483..0be2da6e8f9d573e64f39fa1f2da222db00f5824 100644
--- a/src/finn/qnn-data/mdd-data/finn_design.mdd
+++ b/src/finn/qnn-data/mdd-data/finn_design.mdd
@@ -1,4 +1,4 @@
-# Copyright (c) 2022  Advanced Micro Devices, Inc.
+# Copyright (c) 2022  Xilinx, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -11,7 +11,7 @@
 #   this list of conditions and the following disclaimer in the documentation
 #   and/or other materials provided with the distribution.
 #
-# * Neither the name of  Advanced Micro Devices nor the names of its
+# * Neither the name of Xilinx nor the names of its
 #   contributors may be used to endorse or promote products derived from
 #   this software without specific prior written permission.
 #
diff --git a/src/finn/qnn-data/mdd-data/finn_design.tcl b/src/finn/qnn-data/mdd-data/finn_design.tcl
index b8c55e12b22a2152157cbecd2b0b4bf061e9918a..d4915d468d6eac897e25a4d284954086835b2389 100644
--- a/src/finn/qnn-data/mdd-data/finn_design.tcl
+++ b/src/finn/qnn-data/mdd-data/finn_design.tcl
@@ -1,58 +1,60 @@
-# Copyright (c) 2022  Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of  Advanced Micro Devices nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# @brief        Address range defines for FINN IP.
-# @author       Thomas B. Preußer <thomas.preusser@amd.com>
-##
-
-proc generate {drv_handle} {
-        # Bounds of all exposed slave address ranges to xparameters.h
-        set file_handle [hsi::utils::open_include_file "xparameters.h"]
-        generate_memrange_parameters $drv_handle $file_handle
-        close $file_handle
-}
-
-proc generate_memrange_parameters {drv_handle file_handle} {
-        # Collect unique slave interfaces to custom module
-        array unset ranges
-        foreach mem_range [hsi::get_mem_ranges -of_object [hsi::get_cells -hier [hsi::get_sw_processor]] $drv_handle] {
-                set ranges([common::get_property SLAVE_INTERFACE $mem_range]) [list \
-                        [common::get_property BASE_NAME  $mem_range] \
-                        [common::get_property BASE_VALUE $mem_range] \
-                        [common::get_property HIGH_NAME  $mem_range] \
-                        [common::get_property HIGH_VALUE $mem_range] \
-                ]
-        }
-
-        # Produce defines for the address range bounds
-        set prefix "XPAR_[string toupper $drv_handle]"
-        foreach {key val} [array get ranges] {
-                puts $file_handle "#define [format "%s_%s_%s" $prefix $key [lindex $val 0]] [lindex $val 1]"
-                puts $file_handle "#define [format "%s_%s_%s" $prefix $key [lindex $val 2]] [lindex $val 3]"
-        }
-        puts $file_handle ""
-}
+# Copyright (c) 2022  Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# @brief	Address range defines for FINN IP.
+# @author	Thomas B. Preußer <thomas.preusser@amd.com>
+##
+
+proc generate {drv_handle} {
+	# Write the bounds of all exposed slave address ranges to xparameters.h
+	set file_handle [hsi::utils::open_include_file "xparameters.h"]
+	foreach drv [hsi::get_drivers -filter "NAME==[common::get_property NAME $drv_handle]"] {
+		generate_memrange_parameters $drv $file_handle
+	}
+	close $file_handle
+}
+
+proc generate_memrange_parameters {drv_handle file_handle} {
+	# Collect unique slave interfaces to the custom module
+	array unset ranges
+	foreach mem_range [hsi::get_mem_ranges -of_object [hsi::get_cells -hier [hsi::get_sw_processor]] $drv_handle] {
+		set ranges([common::get_property SLAVE_INTERFACE $mem_range]) [list \
+			[common::get_property BASE_NAME  $mem_range] \
+			[common::get_property BASE_VALUE $mem_range] \
+			[common::get_property HIGH_NAME  $mem_range] \
+			[common::get_property HIGH_VALUE $mem_range] \
+		]
+	}
+
+	# Produce defines for the address range bounds
+	set prefix "XPAR_[string toupper $drv_handle]"
+	foreach {key val} [array get ranges] {
+		puts $file_handle "#define [format "%s_%s_%s" $prefix $key [lindex $val 0]] [lindex $val 1]"
+		puts $file_handle "#define [format "%s_%s_%s" $prefix $key [lindex $val 2]] [lindex $val 3]"
+	}
+	puts $file_handle ""
+}
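
The rewritten proc now walks every driver instance whose NAME matches and emits one pair of #define lines per slave interface into xparameters.h. A hypothetical Python paraphrase of the emission step (driver/interface names and addresses are made up; the authoritative logic is the Tcl above):

    import sys

    def emit_memrange_defines(drv_name, ranges, out):
        # mirrors generate_memrange_parameters: one define per range bound
        prefix = "XPAR_%s" % drv_name.upper()
        for key, (base_name, base_val, high_name, high_val) in ranges.items():
            out.write("#define %s_%s_%s %s\n" % (prefix, key, base_name, base_val))
            out.write("#define %s_%s_%s %s\n" % (prefix, key, high_name, high_val))

    emit_memrange_defines(
        "finn_design_0",
        {"s_axi_control": ("BASEADDR", "0xA0000000", "HIGHADDR", "0xA000FFFF")},
        sys.stdout,
    )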
diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py
index b6dd8350809a33ab5dad3e21b0f52f41cbe872ec..497477da9d4cff736dc32eb27532e658890d5cc7 100644
--- a/src/finn/qnn-data/templates/driver/driver_base.py
+++ b/src/finn/qnn-data/templates/driver/driver_base.py
@@ -31,9 +31,9 @@ import os
 import time
 from pynq import Overlay, allocate
 from pynq.ps import Clocks
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import gen_finn_dt_tensor
 
-from finn.core.datatype import DataType
-from finn.util.basic import gen_finn_dt_tensor
 from finn.util.data_packing import (
     finnpy_to_packed_bytearray,
     packed_bytearray_to_finnpy,
diff --git a/src/finn/transformation/__init__.py b/src/finn/transformation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/transformation/fpgadataflow/__init__.py b/src/finn/transformation/fpgadataflow/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/transformation/fpgadataflow/annotate_cycles.py b/src/finn/transformation/fpgadataflow/annotate_cycles.py
index 5ab491dd1031cfec64308aee678edc9c94aa6da2..7befad7aa76ab782e8331e9ed14c749276ea5657 100644
--- a/src/finn/transformation/fpgadataflow/annotate_cycles.py
+++ b/src/finn/transformation/fpgadataflow/annotate_cycles.py
@@ -26,10 +26,11 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
+import qonnx.custom_op.registry as registry
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+
 from finn.transformation.move_reshape import _is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py
index d9089cbeba6e0791f6d8375e28b2c2d99b506eda..0cc4234c8c46a73c1515b3c7ad50bfa78d4d579d 100644
--- a/src/finn/transformation/fpgadataflow/annotate_resources.py
+++ b/src/finn/transformation/fpgadataflow/annotate_resources.py
@@ -26,13 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
+import qonnx.custom_op.registry as registry
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
 from finn.analysis.fpgadataflow.post_synth_res import post_synth_res
 from finn.analysis.fpgadataflow.res_estimation import res_estimation
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.transformation.move_reshape import _is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/cleanup.py b/src/finn/transformation/fpgadataflow/cleanup.py
index f59f4bdeab72a5af9615ecf308306e4fb4b69fb5..1d0efaf4bbede08543e00d4023a7f973fe439f85 100644
--- a/src/finn/transformation/fpgadataflow/cleanup.py
+++ b/src/finn/transformation/fpgadataflow/cleanup.py
@@ -27,10 +27,10 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import qonnx.custom_op.registry as registry
 import shutil
+from qonnx.transformation.base import Transformation
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/compile_cppsim.py b/src/finn/transformation/fpgadataflow/compile_cppsim.py
index 5f7c534b4561ffc0fac0c8c2b6160279f4e34fbc..da337caa62c82be5b7f15284530683f74918fbb2 100644
--- a/src/finn/transformation/fpgadataflow/compile_cppsim.py
+++ b/src/finn/transformation/fpgadataflow/compile_cppsim.py
@@ -26,8 +26,9 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import NodeLocalTransformation
+import qonnx.custom_op.registry as registry
+from qonnx.transformation.base import NodeLocalTransformation
+
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index e3faa03ace5dc856e9571773c2b5a907f794fa89..b8b7233c8073e23bb00779ba82e1123f6aadaa74 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -28,21 +28,21 @@
 
 
 import numpy as np
+import qonnx.core.data_layout as DataLayout
 import warnings
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import SortGraph
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import get_by_name
+from qonnx.util.onnx import nchw_to_nhwc
 
-import finn.core.data_layout as DataLayout
-from finn.core.datatype import DataType
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.minimize_accumulator_width import (
     MinimizeAccumulatorWidth,
 )
-from finn.transformation.general import SortGraph
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import get_by_name
-from finn.util.onnx import nchw_to_nhwc
 
 
 class InferConvInpGen(Transformation):
@@ -547,7 +547,7 @@ class InferPool_Batch(Transformation):
                     "Im2Col",
                     [im2col_in],
                     [im2col_out],
-                    domain="finn.custom_op.general",
+                    domain="qonnx.custom_op.general",
                     stride=[sh, sw],
                     kernel_size=[kh, kw],
                     pad_amount=pad,
@@ -935,7 +935,7 @@ class InferVectorVectorActivation(Transformation):
                     W = W.transpose(0, 3, 1, 2)
                     # now we can extract the values using a for loop over the channels
                     # and fill a zero numpy array in the correct shape
-                    w_tensor = np.zeros((channels, 1, k_h, k_w))
+                    w_tensor = np.zeros((channels, 1, k_h, k_w), dtype=np.float32)
                     for ch in range(channels):
                         w_tensor[ch][0] = W[ch][ch]
                     model.set_initializer(mm_weight, w_tensor)
@@ -1286,7 +1286,7 @@ class InferChannelwiseLinearLayer(Transformation):
     def get_smallest_possible(self, vals):
         """Returns smallest (fewest bits) possible DataType that can represent
         value. Prefers unsigned integers where possible."""
-        vals = np.array(vals)
+        vals = np.array(vals, dtype=np.float64)
         for v in vals:
             assert int(v) == v, "Error float value"
 
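
The docstring above describes a search for the narrowest integer DataType, preferring unsigned types. A self-contained sketch of the same idea in plain Python (illustrative only, not the FINN implementation; the returned strings merely follow qonnx's UINTn/INTn naming):

    import numpy as np

    def smallest_int_dtype(vals):
        # narrowest two's-complement width; unsigned when all values are >= 0
        vals = np.array(vals, dtype=np.float64)
        assert np.all(vals == vals.astype(np.int64)), "Error float value"
        lo, hi = int(vals.min()), int(vals.max())
        if lo >= 0:
            return "UINT%d" % max(hi.bit_length(), 1)
        bits = max(hi.bit_length() + 1, (-lo - 1).bit_length() + 1, 2)
        return "INT%d" % bits

    assert smallest_int_dtype([0, 3]) == "UINT2"
    assert smallest_int_dtype([-4, 3]) == "INT3"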
@@ -1562,7 +1562,9 @@ class InferGlobalAccPoolLayer(Transformation):
                     model.make_new_valueinfo_name(), TensorProto.FLOAT, [1]
                 )
                 model.graph.value_info.append(mul_value)
-                model.set_initializer(mul_value.name, np.array(1 / (vecs[1] * vecs[2])))
+                model.set_initializer(
+                    mul_value.name, np.array(1 / (vecs[1] * vecs[2]), dtype=np.float32)
+                )
                 new_mul = helper.make_node(
                     "Mul",
                     [pool_out, mul_value.name],
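
One way to read the Mul node above: the accumulation pool sums over the spatial dimensions, and scaling that sum by 1/(vecs[1]*vecs[2]) recovers the mean. A small numpy check, assuming vecs holds (batch, H, W):

    import numpy as np

    x = np.arange(2 * 2 * 3, dtype=np.float32).reshape(1, 2, 2, 3)  # NHWC
    acc = x.sum(axis=(1, 2), keepdims=True)        # spatial accumulation
    mean = acc * np.float32(1.0 / (2 * 2))         # Mul by 1/(vecs[1]*vecs[2])
    assert np.allclose(mean, x.mean(axis=(1, 2), keepdims=True))

The explicit dtype=np.float32 on the initializer matters for the same reason as the other dtype fixes in this patch: bare np.array(...) yields float64, while ONNX FLOAT initializers are single precision.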
diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
index 9b2577bc2b863e1075fc3252412ff1001b955cda..07d6961be3cf0c6ff1808cecbeb1ffaadba0bcde 100644
--- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
+++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
@@ -26,12 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.create_generic_partitions import PartitionFromLambda
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.create_generic_partitions import PartitionFromLambda
+from qonnx.util.basic import get_by_name
+
 from finn.transformation.fpgadataflow.externalize_params import ExternalizeParams
-from finn.util.basic import get_by_name, make_build_dir
+from finn.util.basic import make_build_dir
 
 
 class CreateDataflowPartition(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
index 0f410ec7a083ce2d68c40a9c5495365a17df4e13..7c978cf61a465cacb4d562634d950311ed992021 100644
--- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -33,14 +33,15 @@ import multiprocessing as mp
 import os
 import subprocess
 import warnings
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import get_num_default_workers
 from shutil import copytree
 
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
     ReplaceVerilogRelPaths,
 )
-from finn.util.basic import get_num_default_workers, make_build_dir
+from finn.util.basic import make_build_dir
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
@@ -165,11 +166,12 @@ class CreateStitchedIP(Transformation):
                 "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]"
                 % (inst_name, aximm_intf_name[0][0])
             )
+            ext_if_name = "m_axi_gmem%d" % (len(self.intf_names["aximm"]))
             self.connect_cmds.append(
-                "set_property name m_axi_gmem0 [get_bd_intf_ports m_axi_gmem_0]"
+                "set_property name %s [get_bd_intf_ports m_axi_gmem_0]" % ext_if_name
             )
             self.connect_cmds.append("assign_bd_address")
-            seg_name = "%s/Data_m_axi_gmem/SEG_m_axi_gmem0_Reg" % (inst_name)
+            seg_name = "%s/Data_m_axi_gmem/SEG_%s_Reg" % (inst_name, ext_if_name)
             self.connect_cmds.append(
                 "set_property offset 0 [get_bd_addr_segs {%s}]" % (seg_name)
             )
@@ -177,9 +179,7 @@ class CreateStitchedIP(Transformation):
             self.connect_cmds.append(
                 "set_property range 4G [get_bd_addr_segs {%s}]" % (seg_name)
             )
-
-            self.intf_names["aximm"] = [("m_axi_gmem0", aximm_intf_name[0][1])]
-            assert self.has_aximm is False, "Currently limited to one AXI-MM interface"
+            self.intf_names["aximm"] = [(ext_if_name, aximm_intf_name[0][1])]
             self.has_aximm = True
 
     def connect_m_axis_external(self, node, idx=None):
@@ -424,6 +424,13 @@ class CreateStitchedIP(Transformation):
             )
             % (vivado_stitch_proj_dir, block_vendor, block_library, block_name)
         )
+        # In some cases, the IP packager seems to infer an aperture of 64K or 4G,
+        # preventing address assignment of the DDR_LOW and/or DDR_HIGH segments.
+        # The following is a hotfix that removes this aperture during IODMA packaging.
+        tcl.append(
+            "ipx::remove_segment -quiet m_axi_gmem0:APERTURE_0 "
+            "[ipx::get_address_spaces m_axi_gmem0 -of_objects [ipx::current_core]]"
+        )
         tcl.append("set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv)
         tcl.append("ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv)
         # mark bus interface params as user-resolvable to avoid FREQ_MHZ mismatches
diff --git a/src/finn/transformation/fpgadataflow/externalize_params.py b/src/finn/transformation/fpgadataflow/externalize_params.py
index dcb66a8538fdff46214c23491f48a59459625082..732b82c6756c1b8ac35cc2c5e4f3a35d86d6d81e 100644
--- a/src/finn/transformation/fpgadataflow/externalize_params.py
+++ b/src/finn/transformation/fpgadataflow/externalize_params.py
@@ -27,8 +27,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-from finn.transformation.base import Transformation
-from finn.util.basic import get_by_name
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import get_by_name
 
 
 class ExternalizeParams(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py
index ec5afef506ab81eeb7bdc45c49bdebbdd3742338..67920172231e685a4f5dd72f037f64fe6baf8449 100644
--- a/src/finn/transformation/fpgadataflow/floorplan.py
+++ b/src/finn/transformation/fpgadataflow/floorplan.py
@@ -28,12 +28,13 @@
 
 import json
 import warnings
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import ApplyConfig
+from qonnx.util.basic import get_by_name
 
 from finn.analysis.fpgadataflow.floorplan_params import floorplan_params
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.general import ApplyConfig
-from finn.util.basic import get_by_name, make_build_dir
+from finn.util.basic import make_build_dir
 
 
 class Floorplan(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ip.py b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
index 2a7d9e9066836ea0d4af004f01d88953e4adaeb7..1fede0667888ee9059cfb2e7f5db00b6bb3f4259 100644
--- a/src/finn/transformation/fpgadataflow/hlssynth_ip.py
+++ b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
@@ -27,10 +27,10 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import qonnx.custom_op.registry as registry
 import warnings
+from qonnx.transformation.base import NodeLocalTransformation
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import NodeLocalTransformation
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py
index 627b9b9af016fd33553dfef9155d192b17937e2d..9817f2e3d2857bd5e59b304fbdaf3bad74a9b037 100644
--- a/src/finn/transformation/fpgadataflow/insert_dwc.py
+++ b/src/finn/transformation/fpgadataflow/insert_dwc.py
@@ -1,8 +1,8 @@
 from onnx import TensorProto
 from onnx import helper as oh
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
 
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py
index 26613849060e361a6bc93483e3e1d8416e1fd97f..78200b280960ad53e3e84d44394c10296c432ba5 100644
--- a/src/finn/transformation/fpgadataflow/insert_fifo.py
+++ b/src/finn/transformation/fpgadataflow/insert_fifo.py
@@ -1,10 +1,38 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import numpy as np
 import warnings
 from onnx import TensorProto
 from onnx import helper as oh
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
 
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/insert_hook.py b/src/finn/transformation/fpgadataflow/insert_hook.py
index c1fce40c574eb58b67e728b78d31454f0c709b78..21ec3f049fa66b66644f1c79286d1e495d97e7a3 100644
--- a/src/finn/transformation/fpgadataflow/insert_hook.py
+++ b/src/finn/transformation/fpgadataflow/insert_hook.py
@@ -1,15 +1,43 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import numpy as np
 from onnx import TensorProto
 from onnx import helper as oh
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def _is_hook_node(node):
-    if node.op_type in ["checksum"]:
+    if node.op_type in ["CheckSum"]:
         return True
     else:
         return False
@@ -54,7 +82,7 @@ class InsertHook(Transformation):
                     if n0_hook in list_supported_hooks:
                         if n0_hook == "checksum":
                             if len(consumers) == 1:
-                                if consumers[0].op_type == "checksum":
+                                if consumers[0].op_type == "CheckSum":
                                     continue
                             n0_normal_oshape = n0.get_normal_output_shape()
                             n0_folded_oshape = n0.get_folded_output_shape()
@@ -72,7 +100,7 @@ class InsertHook(Transformation):
                                 [1],
                             )
                             chk_node = oh.make_node(
-                                "checksum",
+                                "CheckSum",
                                 [output_name],
                                 outputs=[chk_otensor.name, chk_result.name],
                                 domain="finn.custom_op.fpgadataflow",
@@ -94,6 +122,7 @@ class InsertHook(Transformation):
                             else:
                                 model.graph.output.pop()
                                 model.graph.output.append(chk_otensor)
+                                model.graph.value_info.remove(chk_otensor)
                                 model = model.transform(GiveUniqueNodeNames())
                                 model = model.transform(GiveReadableTensorNames())
                             graph_modified = True
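
The "checksum" -> "CheckSum" rename matters because ONNX op_type matching is a plain, case-sensitive string comparison. A minimal sketch of constructing such a hook node (tensor names made up; the real node above also carries backend and shape attributes):

    from onnx import helper as oh

    chk_node = oh.make_node(
        "CheckSum",                              # must match the registered op_type
        ["act_out"],                             # tensor to be checksummed
        outputs=["act_out_checked", "checksum_result"],
        domain="finn.custom_op.fpgadataflow",
    )
    assert chk_node.op_type in ["CheckSum"]
    assert chk_node.op_type not in ["checksum"]  # old lowercase name no longer matches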
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py
index cc85f544eb0e3a99bfee8dcfe1f5a8d722b656db..4b4eb6362faf641def057afadfa7b5e019f54698 100644
--- a/src/finn/transformation/fpgadataflow/insert_iodma.py
+++ b/src/finn/transformation/fpgadataflow/insert_iodma.py
@@ -30,18 +30,27 @@ import math
 import numpy as np
 from onnx import TensorProto
 from onnx import helper as oh
-
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.general import SortGraph
-from finn.util.basic import get_by_name
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import SortGraph
+from qonnx.util.basic import get_by_name
 
 
 class InsertIODMA(Transformation):
-    """Insert DMA nodes on all inputs and outputs."""
+    """Insert DMA nodes on inputs and outputs, or as specified by filters in
+    the constructor."""
 
-    def __init__(self, max_intfwidth=32):
+    def __init__(
+        self,
+        max_intfwidth=32,
+        insert_input=True,
+        insert_output=True,
+        insert_extmemw=True,
+    ):
         super().__init__()
+        self.insert_input = insert_input
+        self.insert_output = insert_output
+        self.insert_extmemw = insert_extmemw
         assert (
             2 ** math.log2(max_intfwidth) == max_intfwidth
         ), "max_intfwidth must be a power of 2"
@@ -68,7 +77,7 @@ class InsertIODMA(Transformation):
 
         assert out_w % pe == 0, "Malformed weight matrix"
         assert inp_w % simd == 0, "Malformed weight matrix"
-        reshaped_w = np.zeros(inp_w * out_w).reshape(-1, pe * simd)
+        reshaped_w = np.zeros(inp_w * out_w, dtype=np.float32).reshape(-1, pe * simd)
 
         addr = 0
         for fr in range(out_w // pe):
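
With the new constructor flags, callers can restrict DMA insertion to a subset of boundaries. A hedged usage sketch, assuming model is a qonnx ModelWrapper holding an fpgadataflow graph:

    from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA

    # insert a DMA only for external weights, leaving graph I/O untouched
    model = model.transform(
        InsertIODMA(max_intfwidth=64, insert_input=False, insert_output=False)
    )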
@@ -94,153 +103,163 @@ class InsertIODMA(Transformation):
             get_by_name(x.attribute, "backend").s.decode("UTF-8") == "fpgadataflow"
             for x in all_nodes
         )
-        # parse matrixvectoractivation layers looking for external weights with no
-        # attached IODMA
-        fc_extw_nodes = list(
-            filter(
-                lambda x: x.op_type == "MatrixVectorActivation"
-                and getCustomOp(x).get_nodeattr("mem_mode") == "external"
-                and model.find_producer(x.input[1]) is None,
-                all_nodes,
-            )
-        )
         # insert IODMAs for graph inputs
-        graph_in_names = [x.name for x in model.graph.input]
-        for graph_in_name in graph_in_names:
-            first_node = model.find_consumer(graph_in_name)
-            if first_node.op_type == "IODMA":
-                # IODMA already inserted for this input
-                continue
-            else:
-                in_shape = model.get_tensor_shape(graph_in_name)
-                in_dtype = model.get_tensor_datatype(graph_in_name)
-                first_node_inst = getCustomOp(first_node)
-                in_folded_shape = first_node_inst.get_folded_input_shape()
-                # take advantage of AXI stream width padding for DMA alignment
-                # (AXI streams are always padded to 8 bits)
-                # this is the width of stream output expected from the DMA
-                padded_instream_width = first_node_inst.get_instream_width_padded()
-                padded_instream_bytes = padded_instream_width // 8
+        if self.insert_input:
+            graph_in_names = [x.name for x in model.graph.input]
+            for graph_in_name in graph_in_names:
+                first_node = model.find_consumer(graph_in_name)
+                if first_node.op_type == "IODMA":
+                    # IODMA already inserted for this input
+                    continue
+                else:
+                    in_shape = model.get_tensor_shape(graph_in_name)
+                    in_dtype = model.get_tensor_datatype(graph_in_name)
+                    first_node_inst = getCustomOp(first_node)
+                    in_folded_shape = first_node_inst.get_folded_input_shape()
+                    # take advantage of AXI stream width padding for DMA alignment
+                    # (AXI streams are always padded to 8 bits)
+                    # this is the width of stream output expected from the DMA
+                    padded_instream_width = first_node_inst.get_instream_width_padded()
+                    padded_instream_bytes = padded_instream_width // 8
+                    # determine the feasible interface width
+                    transfer_bits = padded_instream_width * np.prod(
+                        in_folded_shape[:-1]
+                    )
+                    intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
+                    assert (
+                        intfwidth % 8 == 0
+                    ), "No feasible interface width for transfer size"
+                    # make new buffer
+                    first_node_in = oh.make_tensor_value_info(
+                        model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape
+                    )
+                    model.graph.value_info.append(first_node_in)
+                    model.set_tensor_datatype(first_node_in.name, in_dtype)
+                    # reroute first node input
+                    # FIXME: currently always using 8-bit dtypes to work around the
+                    # padding problems for i/o DMA
+                    first_node.input[0] = first_node_in.name
+                    dma_node = oh.make_node(
+                        "IODMA",
+                        [graph_in_name],
+                        [first_node_in.name],
+                        numInputVectors=in_folded_shape[:-1],
+                        NumChannels=padded_instream_bytes,
+                        dataType="UINT8",
+                        intfWidth=intfwidth,
+                        streamWidth=padded_instream_width,
+                        direction="in",
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                    )
+                    model.graph.node.insert(0, dma_node)
+                    modified = True
+        # insert IODMAs for graph outputs
+        if self.insert_output:
+            graph_out_names = [x.name for x in model.graph.output]
+            for graph_out_name in graph_out_names:
+                final_node = model.find_producer(graph_out_name)
+                if final_node.op_type == "IODMA":
+                    continue
+                else:
+                    out_shape = model.get_tensor_shape(graph_out_name)
+                    out_dtype = model.get_tensor_datatype(graph_out_name)
+                    final_node_inst = getCustomOp(final_node)
+                    out_folded_shape = final_node_inst.get_folded_output_shape()
+                    # take advantage of AXI stream width padding for DMA alignment
+                    # (AXI streams are always padded to 8 bits)
+                    # this is the width of stream input to DMA
+                    padded_outstream_width = (
+                        final_node_inst.get_outstream_width_padded()
+                    )
+                    padded_outstream_bytes = padded_outstream_width // 8
+                    # determine the feasible interface width
+                    transfer_bits = padded_outstream_width * np.prod(
+                        out_folded_shape[:-1]
+                    )
+                    intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
+                    assert (
+                        intfwidth % 8 == 0
+                    ), "No feasible interface width for transfer size"
+                    # make new buffer
+                    final_node_out = oh.make_tensor_value_info(
+                        model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
+                    )
+                    model.graph.value_info.append(final_node_out)
+                    model.set_tensor_datatype(final_node_out.name, out_dtype)
+                    # reroute final node output to final_node_out_name
+                    final_node.output[0] = final_node_out.name
+                    # FIXME: currently always using 8-bit dtypes to work around the
+                    # padding problems for i/o DMA
+                    dma_node = oh.make_node(
+                        "IODMA",
+                        [final_node_out.name],
+                        [graph_out_name],
+                        numInputVectors=out_folded_shape[:-1],
+                        NumChannels=padded_outstream_bytes,
+                        dataType="UINT8",
+                        intfWidth=intfwidth,
+                        streamWidth=padded_outstream_width,
+                        direction="out",
+                        domain="finn.custom_op.fpgadataflow",
+                        backend="fpgadataflow",
+                    )
+                    model.graph.node.append(dma_node)
+                    modified = True
+        if self.insert_extmemw:
+            # parse matrixvectoractivation layers looking for external weights with no
+            # attached IODMA
+            fc_extw_nodes = list(
+                filter(
+                    lambda x: x.op_type == "MatrixVectorActivation"
+                    and getCustomOp(x).get_nodeattr("mem_mode") == "external"
+                    and model.find_producer(x.input[1]) is None,
+                    all_nodes,
+                )
+            )
+            for fc_node in fc_extw_nodes:
+                fc_inst = getCustomOp(fc_node)
+                fc_w_name = fc_node.input[1]
+                w_shape = model.get_tensor_shape(fc_w_name)
+                w_dtype = model.get_tensor_datatype(fc_w_name)
                 # determine the feasible interface width
-                transfer_bits = padded_instream_width * np.prod(in_folded_shape[:-1])
+                transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
                 intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
                 assert (
                     intfwidth % 8 == 0
                 ), "No feasible interface width for transfer size"
+                # calculate width of stream output from DMA
+                pe = get_by_name(fc_node.attribute, "PE").i
+                simd = get_by_name(fc_node.attribute, "SIMD").i
+                streamWidth = fc_inst.get_weightstream_width_padded()
                 # make new buffer
-                first_node_in = oh.make_tensor_value_info(
-                    model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape
+                W = model.get_initializer(fc_w_name)
+                iodma_mem = self.get_mem_init(W, pe, simd)
+                model.set_initializer(fc_w_name, iodma_mem)
+
+                fc_node_in = oh.make_tensor_value_info(
+                    model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape
                 )
-                model.graph.value_info.append(first_node_in)
-                model.set_tensor_datatype(first_node_in.name, in_dtype)
-                # reroute first node input
-                # FIXME: currently always using 8-bit dtypes to work around the
-                # padding problems for i/o DMA
-                first_node.input[0] = first_node_in.name
+                model.graph.value_info.append(fc_node_in)
+                model.set_tensor_datatype(fc_node_in.name, w_dtype)
+                model.set_initializer(fc_node_in.name, W)
                 dma_node = oh.make_node(
                     "IODMA",
-                    [graph_in_name],
-                    [first_node_in.name],
-                    numInputVectors=in_folded_shape[:-1],
-                    NumChannels=padded_instream_bytes,
-                    dataType="UINT8",
+                    [fc_w_name],
+                    [fc_node_in.name],
+                    numInputVectors=[iodma_mem.shape[0]],
+                    NumChannels=pe * simd,
+                    dataType=str(w_dtype.name),
                     intfWidth=intfwidth,
-                    streamWidth=padded_instream_width,
+                    streamWidth=streamWidth,
                     direction="in",
+                    burstMode="wrap",
                     domain="finn.custom_op.fpgadataflow",
                     backend="fpgadataflow",
                 )
+                fc_node.input[1] = fc_node_in.name
                 model.graph.node.insert(0, dma_node)
                 modified = True
-        # insert IODMAs for graph outputs
-        graph_out_names = [x.name for x in model.graph.output]
-        for graph_out_name in graph_out_names:
-            final_node = model.find_producer(graph_out_name)
-            if final_node.op_type == "IODMA":
-                continue
-            else:
-                out_shape = model.get_tensor_shape(graph_out_name)
-                out_dtype = model.get_tensor_datatype(graph_out_name)
-                final_node_inst = getCustomOp(final_node)
-                out_folded_shape = final_node_inst.get_folded_output_shape()
-                # take advantage of AXI stream width padding for DMA alignment
-                # (AXI streams are always padded to 8 bits)
-                # this is the width of stream input to DMA
-                padded_outstream_width = final_node_inst.get_outstream_width_padded()
-                padded_outstream_bytes = padded_outstream_width // 8
-                # determine the feasible interface width
-                transfer_bits = padded_outstream_width * np.prod(out_folded_shape[:-1])
-                intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
-                assert (
-                    intfwidth % 8 == 0
-                ), "No feasible interface width for transfer size"
-                # make new buffer
-                final_node_out = oh.make_tensor_value_info(
-                    model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
-                )
-                model.graph.value_info.append(final_node_out)
-                model.set_tensor_datatype(final_node_out.name, out_dtype)
-                # reroute final node output to final_node_out_name
-                final_node.output[0] = final_node_out.name
-                # FIXME: currently always using 8-bit dtypes to work around the
-                # padding problems for i/o DMA
-                dma_node = oh.make_node(
-                    "IODMA",
-                    [final_node_out.name],
-                    [graph_out_name],
-                    numInputVectors=out_folded_shape[:-1],
-                    NumChannels=padded_outstream_bytes,
-                    dataType="UINT8",
-                    intfWidth=intfwidth,
-                    streamWidth=padded_outstream_width,
-                    direction="out",
-                    domain="finn.custom_op.fpgadataflow",
-                    backend="fpgadataflow",
-                )
-                model.graph.node.append(dma_node)
-                modified = True
-
-        for fc_node in fc_extw_nodes:
-            fc_inst = getCustomOp(fc_node)
-            fc_w_name = fc_node.input[1]
-            w_shape = model.get_tensor_shape(fc_w_name)
-            w_dtype = model.get_tensor_datatype(fc_w_name)
-            # determine the feasible interface width
-            transfer_bits = np.prod(w_shape) * w_dtype.bitwidth()
-            intfwidth = math.gcd(transfer_bits, self.max_intfwidth)
-            assert intfwidth % 8 == 0, "No feasible interface width for transfer size"
-            # calculate width of stream output from DMA
-            pe = get_by_name(fc_node.attribute, "PE").i
-            simd = get_by_name(fc_node.attribute, "SIMD").i
-            streamWidth = fc_inst.get_weightstream_width_padded()
-            # make new buffer
-            W = model.get_initializer(fc_w_name)
-            iodma_mem = self.get_mem_init(W, pe, simd)
-            model.set_initializer(fc_w_name, iodma_mem)
-
-            fc_node_in = oh.make_tensor_value_info(
-                model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape
-            )
-            model.graph.value_info.append(fc_node_in)
-            model.set_tensor_datatype(fc_node_in.name, w_dtype)
-            model.set_initializer(fc_node_in.name, W)
-            dma_node = oh.make_node(
-                "IODMA",
-                [fc_w_name],
-                [fc_node_in.name],
-                numInputVectors=[iodma_mem.shape[0]],
-                NumChannels=pe * simd,
-                dataType=str(w_dtype.name),
-                intfWidth=intfwidth,
-                streamWidth=streamWidth,
-                direction="in",
-                burstMode="wrap",
-                domain="finn.custom_op.fpgadataflow",
-                backend="fpgadataflow",
-            )
-            fc_node.input[1] = fc_node_in.name
-            model.graph.node.insert(0, dma_node)
-            modified = True
         if modified:
             model = model.transform(SortGraph())
         return (model, modified)
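
The interface-width computation is identical for all three DMA kinds: the chosen AXI width must divide the total transfer size in bits, so the code takes the gcd of the two and then requires byte granularity. A worked example with made-up numbers:

    import math
    import numpy as np

    padded_stream_width = 24            # bits; AXI streams are byte-padded
    folded_shape = (1, 7, 3)            # last dim is the stream itself, excluded
    transfer_bits = int(padded_stream_width * np.prod(folded_shape[:-1]))  # 168
    intfwidth = math.gcd(transfer_bits, 32)     # with max_intfwidth = 32 -> 8
    assert intfwidth % 8 == 0           # otherwise no feasible interface width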
diff --git a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
index 0d764b9ed4b3f14850cd678656ebb3ef98162644..1610916eb693dbd55989712199ee5b414134a5af 100644
--- a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
+++ b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
@@ -29,10 +29,9 @@
 import numpy as np
 from onnx import TensorProto
 from onnx import helper as oh
-
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.util.basic import get_by_name
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import get_by_name
 
 
 class InsertTLastMarker(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/make_deployment.py b/src/finn/transformation/fpgadataflow/make_deployment.py
index d43d81716ac7a8b097fc7ec9e38bf5bcb954c7fb..d4684dc83ce1f22ecae2ca04af5e5973519db4f6 100644
--- a/src/finn/transformation/fpgadataflow/make_deployment.py
+++ b/src/finn/transformation/fpgadataflow/make_deployment.py
@@ -29,10 +29,10 @@
 import os
 import subprocess
 from distutils.dir_util import copy_tree
+from qonnx.transformation.base import Transformation
 from shutil import copy
 
 import finn.transformation.fpgadataflow.templates as templates
-from finn.transformation.base import Transformation
 from finn.util.basic import make_build_dir
 
 
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index 8286f696fb1a6790bd7830b1fdedb43838827040..863523605580ef77559b65a1abd72802daff187d 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -31,19 +31,17 @@ import pkg_resources as pk
 
 import numpy as np
 import os
+import qonnx
 import shutil
 import warnings
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import gen_finn_dt_tensor, roundup_to_integer_multiple
 
-import finn.core.datatype as dtp
+import finn.util
 import finn.util.data_packing as dpk
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.util.basic import (
-    gen_finn_dt_tensor,
-    make_build_dir,
-    roundup_to_integer_multiple,
-)
+from finn.util.basic import make_build_dir
 from finn.util.data_packing import (
     hexstring2npbytearray,
     pack_innermost_dim_as_hex_string,
@@ -101,6 +99,33 @@ class MakePYNQDriver(Transformation):
         )
         driver_base_py = pynq_driver_dir + "/driver_base.py"
         shutil.copy(driver_base_template, driver_base_py)
+        # driver depends on qonnx and finn packages
+        # extract individual source files and copy to driver folder
+        qonnx_target_path = pynq_driver_dir + "/qonnx"
+        finn_target_path = pynq_driver_dir + "/finn"
+        os.makedirs(qonnx_target_path + "/core", exist_ok=True)
+        os.makedirs(qonnx_target_path + "/util", exist_ok=True)
+        os.makedirs(finn_target_path + "/util", exist_ok=True)
+        qonnx_path = qonnx.__path__[0]
+        finn_util_path = finn.util.__path__[0]
+        files_to_copy = []
+        files_to_copy.append(
+            (qonnx_path + "/core/datatype.py", qonnx_target_path + "/core/datatype.py")
+        )
+        files_to_copy.append(
+            (qonnx_path + "/core/__init__.py", qonnx_target_path + "/core/__init__.py")
+        )
+        files_to_copy.append(
+            (qonnx_path + "/util/basic.py", qonnx_target_path + "/util/basic.py")
+        )
+        files_to_copy.append(
+            (
+                finn_util_path + "/data_packing.py",
+                finn_target_path + "/util/data_packing.py",
+            )
+        )
+        for (src_file, target_file) in files_to_copy:
+            shutil.copy(src_file, target_file)
         # extract input-output shapes from the graph
         # TODO convert this to an analysis pass?
         idt = []
@@ -264,20 +289,6 @@ class MakePYNQDriver(Transformation):
         )
         shutil.copy(validate_template, validate_py)
 
-        # copy all the dependencies into the driver folder
-        # driver imports utils/data_packing and core/datatype
-        # both of which are in finn-base
-        # e.g. $FINN_ROOT/deps/finn-base/src/finn/util/data_packing.py
-        dpk_root = dpk.__file__
-        # e.g. $FINN_ROOT/deps/finn-base/src/finn/util
-        dpk_root = dpk_root.replace("data_packing.py", "")
-        # e.g. $FINN_ROOT/deps/finn-base/src/finn/core/datatype.py
-        dtp_root = dtp.__file__
-        # e.g. $FINN_ROOT/deps/finn-base/src/finn/core
-        dtp_root = dtp_root.replace("datatype.py", "")
-        shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
-        shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")
-
         # generate weight files for runtime-writable layers
 
         for sdp_ind, sdp_node in enumerate(model.graph.node):
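
Instead of copying entire finn-base subpackages, the driver now vendors only the four source files it imports, located via each package's __path__. A short illustration of that lookup (actual paths will differ per install):

    import qonnx
    import finn.util

    # __path__[0] is the package's on-disk source directory -- the root that
    # MakePYNQDriver copies datatype.py, basic.py and data_packing.py from
    print(qonnx.__path__[0])      # e.g. <site-packages>/qonnx
    print(finn.util.__path__[0])  # e.g. <site-packages>/finn/util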
diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
index b52e61ff06d5185283cbb615d3018227ca988eca..a589cb039c825ff97c11df7ffa57109df27f3fd0 100644
--- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -28,11 +28,13 @@
 
 import os
 import subprocess
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
 from shutil import copy
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
@@ -43,8 +45,6 @@ from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.util.basic import make_build_dir, pynq_part_map
 
 from . import templates
diff --git a/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
index 0a0c45b6bedeabe7cfa1c7209ea74b6876b828b2..bc020ca428f37d50eb26fc9322df2183f665f27c 100644
--- a/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
+++ b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
@@ -26,8 +26,9 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
index 8b332972cac6bf001490c0c2396174be175d6d33..07021c1e8d20b153db2b3784e1f2bd0211ed1af5 100644
--- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
@@ -29,10 +29,11 @@
 import copy
 import multiprocessing as mp
 import os
+import qonnx.custom_op.registry as registry
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import get_num_default_workers
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
-from finn.util.basic import get_num_default_workers, make_build_dir
+from finn.util.basic import make_build_dir
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/prepare_ip.py b/src/finn/transformation/fpgadataflow/prepare_ip.py
index 4fdcf3939fe6d879abe36907a1bf84a417cb9903..2ebd6310f01baebe307befef6bd5db41142edbc8 100644
--- a/src/finn/transformation/fpgadataflow/prepare_ip.py
+++ b/src/finn/transformation/fpgadataflow/prepare_ip.py
@@ -27,10 +27,10 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import qonnx.custom_op.registry as registry
 import warnings
+from qonnx.transformation.base import Transformation
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
 from finn.util.basic import make_build_dir
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
diff --git a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
index 66799ff4297ad0e2f8afa9261b0f3f983b27452d..645d86cf1473303f85bf68ca1cb65371ce1f979b 100644
--- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
@@ -26,8 +26,9 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import NodeLocalTransformation
+import qonnx.custom_op.registry as registry
+from qonnx.transformation.base import NodeLocalTransformation
+
 from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
     ReplaceVerilogRelPaths,
 )
diff --git a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
index 7850d37423a9add0880e054c7b035b9e735c7f25..4e7970caa02ae01417ce7690167ce3444eca7f94 100644
--- a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
+++ b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
@@ -27,9 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import qonnx.custom_op.registry as registry
+from qonnx.transformation.base import Transformation
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/set_exec_mode.py b/src/finn/transformation/fpgadataflow/set_exec_mode.py
index caf891bc4444a65976103746685b2e79abdd708f..a08d153cb2f6c56a83c8ee5874faa675eca7c057 100644
--- a/src/finn/transformation/fpgadataflow/set_exec_mode.py
+++ b/src/finn/transformation/fpgadataflow/set_exec_mode.py
@@ -26,8 +26,9 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
+import qonnx.custom_op.registry as registry
+from qonnx.transformation.base import Transformation
+
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
index f1783c27ab150d6d5a83f458579c919d3b8c787b..0139c71666fdfa4b60cb356ceb65ce2c5b831c13 100644
--- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py
+++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
@@ -29,19 +29,20 @@
 import math
 import numpy as np
 import warnings
+from pyverilator.util.axi_utils import reset_rtlsim, toggle_clk
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 
 from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.util.fpgadataflow import is_fpgadataflow_node
-from finn.util.pyverilator import pyverilate_stitched_ip, reset_rtlsim, toggle_clk
+from finn.util.pyverilator import pyverilate_stitched_ip
 
 
 def reset_implementation(node):
diff --git a/src/finn/transformation/fpgadataflow/set_folding.py b/src/finn/transformation/fpgadataflow/set_folding.py
index 443d5c255316c5ca5b9b4ceba50981a906818d9a..23943084ab99d6ab880a69975e0b4a49756905a7 100644
--- a/src/finn/transformation/fpgadataflow/set_folding.py
+++ b/src/finn/transformation/fpgadataflow/set_folding.py
@@ -28,12 +28,12 @@
 
 import numpy as np
 import warnings
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import GiveUniqueNodeNames
 
 from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
-from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py
index 49cd6c82bca9ff7578314180c1ba433d63a32087..8d4aec259c440e311f6e3a6fb4d0359d55d738ca 100644
--- a/src/finn/transformation/fpgadataflow/synth_ooc.py
+++ b/src/finn/transformation/fpgadataflow/synth_ooc.py
@@ -27,9 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+from qonnx.transformation.base import Transformation
 from shutil import copy2
 
-from finn.transformation.base import Transformation
 from finn.util.basic import make_build_dir
 from finn.util.vivado import out_of_context_synth
 
diff --git a/src/finn/transformation/fpgadataflow/template_driver.py b/src/finn/transformation/fpgadataflow/template_driver.py
index 31dd22573e35894794dc522c0cf6ab47ce6c6cfc..05ee6ad920d7e921dc9611a7936e28288ba53a0a 100644
--- a/src/finn/transformation/fpgadataflow/template_driver.py
+++ b/src/finn/transformation/fpgadataflow/template_driver.py
@@ -60,7 +60,7 @@ pynq_driver_template = """
 import argparse
 import numpy as np
 import os
-from finn.core.datatype import DataType
+from qonnx.core.datatype import DataType
 from driver_base import FINNExampleOverlay
 
 # dictionary describing the I/O of the FINN-generated accelerator
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 84fad1d8c3831fdfc8e59c6594ac21071eab83ec..d6d4df5e6c1add0af00b7a5558045b5e331177d6 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -129,7 +129,7 @@ if {$BOARD == "ZCU104"} {
 
 create_bd_design "top"
 if {$ZYNQ_TYPE == "zynq_us+"} {
-    create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.3 zynq_ps
+    create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.4 zynq_ps
     apply_bd_automation -rule xilinx.com:bd_rule:zynq_ultra_ps_e -config {apply_board_preset "1" }  [get_bd_cells zynq_ps]
     #activate one slave port, deactivate the second master port
     set_property -dict [list CONFIG.PSU__USE__S_AXI_GP2 {1}] [get_bd_cells zynq_ps]
@@ -182,7 +182,7 @@ proc assign_axi_addr_proc {axi_intf_path} {
     #align base address to range
     set offset [expr ($axi_peripheral_base + ($range-1)) & ~($range-1)]
     #perform assignment
-    assign_bd_address [get_bd_addr_segs $axi_intf_path/Reg] -offset $offset -range $range
+    assign_bd_address [get_bd_addr_segs $axi_intf_path/Reg*] -offset $offset -range $range
     #advance base address
     set axi_peripheral_base [expr $offset + $range]
 }
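
The alignment step in `assign_axi_addr_proc` is the standard align-up idiom: adding `range-1` and masking with the complement rounds the running base address up to the next multiple of `range`, assuming `range` is a power of two. A minimal Python sketch of the same arithmetic, with illustrative addresses:

def align_up(base, range_):
    # round base up to the next multiple of range_ (range_ must be a power of two)
    return (base + (range_ - 1)) & ~(range_ - 1)

assert align_up(0xA0001000, 0x10000) == 0xA0010000  # rounded up to the next 64 KiB boundary
assert align_up(0xA0010000, 0x10000) == 0xA0010000  # already aligned, unchanged
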
diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py
index 4dce3ab16c38bfe5dd43f3e23b14ea2ec571f68c..855b30fe9573c534a13c961277ae4ab84507d619 100644
--- a/src/finn/transformation/fpgadataflow/vitis_build.py
+++ b/src/finn/transformation/fpgadataflow/vitis_build.py
@@ -30,10 +30,15 @@ import json
 import os
 import subprocess
 from enum import Enum
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import (
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+    RemoveUnusedTensors,
+)
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
@@ -44,11 +49,6 @@ from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.general import (
-    GiveReadableTensorNames,
-    GiveUniqueNodeNames,
-    RemoveUnusedTensors,
-)
 from finn.util.basic import make_build_dir
 
 from . import templates
diff --git a/src/finn/transformation/move_reshape.py b/src/finn/transformation/move_reshape.py
index 765d842997e1a388bd7d1e758f25dd861d2c4d4a..cec04a182b87e3af2c563a862554bfe026ad594a 100644
--- a/src/finn/transformation/move_reshape.py
+++ b/src/finn/transformation/move_reshape.py
@@ -1,8 +1,7 @@
 import warnings
-
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.util.basic import get_by_name, is_finn_op
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import get_by_name, is_finn_op
 
 
 def _is_fpgadataflow_node(node):
diff --git a/src/finn/transformation/qonnx/__init__.py b/src/finn/transformation/qonnx/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/transformation/qonnx/convert_qonnx_to_finn.py b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
index 70656e4d0987924ba43d0e657414d0d172feb5ce..967a1276365e4af1a6d617c081b9c04b4710da97 100644
--- a/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
+++ b/src/finn/transformation/qonnx/convert_qonnx_to_finn.py
@@ -26,12 +26,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.extract_conv_bias import ExtractBiasFromConv
+from qonnx.transformation.gemm_to_matmul import GemmToMatMul
+from qonnx.transformation.infer_datatypes import InferDataTypes
 from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
+from qonnx.transformation.remove import RemoveIdentityOps
 
-from finn.transformation.base import Transformation
-from finn.transformation.extract_conv_bias import ExtractBiasFromConv
-from finn.transformation.gemm_to_matmul import GemmToMatMul
-from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.qonnx.fold_quant_weights import FoldQuantWeights
 from finn.transformation.qonnx.infer_quant_avg_pool_2d import (
     AvgPoolAndTruncToQuantAvgPool,
@@ -40,7 +41,6 @@ from finn.transformation.qonnx.quant_act_to_multithreshold import (
     ConvertQuantActToMultiThreshold,
     default_filter_function_generator,
 )
-from finn.transformation.remove import RemoveIdentityOps
 
 
 class ConvertQONNXtoFINN(Transformation):
diff --git a/src/finn/transformation/qonnx/fold_quant_weights.py b/src/finn/transformation/qonnx/fold_quant_weights.py
index e8a0f418ae5eb587d6aabae57d8b379357d3a0ca..80b6042d03ea11a45493011288133ed3a6f57c8d 100644
--- a/src/finn/transformation/qonnx/fold_quant_weights.py
+++ b/src/finn/transformation/qonnx/fold_quant_weights.py
@@ -27,14 +27,13 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
+import qonnx.core.onnx_exec as oxe
 from onnx import TensorProto, helper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
-
-import finn.core.onnx_exec as oxe
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.remove import remove_node_and_rewire
+from qonnx.transformation.remove import remove_node_and_rewire
 
 
 class FoldQuantWeights(Transformation):
diff --git a/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
index c234bd38d9679f72b6df73e81df57fba3e8d4554..5a3f176f1ff41f17bdf684d59433d4f184a7ccb5 100644
--- a/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
+++ b/src/finn/transformation/qonnx/infer_quant_avg_pool_2d.py
@@ -29,13 +29,12 @@
 
 import math
 from onnx import TensorProto, helper
-
-from finn.core.datatype import DataType
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import get_by_name
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import get_by_name
 
 
 def _get_signed_from_upstream(model, trunc_node):
@@ -274,7 +273,7 @@ class AvgPoolAndTruncToQuantAvgPool(Transformation):
                             "QuantAvgPool2d",
                             [act_scale_div_tensor.name],
                             [act_scale_mul_tensor.name],
-                            domain="finn.custom_op.general",
+                            domain="qonnx.custom_op.general",
                             stride=stride,
                             kernel=k_s,
                             ibits=ibits,
diff --git a/src/finn/transformation/qonnx/qonnx_activation_handlers.py b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
index c8bde7fea8ae8195001a7eccfd48baa4c48997ae..a50a5850779cadf7ab21b9c1c4dfdbb36232af42 100644
--- a/src/finn/transformation/qonnx/qonnx_activation_handlers.py
+++ b/src/finn/transformation/qonnx/qonnx_activation_handlers.py
@@ -29,16 +29,17 @@
 import numpy as np
 from abc import ABC, abstractmethod
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
+np_default_dtype = np.float32
 
 
 class QuantActBaseHandler(ABC):
     """Base class for converting quantized activation expressed in the QONNX dialect
     to the FINN ONNX dialect.
     :param model: The model on which this handler should operate.
-    :type model: class: `finn.core.modelwrapper.ModelWrapper`
+    :type model: class: `qonnx.core.modelwrapper.ModelWrapper`
     :param quant_node: The Quant node which a given handler should replace.
     :param quant_node_index: The index of the Quant node in the given model.
     :type quant_node_index: `int`
@@ -146,7 +147,7 @@ class QuantActBaseHandler(ABC):
             [n.input[0], thresh_tensor.name],
             [n.output[0]],
             out_dtype="FLOAT32",
-            domain="finn.custom_op.general",
+            domain="qonnx.custom_op.general",
         )
         graph.node.insert(running_node_index, outp_trans_node)
         running_node_index += 1
@@ -164,17 +165,16 @@ class QuantActBaseHandler(ABC):
         if scale_scalar and bias_scalar and self._q_node.op_type == "BipolarQuant":
             # Get Quant parameters
             mul_scale = np.atleast_1d(mul_scale)
-            # ONNX only accepts 64bit floats as attributes
-            mul_scale = mul_scale.astype(dtype=np.float64)
             adder_bias = np.atleast_1d(adder_bias)
-            adder_bias = adder_bias.astype(dtype=np.float64)
 
             # Set Bias and scale
-            mt_inst.set_nodeattr("out_scale", mul_scale[0])
+            # note calls to .item() to get Python float instead of numpy float
+            # ONNX attribute setting fails otherwise
+            mt_inst.set_nodeattr("out_scale", mul_scale[0].item())
             # FINN applies scale first then bias,
             # which is the other way around in Brevitas,
             # we thus need to adjust the bias in the MultiThreshold node
-            finn_bias = adder_bias[0] * mul_scale[0]
+            finn_bias = adder_bias[0].item() * mul_scale[0].item()
             mt_inst.set_nodeattr("out_bias", finn_bias)
 
             # Set the output data type
@@ -190,8 +190,7 @@ class QuantActBaseHandler(ABC):
             zero_bias = False
             if bias_scalar:
                 adder_bias = np.atleast_1d(adder_bias)
-                # ONNX only accepts 64bit floats as attributes
-                adder_bias = adder_bias.astype(dtype=np.float64)[0]
+                adder_bias = adder_bias[0]
                 add_shape = tuple()
                 if adder_bias == 0.0:
                     zero_bias = True
@@ -234,7 +233,7 @@ class QuantActBaseHandler(ABC):
             unity_scale = False
             if scale_scalar:
                 mul_scale = np.atleast_1d(mul_scale)
-                mul_scale = mul_scale.astype(dtype=np.float64)[0]
+                mul_scale = mul_scale[0]
                 mul_shape = tuple()
                 if mul_scale == 1.0:
                     unity_scale = True
@@ -313,7 +312,7 @@ class QuantReluHandler(QuantActBaseHandler):
         # No bias allowed for Relu activations, see: https://github.com/Xilinx/
         # brevitas/blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
         # export/onnx/finn/handler/act.py#L48
-        bias = np.array([0.0])
+        bias = np.array([0.0], dtype=np_default_dtype)
         return bias
 
     def _calculate_thresholds(self):
@@ -339,7 +338,9 @@ class QuantReluHandler(QuantActBaseHandler):
         num_scale_channels = flat_scale.shape[0]
         step = np.abs(flat_scale).astype(np.float32)
         min_threshold = step / 2
-        thresholds = np.empty((num_scale_channels, num_thresholds)).astype(np.float32)
+        thresholds = np.empty(
+            (num_scale_channels, num_thresholds), dtype=np_default_dtype
+        )
         for c in range(num_scale_channels):
             for t in range(num_thresholds):
                 thresholds[c][t] = min_threshold[c] + step[c] * t
@@ -438,13 +439,13 @@ class QuantIdentityHandler(QuantActBaseHandler):
         # a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/export/
         # onnx/finn/handler/act.py#L64
         if bit_width == 1.0:
-            bias = np.array([-0.5])
+            bias = np.array([-0.5], dtype=np_default_dtype)
         else:
             if narrow:
                 min_non_scaled_val = -(2 ** (bit_width - 1) - 1)
             else:
                 min_non_scaled_val = -(2 ** (bit_width - 1))
-            bias = np.array([min_non_scaled_val])
+            bias = np.array([min_non_scaled_val], dtype=np_default_dtype)
         return bias
 
     def _calculate_thresholds(self):
@@ -463,7 +464,7 @@ class QuantIdentityHandler(QuantActBaseHandler):
         # blob/a5bfd6dc5e030f0047ac1ee47932b60e8e873e17/src/brevitas/
         # export/onnx/finn/handler/act.py#L76
         if bit_width == 1.0:
-            thresholds = np.empty([1, 1])
+            thresholds = np.empty([1, 1], dtype=np_default_dtype)
             thresholds[0] = 0
             return thresholds
         else:
@@ -477,7 +478,9 @@ class QuantIdentityHandler(QuantActBaseHandler):
             num_scale_channels = flat_scale.shape[0]
             step = np.abs(flat_scale)
             half_step = step / 2.0
-            thresholds = np.empty((num_scale_channels, num_thresholds))
+            thresholds = np.empty(
+                (num_scale_channels, num_thresholds), dtype=np_default_dtype
+            )
             # compute the value of the smallest threshold, we'll neg-bias all
             # generated thresholds by this much
             min_threshold = -half_step - step * ((num_thresholds // 2) - 1)
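
The bias adjustment in the hunks above follows from operation order: Brevitas computes (x + bias) * scale, whereas FINN's MultiThreshold applies x * out_scale + out_bias, so the equivalent FINN bias is bias * scale. A quick numeric check, with illustrative values:

import numpy as np

mul_scale = np.atleast_1d(np.float32(0.25))
adder_bias = np.atleast_1d(np.float32(-0.5))
x = 3.0

brevitas_out = (x + adder_bias[0]) * mul_scale[0]       # bias first, then scale
finn_bias = adder_bias[0].item() * mul_scale[0].item()  # adjusted bias, as in the patch
finn_out = x * mul_scale[0].item() + finn_bias          # scale first, then bias
assert np.isclose(brevitas_out, finn_out)
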
diff --git a/src/finn/transformation/qonnx/quant_act_to_multithreshold.py b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
index 29ba93dfcfe6d18e0ff8927b6d646cb310d0262a..c52d69b0f09d306c5b076bb6ef1775f38977241a 100644
--- a/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
+++ b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py
@@ -28,8 +28,8 @@
 
 
 import warnings
+from qonnx.transformation.base import Transformation
 
-from finn.transformation.base import Transformation
 from finn.transformation.qonnx.qonnx_activation_handlers import QuantActBaseHandler
 
 
diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py
index d0ec26a4d10c688db7931e40d7cfd840394b55a1..2e68de698bb9ed67d77f9dc9c6c0e2559075e0e2 100644
--- a/src/finn/transformation/streamline/__init__.py
+++ b/src/finn/transformation/streamline/__init__.py
@@ -30,16 +30,17 @@ from pkgutil import extend_path
 
 __path__ = extend_path(__path__, __name__)
 
-from finn.transformation.base import Transformation
-from finn.transformation.batchnorm_to_affine import BatchNormToAffine
-from finn.transformation.general import (
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.batchnorm_to_affine import BatchNormToAffine
+from qonnx.transformation.general import (
     ConvertDivToMul,
     ConvertSubToAdd,
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
 )
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.remove import RemoveIdentityOps
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.remove import RemoveIdentityOps
+
 from finn.transformation.streamline.absorb import (
     Absorb1BitMulIntoConv,
     Absorb1BitMulIntoMatMul,
diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py
index 32e539d87045520044378b94fd0e3c71486990c7..0299c4f4d89d1fdd94434db77c77a0e529c86d26 100644
--- a/src/finn/transformation/streamline/absorb.py
+++ b/src/finn/transformation/streamline/absorb.py
@@ -27,16 +27,15 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
+import qonnx.core.data_layout as DataLayout
 import warnings
 from onnx import helper as oh
-
-import finn.core.data_layout as DataLayout
-from finn.core.datatype import DataType
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import get_by_name
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import get_by_name
 
 
 class AbsorbSignBiasIntoMultiThreshold(Transformation):
diff --git a/src/finn/transformation/streamline/collapse_repeated.py b/src/finn/transformation/streamline/collapse_repeated.py
index 92c48c84ffa1a161f623ef6b22caaeb92f4a8199..d297110186c299b87ca04799497aab41f8e6f814 100644
--- a/src/finn/transformation/streamline/collapse_repeated.py
+++ b/src/finn/transformation/streamline/collapse_repeated.py
@@ -27,10 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from onnx import helper as oh
-
-from finn.core.datatype import DataType
-from finn.transformation.base import Transformation
-from finn.transformation.infer_shapes import InferShapes
+from qonnx.core.datatype import DataType
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.infer_shapes import InferShapes
 
 
 class CollapseRepeatedOp(Transformation):
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index e922dffe37691a39434e9ebafa5df6d1a11d389e..9ff8a2173ce81e2a19c56bbd20a326759c3b9df2 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -27,20 +27,19 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
+import qonnx.core.data_layout as DataLayout
 import warnings
 from onnx import TensorProto
 from onnx import helper as oh
-
-import finn.core.data_layout as DataLayout
-from finn.core.datatype import DataType
-from finn.core.onnx_exec import execute_node
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.base import Transformation
-from finn.transformation.general import SortGraph
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import get_by_name
+from qonnx.core.datatype import DataType
+from qonnx.core.onnx_exec import execute_node
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import SortGraph
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import get_by_name
 
 
 class MoveAddPastMul(Transformation):
@@ -678,7 +677,7 @@ class MakeMaxPoolNHWC(Transformation):
                                 0  # default to ceil_mode=0 (equivalent to np.floor)
                             )
                         n.op_type = "MaxPoolNHWC"
-                        n.domain = "finn.custom_op.general"
+                        n.domain = "qonnx.custom_op.general"
                         start_name = n.input[0]
                         mid_name = consumer.input[0]
                         end_name = consumer.output[0]
@@ -705,7 +704,7 @@ class MakeMaxPoolNHWC(Transformation):
                                 0  # default to ceil_mode=0 (equivalent to np.floor)
                             )
                         n.op_type = "MaxPoolNHWC"
-                        n.domain = "finn.custom_op.general"
+                        n.domain = "qonnx.custom_op.general"
                         start_name = producer.input[0]
                         mid_name = n.input[0]
                         end_name = n.output[0]
diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py
index ba476504a4213a7d004113c39e2285beeecdddec..601dab04cb2e4f71c6b9f00c0c58fbe1cd5c390d 100644
--- a/src/finn/transformation/streamline/round_thresholds.py
+++ b/src/finn/transformation/streamline/round_thresholds.py
@@ -27,8 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
-
-from finn.transformation.base import Transformation
+from qonnx.transformation.base import Transformation
 
 
 class RoundAndClipThresholds(Transformation):
diff --git a/src/finn/transformation/streamline/sign_to_thres.py b/src/finn/transformation/streamline/sign_to_thres.py
index 61d7eb35430262b1ee90dfa478076fb6f7556612..eafc071fb6b0d2b75ca5742da804aacc5ab39975 100644
--- a/src/finn/transformation/streamline/sign_to_thres.py
+++ b/src/finn/transformation/streamline/sign_to_thres.py
@@ -28,9 +28,8 @@
 
 import numpy as np
 from onnx import helper as oh
-
-from finn.core.datatype import DataType
-from finn.transformation.base import Transformation
+from qonnx.core.datatype import DataType
+from qonnx.transformation.base import Transformation
 
 
 class ConvertSignToThres(Transformation):
@@ -60,7 +59,7 @@ class ConvertSignToThres(Transformation):
                     "MultiThreshold",
                     [sign_in_name, thres_param_name],
                     [sign_out_name],
-                    domain="finn.custom_op.general",
+                    domain="qonnx.custom_op.general",
                     out_scale=2.0,
                     out_bias=-1.0,
                     out_dtype="BIPOLAR",
diff --git a/src/finn/util/__init__.py b/src/finn/util/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
new file mode 100644
index 0000000000000000000000000000000000000000..c90985ebc9932c56c840e34464b838f3141c79a8
--- /dev/null
+++ b/src/finn/util/basic.py
@@ -0,0 +1,218 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+import sys
+import tempfile
+
+# mapping from PYNQ board names to FPGA part names
+pynq_part_map = dict()
+pynq_part_map["Ultra96"] = "xczu3eg-sbva484-1-e"
+pynq_part_map["Pynq-Z1"] = "xc7z020clg400-1"
+pynq_part_map["Pynq-Z2"] = "xc7z020clg400-1"
+pynq_part_map["ZCU102"] = "xczu9eg-ffvb1156-2-e"
+pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e"
+pynq_part_map["ZCU111"] = "xczu28dr-ffvg1517-2-e"
+pynq_part_map["RFSoC2x2"] = "xczu28dr-ffvg1517-2-e"
+
+# native AXI HP port width (in bits) for PYNQ boards
+pynq_native_port_width = dict()
+pynq_native_port_width["Pynq-Z1"] = 64
+pynq_native_port_width["Pynq-Z2"] = 64
+pynq_native_port_width["Ultra96"] = 128
+pynq_native_port_width["ZCU102"] = 128
+pynq_native_port_width["ZCU104"] = 128
+pynq_native_port_width["ZCU111"] = 128
+pynq_native_port_width["RFSoC2x2"] = 128
+
+# Alveo device and platform mappings
+alveo_part_map = dict()
+alveo_part_map["U50"] = "xcu50-fsvh2104-2L-e"
+alveo_part_map["U200"] = "xcu200-fsgd2104-2-e"
+alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
+alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
+
+alveo_default_platform = dict()
+alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_201920_3"
+alveo_default_platform["U200"] = "xilinx_u200_xdma_201830_2"
+alveo_default_platform["U250"] = "xilinx_u250_xdma_201830_2"
+alveo_default_platform["U280"] = "xilinx_u280_xdma_201920_3"
+
+
+def get_rtlsim_trace_depth():
+    """Return the trace depth for rtlsim via PyVerilator. Controllable
+    via the RTLSIM_TRACE_DEPTH environment variable. If the variable is
+    undefined, the default value of 1 is returned. A trace depth of 1
+    will only show top-level signals and yield smaller .vcd files.
+
+    The following depth values are of interest for whole-network stitched IP
+    rtlsim:
+    - level 1 shows top-level input/output streams
+    - level 2 shows per-layer input/output streams
+    - level 3 shows full per-layer I/O, including FIFO count signals
+    """
+
+    try:
+        return int(os.environ["RTLSIM_TRACE_DEPTH"])
+    except KeyError:
+        return 1
+
+
+def get_remote_vivado():
+    """Return the address of the remote Vivado synthesis server as set by the,
+    REMOTE_VIVADO environment variable, otherwise return None"""
+
+    try:
+        return os.environ["REMOTE_VIVADO"]
+    except KeyError:
+        return None
+
+
+def get_finn_root():
+    "Return the root directory that FINN is cloned into."
+
+    try:
+        return os.environ["FINN_ROOT"]
+    except KeyError:
+        raise Exception(
+            """Environment variable FINN_ROOT must be set
+        correctly. Please ensure you have launched the Docker container correctly.
+        """
+        )
+
+
+def pyverilate_get_liveness_threshold_cycles():
+    """Return the number of no-output cycles rtlsim will wait before assuming
+    the simulation is not finishing and throwing an exception."""
+
+    return int(os.getenv("LIVENESS_THRESHOLD", 10000))
+
+
+def make_build_dir(prefix=""):
+    """Creates a folder with given prefix to be used as a build dir.
+    Use this function instead of tempfile.mkdtemp to ensure any generated files
+    will survive on the host after the FINN Docker container exits."""
+    try:
+        tmpdir = tempfile.mkdtemp(prefix=prefix)
+        newdir = tmpdir.replace("/tmp", os.environ["FINN_BUILD_DIR"])
+        os.makedirs(newdir)
+        return newdir
+    except KeyError:
+        raise Exception(
+            """Environment variable FINN_BUILD_DIR must be set
+        correctly. Please ensure you have launched the Docker container correctly.
+        """
+        )
+
+
+class CppBuilder:
+    """Builds the g++ compiler command to produces the executable of the c++ code
+    in code_gen_dir which is passed to the function build() of this class."""
+
+    def __init__(self):
+        self.include_paths = []
+        self.cpp_files = []
+        self.executable_path = ""
+        self.code_gen_dir = ""
+        self.compile_components = []
+        self.compile_script = ""
+
+    def append_includes(self, library_path):
+        """Adds given library path to include_paths list."""
+        self.include_paths.append(library_path)
+
+    def append_sources(self, cpp_file):
+        """Adds given c++ file to cpp_files list."""
+        self.cpp_files.append(cpp_file)
+
+    def set_executable_path(self, path):
+        """Sets member variable "executable_path" to given path."""
+        self.executable_path = path
+
+    def build(self, code_gen_dir):
+        """Builds the g++ compiler command according to entries in include_paths
+        and cpp_files lists. Saves it in bash script in given folder and
+        executes it."""
+        # assemble the g++ command from the collected sources and includes
+        self.code_gen_dir = code_gen_dir
+        self.compile_components.append("g++ -o " + str(self.executable_path))
+        for cpp_file in self.cpp_files:
+            self.compile_components.append(cpp_file)
+        for lib in self.include_paths:
+            self.compile_components.append(lib)
+        bash_compile = ""
+        for component in self.compile_components:
+            bash_compile += str(component) + " "
+        self.compile_script = str(self.code_gen_dir) + "/compile.sh"
+        with open(self.compile_script, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write(bash_compile + "\n")
+        bash_command = ["bash", self.compile_script]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+
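
As a usage sketch for CppBuilder (paths and flags below are illustrative assumptions, not taken from the patch): include/linker arguments and source files are appended as raw g++ arguments, then build() writes and executes compile.sh in the given folder:

builder = CppBuilder()
builder.append_includes("-I/path/to/hls/headers")   # hypothetical include path
builder.append_includes("-lpthread")                # linker flags share the same list
builder.append_sources("/path/to/node_model.cpp")   # hypothetical generated source
builder.set_executable_path("/tmp/node_model")
builder.build("/tmp/codegen_dir")                   # writes /tmp/codegen_dir/compile.sh and runs it
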
+
+def launch_process_helper(args, proc_env=None, cwd=None):
+    """Helper function to launch a process in a way that facilitates logging
+    stdout/stderr with Python loggers.
+    Returns (cmd_out, cmd_err)."""
+    if proc_env is None:
+        proc_env = os.environ.copy()
+    with subprocess.Popen(
+        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=proc_env, cwd=cwd
+    ) as proc:
+        (cmd_out, cmd_err) = proc.communicate()
+    if cmd_out is not None:
+        cmd_out = cmd_out.decode("utf-8")
+        sys.stdout.write(cmd_out)
+    if cmd_err is not None:
+        cmd_err = cmd_err.decode("utf-8")
+        sys.stderr.write(cmd_err)
+    return (cmd_out, cmd_err)
+
+
+def which(program):
+    "Python equivalent of the shell cmd 'which'."
+
+    # source:
+    # https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
+    def is_exe(fpath):
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+    fpath, fname = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        for path in os.environ["PATH"].split(os.pathsep):
+            exe_file = os.path.join(path, program)
+            if is_exe(exe_file):
+                return exe_file
+
+    return None
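
A short sketch tying the helpers above together (the tool name is only an illustration):

vivado_path = which("vivado")  # returns None if the executable is not on PATH
if vivado_path is not None:
    out, err = launch_process_helper([vivado_path, "-version"])
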
diff --git a/src/finn/util/create.py b/src/finn/util/create.py
index 46bf9980d55e18396809075907fa3e365d426a3d..a8c2e67b385b797905cd4c5a196091069898b583 100644
--- a/src/finn/util/create.py
+++ b/src/finn/util/create.py
@@ -28,10 +28,9 @@
 
 import numpy as np
 from onnx import TensorProto, helper
-
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
 
 
 def hls_random_mlp_maker(layer_spec):
diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py
new file mode 100644
index 0000000000000000000000000000000000000000..65478d2540b53443d3f74b44a22fde3defd8ca93
--- /dev/null
+++ b/src/finn/util/data_packing.py
@@ -0,0 +1,456 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import binascii
+import numpy as np
+import os
+import sys
+from bitstring import BitArray
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import roundup_to_integer_multiple
+
+
+def array2hexstring(array, dtype, pad_to_nbits, prefix="0x", reverse=False):
+    """
+    Pack given one-dimensional NumPy array with FINN DataType dtype into a hex
+    string.
+    Any BIPOLAR values will be converted to a single bit with a 0 representing
+    -1.
+    pad_to_nbits is used to prepend leading zeros to ensure packed strings of
+    fixed width. The minimum value for pad_to_nbits is 4, since a single hex
+    digit is four bits. reverse can be used to reverse the array prior to
+    packing.
+
+    Examples:
+
+    array2hexstring([1, 1, 1, 0], DataType["BINARY"], 4) = "0xe"
+
+    array2hexstring([1, 1, 1, 0], DataType["BINARY"], 8) = "0x0e"
+
+    array2hexstring([1, 1, 0, 1], DataType["BINARY"], 4, reverse=True) = "0xb"
+
+    array2hexstring([1, 1, 1, 0], DataType["BINARY"], 8, reverse=True) = "0x07"
+    """
+    if pad_to_nbits < 4:
+        pad_to_nbits = 4
+    # ensure input is a numpy array with float values
+    if type(array) != np.ndarray or array.dtype != np.float32:
+        # try to convert to a float numpy array (container dtype is float)
+        array = np.asarray(array, dtype=np.float32)
+    # ensure one-dimensional array to pack
+    assert array.ndim == 1, "The given array is not one-dimensional."
+    if dtype == DataType["BIPOLAR"]:
+        # convert bipolar values to binary
+        array = (array + 1) / 2
+        dtype = DataType["BINARY"]
+    # reverse prior to packing, if desired
+    if reverse:
+        array = np.flip(array, -1)
+    lineval = BitArray(length=0)
+    bw = dtype.bitwidth()
+    # special handling for fixed point: rescale, then pack as integers
+    if dtype.is_fixed_point():
+        sf = dtype.scale_factor()
+        array = array / sf
+        # replace dtype with signed integer equivalent
+        dtype = DataType["INT" + str(bw)]
+    for val in array:
+        # ensure that this value is permitted by chosen dtype
+        assert dtype.allowed(val), "This value is not permitted by chosen dtype."
+        if dtype.is_integer():
+            if dtype.signed():
+                lineval.append(BitArray(int=int(val), length=bw))
+            else:
+                lineval.append(BitArray(uint=int(val), length=bw))
+        else:
+            lineval.append(BitArray(float=val, length=bw))
+    if pad_to_nbits >= lineval.len:
+        # extend to the desired output width (a minimum of 4 bits)
+        lineval.prepend(BitArray(length=pad_to_nbits - lineval.len))
+    else:
+        raise Exception("Number of bits is greater than pad_to_nbits")
+    # represent as hex
+    return prefix + lineval.hex
+
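
One behavior worth highlighting beyond the docstring examples: BIPOLAR inputs are remapped to BINARY before packing, so each -1/+1 value becomes a single 0/1 bit. A small sketch, assuming qonnx is importable:

from qonnx.core.datatype import DataType

# [-1, 1, 1, -1] maps to binary [0, 1, 1, 0], i.e. 0b0110
assert array2hexstring([-1, 1, 1, -1], DataType["BIPOLAR"], 4) == "0x6"
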
+
+def hexstring2npbytearray(hexstring, remove_prefix="0x"):
+    """Convert a hex string into a NumPy array of dtype uint8.
+
+    Example:
+
+    hexstring2npbytearray("0f01") = array([15,  1], dtype=uint8)
+    """
+    # remove prefix if found
+    if hexstring.startswith(remove_prefix):
+        lrp = len(remove_prefix)
+        hexstring = hexstring[lrp:]
+    # use Python's built-in bytearray
+    return np.asarray(bytearray.fromhex(hexstring), dtype=np.uint8)
+
+
+def npbytearray2hexstring(npbytearray, prefix="0x"):
+    """Convert a NumPy array of uint8 dtype into a hex string.
+
+    Example:
+
+    npbytearray2hexstring(array([15,  1], dtype=uint8)) = "0x0f01"
+    """
+    return prefix + binascii.hexlify(bytearray(npbytearray)).decode("utf-8")
+
+
+def pack_innermost_dim_as_hex_string(
+    ndarray, dtype, pad_to_nbits, reverse_inner=False, prefix="0x"
+):
+    """Pack the innermost dimension of the given numpy ndarray into hex
+    strings using array2hexstring.
+
+    Examples:
+
+    A = [[1, 1, 1, 0], [0, 1, 1, 0]]
+
+    eA = ["0e", "06"]
+
+    pack_innermost_dim_as_hex_string(A, DataType["BINARY"], 8) == eA
+
+    B = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]]
+
+    eB = [[ "0f", "0f"], ["07", "0d"]]
+
+    pack_innermost_dim_as_hex_string(B, DataType["UINT2"], 8) == eB
+    """
+
+    if type(ndarray) != np.ndarray or ndarray.dtype != np.float32:
+        # try to convert to a float numpy array (container dtype is float)
+        ndarray = np.asarray(ndarray, dtype=np.float32)
+
+    def fun(x):
+        return array2hexstring(
+            x, dtype, pad_to_nbits, reverse=reverse_inner, prefix=prefix
+        )
+
+    return np.apply_along_axis(fun, ndarray.ndim - 1, ndarray)
+
+
+def unpack_innermost_dim_from_hex_string(
+    ndarray, dtype, out_shape, packedBits, reverse_inner=False
+):
+    """Convert a NumPy array of hex strings into a FINN NumPy array by unpacking
+    the hex strings into the specified data type. out_shape can be specified
+    such that any padding in the packing dimension is removed. If reverse_inner
+    is set, the innermost unpacked dimension will be reversed."""
+
+    if type(ndarray) != np.ndarray:
+        raise Exception(
+            """unpack_innermost_dim_from_hex_string needs ndarray
+        as input"""
+        )
+    if ndarray.dtype.kind not in {"U", "S"}:
+        raise Exception(
+            """unpack_innermost_dim_from_hex_string needs ndarray of
+        hex strings as input"""
+        )
+    # convert ndarray into flattened list
+    data = ndarray.flatten().tolist()
+    targetBits = dtype.bitwidth()
+    # calculate outer and inner dim shapes
+    outer_dim_elems = 1
+    for dim in range(len(out_shape) - 1):
+        outer_dim_elems = outer_dim_elems * out_shape[dim]
+    inner_dim_elems = out_shape[-1]
+
+    array = []
+    if dtype.is_fixed_point():
+        # convert fixed point as signed integer
+        conv_dtype = DataType["INT" + str(targetBits)]
+    else:
+        conv_dtype = dtype
+    for outer_elem in range(outer_dim_elems):
+        ar_list = []
+        ar_elem = data[0]
+        data.pop(0)
+        ar_elem = ar_elem.split("x")
+        ar_elem_bin = bin(int(ar_elem[1], 16))[2:].zfill(packedBits)
+        ar_elem_bin = [int(x) for x in ar_elem_bin]
+
+        ar_elem_bin.reverse()
+        for i in range(inner_dim_elems):
+            upper_limit = (i + 1) * targetBits
+            lower_limit = i * targetBits
+            elem = ar_elem_bin[lower_limit:upper_limit]
+            elem.reverse()
+            elem_str = "".join(map(str, elem))
+            if conv_dtype == DataType["FLOAT32"]:
+                ar_list.append(BitArray(bin=elem_str).float)
+            elif conv_dtype.is_integer():
+                ar_list.append(int(elem_str, 2))
+            else:
+                raise Exception("Not implemented for conv_dtype " + conv_dtype.name)
+        # reverse inner dimension back to "normal" positions
+        if reverse_inner is False:
+            ar_list.reverse()
+
+        # interpret output values correctly
+
+        # interpret values as bipolar
+        if conv_dtype == DataType["BIPOLAR"]:
+            ar_list = [2 * x - 1 for x in ar_list]
+        # interpret values as signed values
+        elif conv_dtype.name.startswith("INT"):
+            mask = 2 ** (conv_dtype.bitwidth() - 1)
+            ar_list = [-(x & mask) + (x & ~mask) for x in ar_list]
+
+        array.append(ar_list)
+    array = np.asarray(array, dtype=np.float32).reshape(out_shape)
+    if dtype.is_fixed_point():
+        # convert signed integer to fixed point by applying scale
+        array = array * dtype.scale_factor()
+    return array
+
+
+def numpy_to_hls_code(
+    ndarray, dtype, hls_var_name, pack_innermost_dim=True, no_decl=False
+):
+    """Return C++ code representation of a numpy ndarray with FINN DataType
+    dtype, using hls_var_name as the resulting C++ variable name. If
+    pack_innermost_dim is specified, the innermost dimension of the ndarray
+    will be packed into a hex string using array2hexstring. If no_decl is
+    set to True, no variable name and type will be generated as part of the
+    emitted string.
+    """
+    hls_dtype = dtype.get_hls_datatype_str()
+    if type(ndarray) != np.ndarray or ndarray.dtype != np.float32:
+        # try to convert to a float numpy array (container dtype is float)
+        ndarray = np.asarray(ndarray, dtype=np.float32)
+    if pack_innermost_dim:
+        idimlen = ndarray.shape[-1]
+        idimbits = idimlen * dtype.bitwidth()
+        idimbits = roundup_to_integer_multiple(idimbits, 4)
+        ndarray = pack_innermost_dim_as_hex_string(ndarray, dtype, idimbits)
+        hls_dtype = "ap_uint<%d>" % idimbits
+    ndims = ndarray.ndim
+    # add type string and variable name
+    # e.g. "const ap_uint<64>" "weightMem0"
+    ret = "%s %s" % (hls_dtype, hls_var_name)
+    # add dimensions
+    for d in range(ndims):
+        ret += "[%d]" % ndarray.shape[d]
+    orig_printops = np.get_printoptions()
+    np.set_printoptions(threshold=sys.maxsize)
+
+    # define a function to convert a single element into a C++ init string
+    # a single element can be a hex string if we are using packing
+    def elem2str(x):
+        # np.str was just an alias for the builtin str and is gone in newer NumPy
+        if type(x) == str or type(x) == np.str_:
+            return '%s("%s", 16)' % (hls_dtype, x)
+        elif type(x) == np.float32:
+            if dtype.is_integer():
+                return str(int(x))
+            else:
+                return str(x)
+        else:
+            raise Exception("Unsupported type for numpy_to_hls_code")
+
+    strarr = np.array2string(ndarray, separator=", ", formatter={"all": elem2str})
+    np.set_printoptions(**orig_printops)
+    strarr = strarr.replace("[", "{").replace("]", "}")
+    if no_decl:
+        ret = strarr + ";"
+    else:
+        ret = ret + " = \n" + strarr + ";"
+    return ret
+
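
For intuition about numpy_to_hls_code with packing enabled: each innermost row is folded into one ap_uint word whose width is the row's total bit count rounded up to a multiple of 4. A sketch with an assumed 2x2 UINT2 input:

import numpy as np
from qonnx.core.datatype import DataType

code = numpy_to_hls_code(np.asarray([[1, 2], [3, 0]]), DataType["UINT2"], "weights")
# expected to yield roughly:
# ap_uint<4> weights[2] =
# {ap_uint<4>("0x6", 16), ap_uint<4>("0xc", 16)};
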
+
+def npy_to_rtlsim_input(input_file, input_dtype, pad_to_nbits, reverse_inner=True):
+    """Convert the multidimensional NumPy array of integers (stored as floats)
+    from input_file into a flattened sequence of Python arbitrary-precision
+    integers, packing the innermost dimension. See
+    finn.util.data_packing.pack_innermost_dim_as_hex_string() for more info on how
+    packing works. If reverse_inner is set, the innermost dimension will be
+    reversed prior to packing."""
+    pad_to_nbits = roundup_to_integer_multiple(pad_to_nbits, 4)
+    if issubclass(type(input_file), np.ndarray):
+        inp = input_file
+    elif os.path.isfile(input_file):
+        inp = np.load(input_file)
+    else:
+        raise Exception("input_file must be ndarray or filename for .npy")
+    if inp.shape[-1] == 1 and input_dtype.is_integer():
+        packed_data = inp.flatten().astype(input_dtype.to_numpy_dt())
+        packed_data = [int(x) for x in packed_data]
+    else:
+        packed_data = pack_innermost_dim_as_hex_string(
+            inp, input_dtype, pad_to_nbits, reverse_inner=reverse_inner
+        )
+        packed_data = packed_data.flatten()
+        packed_data = [int(x[2:], 16) for x in packed_data]
+    return packed_data
+
+
+def rtlsim_output_to_npy(
+    output, path, dtype, shape, packedBits, targetBits, reverse_inner=True
+):
+    """Convert a flattened sequence of Python arbitrary-precision integers
+    output into a NumPy array, saved as npy file at path. Each arbitrary-precision
+    integer is assumed to be a packed array of targetBits-bit elements, which
+    will be unpacked as the innermost dimension of the NumPy array. If path is
+    not None it will also be saved as a npy file."""
+
+    # TODO should have its own testbench?
+    output = np.asarray([hex(int(x)) for x in output])
+    out_array = unpack_innermost_dim_from_hex_string(
+        output, dtype, shape, packedBits=packedBits, reverse_inner=reverse_inner
+    )
+    # make copy before saving the array
+    out_array = out_array.copy()
+    if path is not None:
+        np.save(path, out_array)
+    return out_array
+
+
+def finnpy_to_packed_bytearray(
+    ndarray, dtype, reverse_inner=False, reverse_endian=False, fast_mode=False
+):
+    """Given a numpy ndarray with FINN DataType dtype, pack the innermost
+    dimension and return the packed representation as an ndarray of uint8.
+    The packed innermost dimension will be padded to the nearest multiple
+    of 8 bits. The returned ndarray has the same number of dimensions as the
+    input.
+
+    If fast_mode is enabled, will attempt to use shortcuts to save
+    on runtime for certain cases:
+    * 8-bit ndarray -> 8-bit
+    * ndarray -> 1-bit and total bits % 8 == 0
+    This mode is currently not well-tested; use at your own risk!
+    """
+
+    # handle fast_mode cases (currently only called from driver):
+    if issubclass(type(ndarray), np.ndarray) and fast_mode:
+        inp_is_byte = ndarray.dtype in [np.uint8, np.int8]
+        out_is_byte = dtype.bitwidth() == 8
+        double_reverse = reverse_inner and reverse_endian
+        # fast mode case: byte -> byte: cast
+        if inp_is_byte and out_is_byte and double_reverse:
+            return ndarray.view(np.uint8)
+        # fast mode case: xxx -> bit with nbits % 8 == 0: np.packbits
+        out_is_bit = dtype.bitwidth() == 1
+        bits = dtype.bitwidth() * ndarray.shape[-1]
+        bits_padded = roundup_to_integer_multiple(bits, 8)
+        no_pad = bits_padded == bits
+        if out_is_bit and no_pad and double_reverse:
+            in_as_int8 = ndarray.astype(np.int8)
+            # bipolar -> binary if needed
+            if dtype == DataType["BIPOLAR"]:
+                in_as_int8 = (in_as_int8 + 1) // 2
+            # reverse inner
+            in_as_int8 = np.flip(in_as_int8, axis=-1)
+            # pack with numpy
+            packed_data = np.packbits(in_as_int8, axis=-1)
+            # reverse endianness and return
+            return np.flip(packed_data, axis=-1)
+
+    if (not issubclass(type(ndarray), np.ndarray)) or ndarray.dtype != np.float32:
+        # try to convert to a float numpy array (container dtype is float)
+        ndarray = np.asarray(ndarray, dtype=np.float32)
+    # pack innermost dim to hex strings padded to 8 bits
+    bits = dtype.bitwidth() * ndarray.shape[-1]
+    bits_padded = roundup_to_integer_multiple(bits, 8)
+    packed_hexstring = pack_innermost_dim_as_hex_string(
+        ndarray, dtype, bits_padded, reverse_inner=reverse_inner
+    )
+
+    def fn(x):
+        return np.asarray(list(map(hexstring2npbytearray, x)))
+
+    if packed_hexstring.ndim == 0:
+        # scalar, call hexstring2npbytearray directly
+        ret = hexstring2npbytearray(packed_hexstring.item())  # .item() replaces the removed np.asscalar
+    else:
+        # convert ndarray of hex strings to byte array
+        ret = np.apply_along_axis(fn, packed_hexstring.ndim - 1, packed_hexstring)
+    if reverse_endian:
+        # reverse the endianness of packing dimension
+        ret = np.flip(ret, axis=-1)
+    return ret
+
+
+def packed_bytearray_to_finnpy(
+    packed_bytearray,
+    dtype,
+    output_shape=None,
+    reverse_inner=False,
+    reverse_endian=False,
+    fast_mode=False,
+):
+    """Given a packed numpy uint8 ndarray, unpack it into a FINN array of
+    given DataType.
+
+    output_shape can be specified to remove padding from the
+    packed dimension, or set to None to be inferred from the input.
+
+    If fast_mode is enabled, will attempt to use shortcuts (casting) to save
+    on runtime for certain cases.
+    This mode is currently not well-tested; use at your own risk.
+
+    """
+
+    if (
+        not issubclass(type(packed_bytearray), np.ndarray)
+    ) or packed_bytearray.dtype != np.uint8:
+        raise Exception("packed_bytearray_to_finnpy needs NumPy uint8 arrays")
+    if packed_bytearray.ndim == 0:
+        raise Exception("packed_bytearray_to_finnpy expects at least 1D ndarray")
+    packed_dim = packed_bytearray.ndim - 1
+    packed_bits = packed_bytearray.shape[packed_dim] * 8
+    target_bits = dtype.bitwidth()
+    if output_shape is None:
+        # determine output shape from input shape
+        assert (
+            packed_bits % target_bits == 0
+        ), """packed_bits are not divisable by
+        target_bits."""
+        n_target_elems = packed_bits // target_bits
+        output_shape = packed_bytearray.shape[:-1] + (n_target_elems,)
+    # handle no-packing cases (if fast_mode) via casting to save on compute
+    out_is_byte = target_bits in [8, 16]
+    double_reverse = reverse_inner and reverse_endian
+    if out_is_byte and double_reverse and fast_mode:
+        no_unpad = np.prod(packed_bytearray.shape) == np.prod(output_shape)
+        if no_unpad:
+            as_np_type = packed_bytearray.view(dtype.to_numpy_dt())
+            return as_np_type.reshape(output_shape).astype(np.float32)
+    if reverse_endian:
+        packed_bytearray = np.flip(packed_bytearray, axis=-1)
+    # convert innermost dim of byte array to hex strings
+    packed_hexstring = np.apply_along_axis(
+        npbytearray2hexstring, packed_dim, packed_bytearray
+    )
+    ret = unpack_innermost_dim_from_hex_string(
+        packed_hexstring, dtype, output_shape, packed_bits, reverse_inner
+    )
+
+    return ret
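+
+# A hedged round-trip sketch (illustrative only; assumes the packing
+# counterpart finnpy_to_packed_bytearray defined above in this module):
+#
+#   from qonnx.core.datatype import DataType
+#   arr = np.asarray([[1, 0, 1, 1]], dtype=np.float32)
+#   packed = finnpy_to_packed_bytearray(arr, DataType["BINARY"])
+#   unpacked = packed_bytearray_to_finnpy(
+#       packed, DataType["BINARY"], output_shape=arr.shape
+#   )
+#   assert (arr == unpacked).all()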
diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py
new file mode 100644
index 0000000000000000000000000000000000000000..769ddb94657f8bc9bed858a784fee19a1767d2d5
--- /dev/null
+++ b/src/finn/util/fpgadataflow.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from qonnx.util.basic import get_by_name, is_finn_op
+
+
+def is_fpgadataflow_node(node):
+    """Returns True if given node is fpgadataflow node. Otherwise False."""
+    is_node = False
+    if node is not None:
+        if is_finn_op(node.domain):
+            n_backend = get_by_name(node.attribute, "backend")
+            if n_backend is not None:
+                backend_value = n_backend.s.decode("UTF-8")
+                if backend_value == "fpgadataflow":
+                    is_node = True
+
+    return is_node
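+
+# Hedged usage sketch (the node below is illustrative; only the domain and
+# the backend attribute matter for this check):
+#
+#   from onnx import helper
+#   node = helper.make_node(
+#       "SomeOp", ["in0"], ["out0"],
+#       domain="finn.custom_op.fpgadataflow", backend="fpgadataflow",
+#   )
+#   assert is_fpgadataflow_node(node)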
diff --git a/src/finn/util/hls.py b/src/finn/util/hls.py
new file mode 100644
index 0000000000000000000000000000000000000000..52ed121a43ea416d99930245e6986fe23399ce4d
--- /dev/null
+++ b/src/finn/util/hls.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2021 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import os
+import subprocess
+
+from finn.util.basic import which
+
+
+class CallHLS:
+    """Call vitis_hls to run HLS build tcl scripts."""
+
+    def __init__(self):
+        self.tcl_script = ""
+        self.ipgen_path = ""
+        self.code_gen_dir = ""
+        self.ipgen_script = ""
+
+    def append_tcl(self, tcl_script):
+        """Sets the tcl script to be executed."""
+        self.tcl_script = tcl_script
+
+    def set_ipgen_path(self, path):
+        """Sets member variable ipgen_path to given path."""
+        self.ipgen_path = path
+
+    def build(self, code_gen_dir):
+        """Builds the bash script with given parameters and saves it in given folder.
+        To guarantee the generation in the correct folder the bash script contains a
+        cd command."""
+        assert which("vitis_hls") is not None, "vitis_hls not found in PATH"
+        self.code_gen_dir = code_gen_dir
+        self.ipgen_script = str(self.code_gen_dir) + "/ipgen.sh"
+        working_dir = os.environ["PWD"]
+        with open(self.ipgen_script, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write("cd {}\n".format(code_gen_dir))
+            f.write("vitis_hls {}\n".format(self.tcl_script))
+            f.write("cd {}\n".format(working_dir))
+        bash_command = ["bash", self.ipgen_script]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
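+
+# Hedged usage sketch (paths are placeholders; assumes vitis_hls is on PATH
+# and the tcl script drives the HLS build):
+#
+#   hls = CallHLS()
+#   hls.append_tcl("/path/to/hls_syn.tcl")
+#   hls.build("/path/to/code_gen_dir")
+#   # ipgen.sh is written into code_gen_dir and executed there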
diff --git a/src/finn/util/imagenet.py b/src/finn/util/imagenet.py
index abd412e8d963cbcc80370298fb833de86a218c41..b4548bb3520c4116ad6d85820bc7366eff1d4469 100644
--- a/src/finn/util/imagenet.py
+++ b/src/finn/util/imagenet.py
@@ -29,8 +29,8 @@
 import numpy as np
 import os
 from PIL import Image
+from qonnx.core.data_layout import NCHW, NHWC
 
-from finn.core.data_layout import NCHW, NHWC
 from finn.util.test import crop_center, resize_smaller_side
 
 
diff --git a/src/finn/util/platforms.py b/src/finn/util/platforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..8212cb5712e5b5421e55d8d957905677af555615
--- /dev/null
+++ b/src/finn/util/platforms.py
@@ -0,0 +1,480 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from abc import abstractmethod
+
+# contains the amount of available FPGA resources for several
+# Xilinx platforms, as well as certain resource limit guidelines
+# for creating designs that can achieve timing closure
+
+# explicit value for res types/costs we don't care about
+DONT_CARE = -1
+# recommended resource limits from Xilinx for timing closure
+# respectively for LUT, FF, BRAM_18K, URAM, DSP res types
+DEFAULT_RES_LIMITS = np.array([0.7, 0.5, 0.80, 0.80, 0.80])
+# constrain the avg utilization of BRAM_18K/URAM/DSP (positions 2, 3, 4) to 0.7
+DEFAULT_AVG_CONSTRAINTS = [((2, 3, 4), 0.7)]
+
+# resources required to instantiate certain infrastructure components
+# such as memory controllers and network interfaces
+DDR_RESOURCE_REQUIREMENTS = {
+    "LUT": 33256,
+    "FF": 44889,
+    "BRAM_18K": 199,
+    "URAM": 0,
+    "DSP": 3,
+}
+HBM_RESOURCE_REQUIREMENTS = {
+    "LUT": 10718,
+    "FF": 21793,
+    "BRAM_18K": 8,
+    "URAM": 0,
+    "DSP": 0,
+}
+
+# we assume use of VNx Alveo UDP stack
+# see: https://gitenterprise.xilinx.com/mruiznog/vitis_network_layer
+ETH_RESOURCE_REQUIREMENTS = {
+    "LUT": 35219,
+    "FF": 86269,
+    "BRAM_18K": 183,
+    "URAM": 0,
+    "DSP": 0,
+}
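+
+# e.g. in a multi-FPGA system the Ethernet SLR loses 35219 LUTs (and the
+# other Eth costs above) before the utilization limits are applied; see
+# Platform.guide_resources below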
+
+
+class Platform:
+    def __init__(
+        self,
+        nslr=1,
+        ndevices=1,
+        sll_count=[],
+        hbm_slr=-1,
+        ddr_slr=[0],
+        eth_slr=0,
+        eth_gbps=0,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        self.nslr = nslr
+        self.sll_count = sll_count
+        self.eth_slr = eth_slr
+        self.eth_gbps = eth_gbps
+        self.ndevices = ndevices
+        self.hbm_slr = hbm_slr
+        self.ddr_slr = ddr_slr
+        # limits must be a np.array either of
+        # the same shape as compute_resources
+        # or broadcastable to it
+        self.res_limits = limits
+        # list of tuples of the form ( tuple of resource positions to avg, limit )
+        self.avg_constraints = avg_constraints
+
+    @property
+    @abstractmethod
+    def compute_resources(self):
+        pass
+
+    @property
+    def guide_resources(self):
+        guide = []
+        # TODO: assert limits is of correct size
+        guide_res = (
+            np.tile(np.array(self.compute_resources), (self.ndevices, 1))
+        ).astype(int)
+        for i in range(self.nslr * self.ndevices):
+            # when in multi-FPGA mode, subtract cost of UDP connection from eth_slr
+            local_slr = i % self.nslr
+            if self.ndevices > 1 and local_slr == self.eth_slr:
+                guide_res[i][0] -= ETH_RESOURCE_REQUIREMENTS["LUT"]
+                guide_res[i][1] -= ETH_RESOURCE_REQUIREMENTS["FF"]
+                guide_res[i][2] -= ETH_RESOURCE_REQUIREMENTS["BRAM_18K"]
+                guide_res[i][3] -= ETH_RESOURCE_REQUIREMENTS["URAM"]
+                guide_res[i][4] -= ETH_RESOURCE_REQUIREMENTS["DSP"]
+            # subtract the cost of memory controllers
+            # if we have a choice between DDR and HBM, use HBM
+            if local_slr == self.hbm_slr:
+                guide_res[i][0] -= HBM_RESOURCE_REQUIREMENTS["LUT"]
+                guide_res[i][1] -= HBM_RESOURCE_REQUIREMENTS["FF"]
+                guide_res[i][2] -= HBM_RESOURCE_REQUIREMENTS["BRAM_18K"]
+                guide_res[i][3] -= HBM_RESOURCE_REQUIREMENTS["URAM"]
+                guide_res[i][4] -= HBM_RESOURCE_REQUIREMENTS["DSP"]
+            elif local_slr in self.ddr_slr:
+                guide_res[i][0] -= DDR_RESOURCE_REQUIREMENTS["LUT"]
+                guide_res[i][1] -= DDR_RESOURCE_REQUIREMENTS["FF"]
+                guide_res[i][2] -= DDR_RESOURCE_REQUIREMENTS["BRAM_18K"]
+                guide_res[i][3] -= DDR_RESOURCE_REQUIREMENTS["URAM"]
+                guide_res[i][4] -= DDR_RESOURCE_REQUIREMENTS["DSP"]
+            guide.append(list(guide_res[i]))
+        return guide
+
+    @property
+    def resource_count_dict(self):
+        res = dict()
+        for i in range(self.nslr * self.ndevices):
+            slr_res = dict()
+            slr_res["LUT"] = self.compute_resources[i % self.nslr][0]
+            slr_res["FF"] = self.compute_resources[i % self.nslr][1]
+            slr_res["BRAM_18K"] = self.compute_resources[i % self.nslr][2]
+            slr_res["URAM"] = self.compute_resources[i % self.nslr][3]
+            slr_res["DSP"] = self.compute_resources[i % self.nslr][4]
+            res["slr" + str(i)] = slr_res
+        return res
+
+    @property
+    def compute_connection_cost(self):
+        x = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), DONT_CARE)
+        # build connection cost matrix for one device's SLRs
+        xlocal = np.full((self.nslr, self.nslr), DONT_CARE)
+        for i in range(self.nslr):
+            for j in range(self.nslr):
+                if i == j:
+                    xlocal[i][j] = 0
+                elif abs(i - j) == 1:
+                    xlocal[i][j] = 1
+        # tile connection cost matrices for entire system
+        for i in range(self.ndevices):
+            x[
+                i * self.nslr : (i + 1) * self.nslr, i * self.nslr : (i + 1) * self.nslr
+            ] = xlocal
+        # set cost for ethernet connections, assuming daisy-chaining
+        for i in range(self.ndevices - 1):
+            x[i * self.nslr + self.eth_slr][(i + 1) * self.nslr + self.eth_slr] = 10
+            x[(i + 1) * self.nslr + self.eth_slr][i * self.nslr + self.eth_slr] = 10
+        return x
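+
+    # e.g. for nslr=2, ndevices=1 the cost matrix is [[0, 1], [1, 0]];
+    # with ndevices > 1, hops between devices via Ethernet cost 10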
+
+    @property
+    def compute_connection_resource(self):
+        sll = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), 0)
+        # build connection resource matrix for one device's SLRs
+        slllocal = np.full((self.nslr, self.nslr), -1)
+        for i in range(self.nslr):
+            for j in range(self.nslr):
+                if i == j:
+                    # no SLL constraint when going from one SLR to itself
+                    slllocal[i][j] = -1
+                else:
+                    slllocal[i][j] = self.sll_count[i][j]
+        # tile connection cost matrices for entire system
+        for i in range(self.ndevices):
+            sll[
+                i * self.nslr : (i + 1) * self.nslr, i * self.nslr : (i + 1) * self.nslr
+            ] = slllocal
+        # set cost for ethernet connections, assuming daisy-chaining
+        eth = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), 0)
+        # no Eth throughput constraints from one SLR to itself
+        for i in range(self.ndevices * self.nslr):
+            eth[i][i] = -1
+        # apply symmetric ETH throughput constraints between the SLRs that have GTXes
+        for i in range(self.ndevices - 1):
+            eth[i * self.nslr + self.eth_slr][
+                (i + 1) * self.nslr + self.eth_slr
+            ] = self.eth_gbps * (10**9)
+            eth[(i + 1) * self.nslr + self.eth_slr][
+                i * self.nslr + self.eth_slr
+            ] = self.eth_gbps * (10**9)
+        # pack sll and eth info in one list-of-list-of-tuple structure
+        constraints = []
+        for i in range(self.ndevices * self.nslr):
+            constraints_line = []
+            for j in range(self.ndevices * self.nslr):
+                # make sure not to constrain both resources at the same time
+                # constrain for Eth throughput between SLRs on different devices
+                # constrain for SLLs between SLRs on same device
+                is_offchip = i // self.nslr != j // self.nslr
+                constraints_line.append(
+                    (-1 if is_offchip else sll[i][j], eth[i][j] if is_offchip else -1)
+                )
+            constraints.append(constraints_line)
+        return constraints
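+
+    # each constraints[i][j] is a (sll_limit, eth_limit) tuple where -1 means
+    # unconstrained: on-chip pairs get (sll, -1), off-chip pairs get (-1, eth)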
+
+    def map_device_to_slr(self, idx):
+        """Given a global SLR index, return device id and local slr index"""
+        assert idx < self.nslr * self.ndevices
+        return (idx % self.nslr, idx // self.nslr)
+
+
+class Zynq7020_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(Zynq7020_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[53200, 2 * 53200, 280, 0, 220] for i in range(1)]
+
+
+class ZU3EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU3EG_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[71000, 2 * 71000, 412, 0, 360] for i in range(1)]
+
+
+class ZU7EV_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU7EV_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[230000, 2 * 230000, 610, 92, 1728] for i in range(1)]
+
+
+class ZU9EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU9EG_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[274000, 2 * 274000, 1824, 0, 2520] for i in range(1)]
+
+
+class ZU28DR_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU28DR_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[425000, 2 * 425000, 2160, 80, 4272] for i in range(1)]
+
+
+class Alveo_NxU50_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        # Vivado reports 23040 SLLs SLR0 <-> SLR1; a smaller budget is used here
+        sll_counts = [[0, 5000], [5000, 0]]
+        super(Alveo_NxU50_Platform, self).__init__(
+            nslr=2,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[],
+            hbm_slr=0,
+            eth_slr=1,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # U50 has identical resource counts on both SLRs
+        # return [[365000,2*365000,2*564, 304, 2580] for i in range(2)]
+        # we observe from Vivado that the resource counts are actually:
+        return [
+            [374400, 2 * 374400, 2 * 564, 304, 2592],
+            [368160, 2 * 368160, 2 * 564, 304, 2760],
+        ]
+
+
+class Alveo_NxU200_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        super(Alveo_NxU200_Platform, self).__init__(
+            nslr=3,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 2],
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # return [[355000, 723000, 2*638, 320, 2265],
+        #        [160000, 331000, 2*326, 160, 1317],
+        #        [355000, 723000, 2*638, 320, 2265]]
+        # we observe from Vivado that the resource counts are actually:
+        return [
+            [385920, 2 * 385920, 2 * 714, 320, 2268],
+            [199680, 2 * 199680, 2 * 420, 160, 1320],
+            [385920, 2 * 385920, 2 * 714, 320, 2268],
+        ]
+
+
+class Alveo_NxU250_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [
+            [0, 5000, 0, 0],
+            [5000, 0, 5000, 0],
+            [0, 5000, 0, 5000],
+            [0, 0, 5000, 0],
+        ]
+        super(Alveo_NxU250_Platform, self).__init__(
+            nslr=4,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 1, 2, 3],
+            eth_slr=3,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # U250 has identical resource counts on all 4 SLRs:
+        # return [[345000,2*345000,2*500, 320, 2877] for i in range(4)]
+        # we observe from Vivado that the resource counts are actually:
+        return [[375000, 2 * 375000, 2 * 576, 320, 2880] for i in range(4)]
+
+
+class Alveo_NxU280_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        super(Alveo_NxU280_Platform, self).__init__(
+            nslr=3,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 1],
+            hbm_slr=0,
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # according to UG1120
+        # return [[369000, 746000, 2*507, 320, 2733],
+        #        [333000, 675000, 2*468, 320, 2877],
+        #        [367000, 729000, 2*512, 320, 2880]]
+        # observed from Vivado:
+        return [
+            [400800, 2 * 400800, 2 * 600, 320, 2736],
+            [382080, 2 * 382080, 2 * 576, 320, 2880],
+            [380640, 2 * 380640, 2 * 576, 320, 2880],
+        ]
+
+
+platforms = dict()
+platforms["U50"] = Alveo_NxU50_Platform
+platforms["U200"] = Alveo_NxU200_Platform
+platforms["U250"] = Alveo_NxU250_Platform
+platforms["U280"] = Alveo_NxU280_Platform
+platforms["Pynq-Z1"] = Zynq7020_Platform
+platforms["Pynq-Z2"] = Zynq7020_Platform
+platforms["Ultra96"] = ZU3EG_Platform
+platforms["ZCU104"] = ZU7EV_Platform
+platforms["ZCU102"] = ZU9EG_Platform
+platforms["ZCU111"] = ZU28DR_Platform
diff --git a/src/finn/util/pyverilator.py b/src/finn/util/pyverilator.py
new file mode 100644
index 0000000000000000000000000000000000000000..3396561e06f553785e842ec0b6626bc405d262c5
--- /dev/null
+++ b/src/finn/util/pyverilator.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+from pyverilator import PyVerilator
+
+from finn.util.basic import get_rtlsim_trace_depth, make_build_dir
+
+
+def pyverilate_stitched_ip(
+    model,
+    read_internal_signals=True,
+    disable_common_warnings=True,
+    extra_verilator_args=[],
+):
+    """Given a model with stitched IP, return a PyVerilator sim object.
+    Trace depth is also controllable; see get_rtlsim_trace_depth().
+
+    :param read_internal_signals: If set, it will be possible to examine the
+        internal (not only port) signals of the Verilog module, but this may
+        slow down compilation and emulation.
+
+    :param disable_common_warnings: If set, disable the set of warnings that
+        Vivado-HLS-generated Verilog typically triggers in Verilator
+        (which can be very verbose otherwise).
+
+    """
+    if PyVerilator is None:
+        raise ImportError("Installation of PyVerilator is required.")
+
+    vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
+    with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
+        all_verilog_srcs = f.read().split()
+
+    def file_to_dir(x):
+        return os.path.dirname(os.path.realpath(x))
+
+    def file_to_basename(x):
+        return os.path.basename(os.path.realpath(x))
+
+    top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename"))
+    # note: str.strip removes characters, not a suffix, so use splitext here
+    top_module_name = os.path.splitext(top_module_file_name)[0]
+    build_dir = make_build_dir("pyverilator_ipstitched_")
+
+    # dump all Verilog code to a single file
+    # this is because large models with many files require
+    # a verilator command line too long for bash on most systems
+    # NOTE: there are duplicates in this list, and some files
+    # are identical but in multiple directories (regslice_core.v)
+
+    # remove duplicates from list by doing list -> set -> list
+    all_verilog_files = list(set(filter(lambda x: x.endswith(".v"), all_verilog_srcs)))
+
+    # remove all but one instances of regslice_core.v
+    filtered_verilog_files = []
+    remove_entry = False
+    for vfile in all_verilog_files:
+        if "regslice_core" in vfile:
+            if not remove_entry:
+                filtered_verilog_files.append(vfile)
+            remove_entry = True
+        else:
+            filtered_verilog_files.append(vfile)
+
+    # concatenate all verilog code into a single file
+    with open(vivado_stitch_proj_dir + "/" + top_module_file_name, "w") as wf:
+        for vfile in filtered_verilog_files:
+            with open(vfile) as rf:
+                wf.write("//Added from " + vfile + "\n\n")
+                wf.write(rf.read())
+
+    verilator_args = []
+    # disable common verilator warnings that should be harmless but commonly occur
+    # in large quantities for Vivado HLS-generated verilog code
+    if disable_common_warnings:
+        verilator_args += ["-Wno-STMTDLY"]
+        verilator_args += ["-Wno-PINMISSING"]
+        verilator_args += ["-Wno-IMPLICIT"]
+        verilator_args += ["-Wno-WIDTH"]
+        verilator_args += ["-Wno-COMBDLY"]
+    # force inlining of all submodules to ensure we can read internal signals properly
+    if read_internal_signals:
+        verilator_args += ["--inline-mult", "0"]
+
+    sim = PyVerilator.build(
+        top_module_file_name,
+        verilog_path=[vivado_stitch_proj_dir],
+        build_dir=build_dir,
+        trace_depth=get_rtlsim_trace_depth(),
+        top_module_name=top_module_name,
+        auto_eval=False,
+        read_internal_signals=read_internal_signals,
+        extra_args=verilator_args + extra_verilator_args,
+    )
+    return sim
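+
+# Hedged usage sketch (assumes `model` is a ModelWrapper whose metadata points
+# at a stitched-IP project, e.g. produced by FINN's CreateStitchedIP):
+#
+#   sim = pyverilate_stitched_ip(model)
+#   sim.start_vcd_trace("trace.vcd")  # optional waveform dump
+#   # ports can then be driven/read through sim.io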
diff --git a/src/finn/util/test.py b/src/finn/util/test.py
index 9c5462ae7f3ca3122fe672f8f01e939e398963a8..f5d3b1c30b8b7b439eae1c684ad84b33a3401c7c 100644
--- a/src/finn/util/test.py
+++ b/src/finn/util/test.py
@@ -38,10 +38,10 @@ import torchvision.transforms.functional as torchvision_util
 import warnings
 from brevitas_examples import bnn_pynq, imagenet_classification
 from pkgutil import get_data
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
 
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy
 from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map
@@ -144,7 +144,7 @@ def get_example_input(topology):
     "Get example numpy input tensor for given topology."
 
     if "fc" in topology:
-        raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+        raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
         onnx_tensor = onnx.load_tensor_from_string(raw_i)
         return nph.to_array(onnx_tensor)
     elif topology == "cnv":
diff --git a/src/finn/util/vcd.py b/src/finn/util/vcd.py
index 6a5a68f09930783f5a4e094ea88d6eeb9e07b99a..aaeb3ab920d1d8fae79c1173582d18cf81d03063 100644
--- a/src/finn/util/vcd.py
+++ b/src/finn/util/vcd.py
@@ -27,10 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import multiprocessing as mp
+from qonnx.util.basic import get_num_default_workers
 from vcdvcd import VCDVCD
 
-from finn.util.basic import get_num_default_workers
-
 # string patterns to search for to find particular interfaces
 # streaming interfaces
 vname = "TVALID"
diff --git a/src/finn/util/vivado.py b/src/finn/util/vivado.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc8ca40d8856a780260b8f809e22567c434461cf
--- /dev/null
+++ b/src/finn/util/vivado.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from finn.util.basic import launch_process_helper, which
+
+
+def out_of_context_synth(
+    verilog_dir,
+    top_name,
+    fpga_part="xczu3eg-sbva484-1-e",
+    clk_name="ap_clk_0",
+    clk_period_ns=5.0,
+):
+    "Run out-of-context Vivado synthesis, return resources and slack."
+
+    # ensure that the OHMYXILINX envvar is set
+    if "OHMYXILINX" not in os.environ:
+        raise Exception("The environment variable OHMYXILINX is not defined.")
+    # ensure that vivado is in PATH: source $VIVADO_PATH/settings64.sh
+    if which("vivado") is None:
+        raise Exception("vivado is not in PATH, ensure settings64.sh is sourced.")
+    omx_path = os.environ["OHMYXILINX"]
+    script = "vivadocompile.sh"
+    # vivadocompile.sh <top-level-entity> <clock-name (optional)> <fpga-part (optional)>
+    call_omx = "zsh %s/%s %s %s %s %f" % (
+        omx_path,
+        script,
+        top_name,
+        clk_name,
+        fpga_part,
+        float(clk_period_ns),
+    )
+    call_omx = call_omx.split()
+    launch_process_helper(call_omx, proc_env=os.environ.copy(), cwd=verilog_dir)
+
+    vivado_proj_folder = "%s/results_%s" % (verilog_dir, top_name)
+    res_counts_path = vivado_proj_folder + "/res.txt"
+
+    with open(res_counts_path, "r") as myfile:
+        res_data = myfile.read().split("\n")
+    ret = {}
+    ret["vivado_proj_folder"] = vivado_proj_folder
+    for res_line in res_data:
+        res_fields = res_line.split("=")
+        print(res_fields)
+        try:
+            ret[res_fields[0]] = float(res_fields[1])
+        except (ValueError, IndexError):
+            ret[res_fields[0]] = 0
+    if ret["WNS"] == 0:
+        ret["fmax_mhz"] = 0
+    else:
+        ret["fmax_mhz"] = 1000.0 / (clk_period_ns - ret["WNS"])
+    return ret
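+
+# Hedged usage sketch (paths and module name are placeholders):
+#
+#   ret = out_of_context_synth("/path/to/verilog", "my_top", clk_period_ns=5.0)
+#   print(ret["fmax_mhz"], ret["WNS"])  # keys parsed from oh-my-xilinx res.txt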
diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py
index 6d0c68f0f456c05ab60ffa043277409730b695ce..669601ecb6ebfd6758d3382ab097a1e93dc848c7 100644
--- a/tests/brevitas/test_brevitas_avg_pool_export.py
+++ b/tests/brevitas/test_brevitas_avg_pool_export.py
@@ -34,15 +34,15 @@ from brevitas.export import FINNManager
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantAvgPool2d
 from brevitas.quant_tensor import QuantTensor
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
-from finn.util.basic import gen_finn_dt_tensor
 
 base_export_onnx_path = "test_brevitas_avg_pool_export.onnx"
 
diff --git a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py
index 2592d381173ee2112565f17d6631dd98f05e221a..62aab2e3c2b85c6462c24194c917bdc2d8eec448 100644
--- a/tests/brevitas/test_brevitas_cnv.py
+++ b/tests/brevitas/test_brevitas_cnv.py
@@ -35,18 +35,19 @@ import numpy as np
 import os
 import torch
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.test import get_test_model_trained
 
 export_onnx_path = "test_brevitas_cnv.onnx"
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.parametrize("abits", [1, 2])
 @pytest.mark.parametrize("wbits", [1, 2])
diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py
index 3db1a208456f7209623530681d96d6aa35928900..181d610fff7a703a8ccbcf3bbb19bed2e5d7e89d 100644
--- a/tests/brevitas/test_brevitas_debug.py
+++ b/tests/brevitas/test_brevitas_debug.py
@@ -36,13 +36,13 @@ import os
 import torch
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from pkgutil import get_data
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import RemoveStaticGraphInputs
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import RemoveStaticGraphInputs
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.test import get_test_model_trained
 
@@ -63,7 +63,7 @@ def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
         model = ModelWrapper(finn_onnx)
         dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
         for dbg_node in dbg_nodes:
-            dbg_node.domain = "finn.custom_op.general"
+            dbg_node.domain = "qonnx.custom_op.general"
         model.save(finn_onnx)
         qonnx_cleanup(finn_onnx, out_file=finn_onnx)
         if QONNX_FINN_conversion:
@@ -79,7 +79,7 @@ def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
         #  domain conversion for us?
         dbg_nodes = model.get_nodes_by_op_type("DebugMarker")
         for dbg_node in dbg_nodes:
-            dbg_node.domain = "finn.custom_op.general"
+            dbg_node.domain = "qonnx.custom_op.general"
         model = model.transform(InferShapes())
         model = model.transform(FoldConstants())
         model = model.transform(RemoveStaticGraphInputs())
@@ -88,7 +88,7 @@ def test_brevitas_debug(QONNX_export, QONNX_FINN_conversion):
     assert len(model.graph.input) == 1
     assert len(model.graph.output) == 1
     # load one of the test vectors
-    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
     input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py
index fc0f24b9172eb7882197026420ede8fe5d69bee5..211fdb629b7c0465a145a094bab428064227afc9 100644
--- a/tests/brevitas/test_brevitas_fc.py
+++ b/tests/brevitas/test_brevitas_fc.py
@@ -35,13 +35,13 @@ import onnx.numpy_helper as nph
 import torch
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from pkgutil import get_data
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import RemoveStaticGraphInputs
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import RemoveStaticGraphInputs
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import make_build_dir
 from finn.util.test import get_test_model_trained
@@ -82,7 +82,7 @@ def test_brevitas_fc_onnx_export_and_exec(size, wbits, abits, QONNX_export):
     assert len(model.graph.input) == 1
     assert len(model.graph.output) == 1
     # load one of the test vectors
-    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
     input_dict = {model.graph.input[0].name: nph.to_array(input_tensor)}
diff --git a/tests/brevitas/test_brevitas_mobilenet.py b/tests/brevitas/test_brevitas_mobilenet.py
index 9c51206ee097e17b85728d6e606b1cf05eb0b9e4..b1475b6f4ec8c4a6ed34b4249b961031780d4be8 100644
--- a/tests/brevitas/test_brevitas_mobilenet.py
+++ b/tests/brevitas/test_brevitas_mobilenet.py
@@ -32,26 +32,27 @@ import brevitas.onnx as bo
 import numpy as np
 import torch
 from PIL import Image
-
-import finn.core.onnx_exec as oxe
-import finn.transformation.streamline.absorb as absorb
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import (
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     GiveUniqueParameterTensors,
 )
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.insert_topk import InsertTopK
-from finn.transformation.merge_onnx_models import MergeONNXModels
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.insert_topk import InsertTopK
+from qonnx.transformation.merge_onnx_models import MergeONNXModels
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
 from finn.util.basic import get_finn_root, make_build_dir
 from finn.util.pytorch import NormalizePreProc
 from finn.util.test import crop_center, get_test_model_trained, resize_smaller_side
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.xfail
 def test_brevitas_mobilenet():
diff --git a/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
index 4f9d2778028223d85882839ef7243e170ef90dd6..5d70acb10264dc10a3681589075507f06a9c903b 100644
--- a/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
+++ b/tests/brevitas/test_brevitas_non_scaled_quanthardtanh_export.py
@@ -38,15 +38,16 @@ from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantHardTanh
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx"
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.parametrize("abits", [1, 2, 4, 8])
 @pytest.mark.parametrize("narrow_range", [False, True])
diff --git a/tests/brevitas/test_brevitas_qconv2d.py b/tests/brevitas/test_brevitas_qconv2d.py
index 4d9bd14ae3500fd8c0e78e6c4d377ce1f234d168..214c55e5fd8b8c25c1ccca880f76690556af6397 100644
--- a/tests/brevitas/test_brevitas_qconv2d.py
+++ b/tests/brevitas/test_brevitas_qconv2d.py
@@ -38,17 +38,18 @@ from brevitas.core.scaling import ScalingImplType
 from brevitas.core.stats import StatsOp
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantConv2d
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
-from finn.util.basic import gen_finn_dt_tensor
 
 export_onnx_path = "test_brevitas_conv.onnx"
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.parametrize("dw", [False, True])
 @pytest.mark.parametrize("bias", [True, False])
diff --git a/tests/brevitas/test_brevitas_qlinear.py b/tests/brevitas/test_brevitas_qlinear.py
index e78262fcb24a1fec1fa876a39c67bd3aa850299c..bcd75a545544122c1faacf4c321b19a489defe85 100644
--- a/tests/brevitas/test_brevitas_qlinear.py
+++ b/tests/brevitas/test_brevitas_qlinear.py
@@ -35,17 +35,18 @@ import torch
 from brevitas.core.quant import QuantType
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantLinear
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
-from finn.util.basic import gen_finn_dt_tensor
 
 export_onnx_path = "test_brevitas_qlinear.onnx"
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.parametrize("bias", [False, True])
 @pytest.mark.parametrize("out_features", [4])
diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py
index 01ba7f382535ea8a12a60f211b7718ca57164db4..b0c3d6088c27291f1f49dd2f1ee746b65ca0a737 100644
--- a/tests/brevitas/test_brevitas_relu_act_export.py
+++ b/tests/brevitas/test_brevitas_relu_act_export.py
@@ -38,15 +38,16 @@ from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantReLU
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_relu_act_export.onnx"
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.parametrize("abits", [2, 4, 8])
 @pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)])
@@ -110,6 +111,7 @@ scaling_impl.learned_value": torch.tensor(
     assert np.isclose(produced, expected, atol=1e-3).all()
     os.remove(export_onnx_path)
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.parametrize("abits", [2, 4, 8])
 @pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)])
diff --git a/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
index 9f17c0f26c42058f314a25c066c8ba37a06e0b65..403d406105e8e60e6ef87f833c495dc2974de68c 100644
--- a/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
+++ b/tests/brevitas/test_brevitas_scaled_qhardtanh_export.py
@@ -38,15 +38,16 @@ from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantHardTanh
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 
 export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx"
 
+
 @pytest.mark.brevitas_export
 @pytest.mark.parametrize("abits", [2, 4, 8])
 @pytest.mark.parametrize("narrow_range", [False, True])
diff --git a/tests/brevitas/test_brevitas_validate_mobilenet.py b/tests/brevitas/test_brevitas_validate_mobilenet.py
index 67e6b785a70c81717adadd3d2695017e0382edda..55915838e8a10d19d3aa6446d0bb667785bbd905 100644
--- a/tests/brevitas/test_brevitas_validate_mobilenet.py
+++ b/tests/brevitas/test_brevitas_validate_mobilenet.py
@@ -35,23 +35,23 @@ import os
 import torch
 import torchvision.datasets as datasets
 import torchvision.transforms as transforms
-
-import finn.core.onnx_exec as oxe
-import finn.transformation.streamline.absorb as absorb
-import finn.util.imagenet as imagenet_util
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import (
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     GiveUniqueParameterTensors,
     RemoveStaticGraphInputs,
 )
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.insert_topk import InsertTopK
-from finn.transformation.merge_onnx_models import MergeONNXModels
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.insert_topk import InsertTopK
+from qonnx.transformation.merge_onnx_models import MergeONNXModels
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
+import finn.util.imagenet as imagenet_util
 from finn.util.basic import make_build_dir
 from finn.util.pytorch import NormalizePreProc
 from finn.util.test import get_test_model_trained
@@ -61,6 +61,7 @@ mean = [0.485, 0.456, 0.406]
 std = 0.226
 ch = 3
 
+
 @pytest.mark.brevitas_export
 def test_brevitas_mobilenet_preproc():
     if "IMAGENET_VAL_PATH" not in os.environ.keys():
diff --git a/tests/end2end/test_end2end_access_board.py b/tests/end2end/test_end2end_access_board.py
index ee15980ffb1b750c993a4b499dce57a1b8133e57..ba3c49195b298059149303c63ef2db8ab6e16039 100644
--- a/tests/end2end/test_end2end_access_board.py
+++ b/tests/end2end/test_end2end_access_board.py
@@ -34,6 +34,7 @@ from finn.util.test import get_build_env
 
 
 @pytest.mark.board
+@pytest.mark.end2end
 def test_end2end_access_board():
     build_env = get_build_env("zynq", 5)
     if build_env["ip"] == "":
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index ad54ac9e29d17592d24b62a1f203165bdcfd7d94..ab82a00c234b48ced48f3987d929bb1f340083f5 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -42,19 +42,31 @@ from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from collections import OrderedDict
 from dataset_loading import cifar, mnist
 from datetime import datetime
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+    RemoveStaticGraphInputs,
+    RemoveUnusedTensors,
+)
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.insert_topk import InsertTopK
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from qonnx.transformation.merge_onnx_models import MergeONNXModels
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 from scipy.stats import linregress
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
 from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.core.throughput_test import throughput_test_remote, throughput_test_rtlsim
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
 from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
@@ -71,18 +83,6 @@ from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths
-from finn.transformation.general import (
-    GiveReadableTensorNames,
-    GiveUniqueNodeNames,
-    RemoveStaticGraphInputs,
-    RemoveUnusedTensors,
-)
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.insert_topk import InsertTopK
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.merge_onnx_models import MergeONNXModels
 from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.transformation.streamline import Streamline
@@ -314,6 +314,7 @@ def topology2dataset(topology):
 @pytest.mark.parametrize("abits", [1, 2])
 @pytest.mark.parametrize("topology", ["lfc", "tfc", "cnv"])
 @pytest.mark.parametrize("QONNX_export", [False, True])
+@pytest.mark.end2end
 class TestEnd2End:
     def test_export(self, topology, wbits, abits, QONNX_export):
         if wbits > abits:
@@ -672,6 +673,9 @@ class TestEnd2End:
     @pytest.mark.vitis
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
     def test_build(self, topology, wbits, abits, QONNX_export, kind):
+        # temporarily skip Alveo builds
+        if kind == "alveo":
+            pytest.skip("Alveo tests temporarily excluded")
         if kind == "alveo" and ("VITIS_PATH" not in os.environ):
             pytest.skip("VITIS_PATH not set")
         prev_chkpt_name = get_checkpoint_name(
@@ -694,6 +698,9 @@ class TestEnd2End:
     @pytest.mark.vitis
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
     def test_make_pynq_driver(self, topology, wbits, abits, QONNX_export, kind):
+        # temporarily skip Alveo builds
+        if kind == "alveo":
+            pytest.skip("Alveo tests temporarily excluded")
         if kind == "alveo" and ("VITIS_PATH" not in os.environ):
             pytest.skip("VITIS_PATH not set")
         prev_chkpt_name = get_checkpoint_name(
@@ -708,6 +715,9 @@ class TestEnd2End:
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
     def test_deploy(self, topology, wbits, abits, QONNX_export, kind):
+        # temporarily skip Alveo builds
+        if kind == "alveo":
+            pytest.skip("Alveo tests temporarily excluded")
         prev_chkpt_name = get_checkpoint_name(
             topology, wbits, abits, QONNX_export, "driver_" + kind
         )
@@ -731,6 +741,9 @@ class TestEnd2End:
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
     def test_run_on_hw(self, topology, wbits, abits, QONNX_export, kind):
+        # temporarily skip Alveo builds
+        if kind == "alveo":
+            pytest.skip("Alveo tests temporarily excluded")
         prev_chkpt_name = get_checkpoint_name(
             topology, wbits, abits, QONNX_export, "deploy_" + kind
         )
@@ -755,6 +768,9 @@ class TestEnd2End:
 
     @pytest.mark.parametrize("kind", ["zynq", "alveo"])
     def test_throughput_hw(self, topology, wbits, abits, QONNX_export, kind):
+        # temporarily skip Alveo builds
+        if kind == "alveo":
+            pytest.skip("Alveo tests temporarily excluded")
         prev_chkpt_name = get_checkpoint_name(
             topology, wbits, abits, QONNX_export, "deploy_" + kind
         )
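
The import moves in this file follow one rule: core infrastructure (ModelWrapper, DataType, getCustomOp, the generic graph transformations, the util helpers) now comes from the qonnx package, while only FINN-specific pieces such as the fpgadataflow and streamline transformations stay under finn. A minimal sketch of a compatibility shim for a downstream script that wants to run against both layouts; the try/except pattern is an illustration, not something this patch introduces:

# hypothetical compatibility shim, not part of this patch:
# prefer the new qonnx locations, fall back to the pre-move finn ones
try:
    from qonnx.core.datatype import DataType
    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.custom_op.registry import getCustomOp
except ImportError:
    from finn.core.datatype import DataType
    from finn.core.modelwrapper import ModelWrapper
    from finn.custom_op.registry import getCustomOp
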
diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py
index 2da2ac467478d3a3a1427f1c3837d54dd250ae7f..b6482dc96c4d866618d19d810fa9385b20aa0222 100644
--- a/tests/end2end/test_end2end_cybsec_mlp.py
+++ b/tests/end2end/test_end2end_cybsec_mlp.py
@@ -43,12 +43,12 @@ from brevitas.core.quant import QuantType
 from brevitas.export.onnx.generic.manager import BrevitasONNXManager
 from brevitas.nn import QuantIdentity, QuantLinear, QuantReLU
 from brevitas.quant_tensor import QuantTensor
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.util.cleanup import cleanup as qonnx_cleanup
 
 import finn.builder.build_dataflow as build
 import finn.builder.build_dataflow_config as build_cfg
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.basic import make_build_dir
 from finn.util.test import get_build_env, load_test_checkpoint_or_skip
@@ -86,6 +86,7 @@ class CybSecMLPForExport(nn.Module):
 
 
 @pytest.mark.parametrize("QONNX_export", [False, True])
+@pytest.mark.end2end
 def test_end2end_cybsec_mlp_export(QONNX_export):
     assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
     # load up trained net in Brevitas
@@ -180,6 +181,7 @@ def test_end2end_cybsec_mlp_export(QONNX_export):
 
 @pytest.mark.slow
 @pytest.mark.vivado
+@pytest.mark.end2end
 @pytest.mark.parametrize("QONNX_export", [False, True])
 def test_end2end_cybsec_mlp_build(QONNX_export):
     model_file = get_checkpoint_name("export", QONNX_export)
@@ -226,6 +228,7 @@ def test_end2end_cybsec_mlp_build(QONNX_export):
     shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build", QONNX_export))
 
 
+@pytest.mark.end2end
 @pytest.mark.parametrize("QONNX_export", [False, True])
 def test_end2end_cybsec_mlp_run_on_hw(QONNX_export):
     build_env = get_build_env(build_kind, target_clk_ns)
diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py
index 792da28a79f52166afbd8fb9d253a0668f4f9a6d..2f4df956acb79c2c4047e6430ccb6f17b76be2e0 100644
--- a/tests/end2end/test_end2end_mobilenet_v1.py
+++ b/tests/end2end/test_end2end_mobilenet_v1.py
@@ -33,36 +33,36 @@ import os
 import time
 import torch
 from PIL import Image
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
+from qonnx.transformation.double_to_single_float import DoubleToSingleFloat
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+    GiveUniqueParameterTensors,
+    RemoveUnusedTensors,
+)
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.insert_topk import InsertTopK
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from qonnx.transformation.merge_onnx_models import MergeONNXModels
+from qonnx.transformation.remove import RemoveIdentityOps
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
 import finn.transformation.streamline.reorder as reorder
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
-from finn.transformation.double_to_single_float import DoubleToSingleFloat
-from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import (
-    GiveReadableTensorNames,
-    GiveUniqueNodeNames,
-    GiveUniqueParameterTensors,
-    RemoveUnusedTensors,
-)
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.insert_topk import InsertTopK
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.merge_onnx_models import MergeONNXModels
-from finn.transformation.remove import RemoveIdentityOps
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
@@ -87,6 +87,7 @@ extra_fold = 1
 first_layer_res_type = "dsp"
 
 
+@pytest.mark.end2end
 def test_end2end_mobilenet_export():
     # export preprocessing
     preproc_onnx = build_dir + "/end2end_mobilenet_preproc.onnx"
@@ -142,6 +143,7 @@ def test_end2end_mobilenet_export():
     assert os.path.isfile(build_dir + "/end2end_mobilenet_preproc.onnx")
 
 
+@pytest.mark.end2end
 def test_end2end_mobilenet_tidy_and_merge_with_preproc():
     preproc_model = load_test_checkpoint_or_skip(
         build_dir + "/end2end_mobilenet_preproc.onnx"
@@ -164,6 +166,7 @@ def test_end2end_mobilenet_tidy_and_merge_with_preproc():
     model.save(build_dir + "/end2end_mobilenet_tidy.onnx")
 
 
+@pytest.mark.end2end
 def test_end2end_mobilenet_streamline():
     model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_tidy.onnx")
     model = model.transform(Streamline())
@@ -194,6 +197,7 @@ def test_end2end_mobilenet_streamline():
     assert len(model.get_nodes_by_op_type("Mul")) == 0  # no Mul ops remain
 
 
+@pytest.mark.end2end
 def test_end2end_mobilenet_lowering():
     model = load_test_checkpoint_or_skip(
         build_dir + "/end2end_mobilenet_streamlined.onnx"
@@ -208,6 +212,7 @@ def test_end2end_mobilenet_lowering():
     model.save(build_dir + "/end2end_mobilenet_lowered.onnx")
 
 
+@pytest.mark.end2end
 def test_end2end_mobilenet_convert_to_hls_layers():
     model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_lowered.onnx")
     model = model.transform(to_hls.InferPool_Batch())
@@ -222,6 +227,7 @@ def test_end2end_mobilenet_convert_to_hls_layers():
     model.save(build_dir + "/end2end_mobilenet_hls_layers.onnx")
 
 
+@pytest.mark.end2end
 def test_end2end_mobilenet_folding():
     model = load_test_checkpoint_or_skip(
         build_dir + "/end2end_mobilenet_hls_layers.onnx"
@@ -285,6 +291,7 @@ def test_end2end_mobilenet_folding():
     model.save(build_dir + "/end2end_mobilenet_folded.onnx")
 
 
+@pytest.mark.end2end
 def test_end2end_mobilenet_create_dataflow_partition():
     model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx")
     parent_model = model.transform(CreateDataflowPartition())
@@ -299,6 +306,7 @@ def test_end2end_mobilenet_create_dataflow_partition():
 
 @pytest.mark.slow
 @pytest.mark.vivado
+@pytest.mark.end2end
 @pytest.mark.xfail
 def test_end2end_mobilenet_cppsim():
     model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx")
diff --git a/tests/end2end/test_ext_weights.py b/tests/end2end/test_ext_weights.py
index 550dab4d0321001547efe97487abc543271dcf2e..9483ccf0b27ebc385ed017d0a0b316ab189a1f96 100644
--- a/tests/end2end/test_ext_weights.py
+++ b/tests/end2end/test_ext_weights.py
@@ -68,6 +68,7 @@ def get_checkpoint_name(step):
         return build_dir + "/end2end_ext_weights_%s.onnx" % (step)
 
 
+@pytest.mark.end2end
 def test_end2end_ext_weights_download():
     if not os.path.isfile(onnx_zip_local):
         wget.download(onnx_zip_url, out=onnx_zip_local)
@@ -78,6 +79,7 @@ def test_end2end_ext_weights_download():
 
 @pytest.mark.slow
 @pytest.mark.vivado
+@pytest.mark.end2end
 def test_end2end_ext_weights_build():
     model_file = get_checkpoint_name("download")
     load_test_checkpoint_or_skip(model_file)
@@ -110,6 +112,7 @@ def test_end2end_ext_weights_build():
 
 
 @pytest.mark.board
+@pytest.mark.end2end
 def test_end2end_ext_weights_dataset():
     # make sure we have local copies of mnist dataset files
     subprocess.check_output(["mkdir", "-p", mnist_local])
@@ -125,6 +128,7 @@ def test_end2end_ext_weights_dataset():
     subprocess.check_output(rsync_dataset_cmd)
 
 
+@pytest.mark.end2end
 def test_end2end_ext_weights_run_on_hw():
     build_env = get_build_env(build_kind, target_clk_ns)
     deploy_dir = get_checkpoint_name("build")
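
The @pytest.mark.end2end marker added throughout these files lets the long-running end-to-end suite be selected or excluded as a group, e.g. pytest -m end2end or pytest -m "not end2end". Custom markers should be registered so pytest does not warn about unknown marks; a minimal sketch of that registration in a conftest.py, assuming the repo does not already declare the marker in its pytest config:

# hypothetical conftest.py fragment; the marker may equally well be
# declared in setup.cfg or pytest.ini
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "end2end: long-running end-to-end flow tests"
    )
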
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index f3649f5f00eb811df524c6ff28ca3728e6fc8faf..49ee32c71ee941ff7435d4c12ccadae3f8e55c5e 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -30,10 +30,10 @@ import pytest
 
 import os
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.basic import gen_finn_dt_tensor, get_by_name
 
-import finn.util.basic as util
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 
 
@@ -76,12 +76,12 @@ def test_code_gen_trafo():
     model.set_tensor_datatype("inp", idt)
     model.set_tensor_datatype("outp", odt)
     model.set_tensor_datatype("weights", wdt)
-    W = util.gen_finn_dt_tensor(wdt, (mw, mh))
+    W = gen_finn_dt_tensor(wdt, (mw, mh))
     model.set_initializer("weights", W)
 
     model = model.transform(PrepareCppSim())
     for node in model.graph.node:
-        code_gen_attribute = util.get_by_name(node.attribute, "code_gen_dir_cppsim")
+        code_gen_attribute = get_by_name(node.attribute, "code_gen_dir_cppsim")
         tmp_dir = code_gen_attribute.s.decode("UTF-8")
         assert os.path.isdir(
             tmp_dir
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index c18fb2ede84b2b9a6401cf9e148e2c8cb8646329..9bafb101cedabc99d97356069c883cab4ed8a87f 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -30,10 +30,10 @@ import pytest
 
 import os
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.basic import gen_finn_dt_tensor, get_by_name
 
-import finn.util.basic as util
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 
@@ -77,13 +77,13 @@ def test_compilation_trafo():
     model.set_tensor_datatype("inp", idt)
     model.set_tensor_datatype("outp", odt)
     model.set_tensor_datatype("weights", wdt)
-    W = util.gen_finn_dt_tensor(wdt, (mw, mh))
+    W = gen_finn_dt_tensor(wdt, (mw, mh))
     model.set_initializer("weights", W)
 
     model = model.transform(PrepareCppSim())
     model = model.transform(CompileCppSim())
     for node in model.graph.node:
-        compilation_attribute = util.get_by_name(node.attribute, "executable_path")
+        compilation_attribute = get_by_name(node.attribute, "executable_path")
         executable = compilation_attribute.s.decode("UTF-8")
         print(executable)
         assert os.path.isfile(
diff --git a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
index 8cb0360bae1790e6dd49f6d34f372ebaea0e79c8..7b3e20616410f54e4718290baec9a510a0d49c0d 100644
--- a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
@@ -30,25 +30,25 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.util.basic import gen_finn_dt_tensor
 
 
 # conv_config:
diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
index 946b748e583297c2e2fa52d73fed5f13fcba14ab..0f19b6d79ab0ed77981022f286fabd430094d69f 100644
--- a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
@@ -30,21 +30,21 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def prepare_inputs(input_tensor):
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
index f24659f602fd2bac458dc0408c5aa603c62c1767..0760ff9b37487f4a1ac06853055d2e47b7269f9e 100755
--- a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
@@ -29,27 +29,27 @@
 import pytest
 
 import numpy as np
+import qonnx.core.data_layout as DataLayout
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
+from qonnx.transformation.general import GiveUniqueNodeNames, RemoveUnusedTensors
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from qonnx.util.basic import gen_finn_dt_tensor
 
-import finn.core.data_layout as DataLayout
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames, RemoveUnusedTensors
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
@@ -163,7 +163,7 @@ def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
                     "MultiThreshold",
                     ["conv_out", "thres1_param"],
                     ["thres1_out"],
-                    domain="finn.custom_op.general",
+                    domain="qonnx.custom_op.general",
                     out_dtype="UINT4",
                 ),
                 flatten_node,
@@ -174,7 +174,7 @@ def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
                     "MultiThreshold",
                     ["matmul_out", "thres2_param"],
                     ["global_out"],
-                    domain="finn.custom_op.general",
+                    domain="qonnx.custom_op.general",
                     out_dtype="UINT4",
                 ),
             ],
@@ -202,7 +202,7 @@ def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
     model.set_initializer(
         "matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape)
     )
-    model.set_initializer("reshape_shape", np.array([1, -1]))
+    model.set_initializer("reshape_shape", np.array([1, -1], dtype=np.int64))
 
     model = model.transform(InferShapes())
     model = model.transform(InferDataTypes())
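
The domain rewrites in this file matter beyond style: for a custom ONNX node, the domain string names the Python package in which getCustomOp looks up the op_type, so a MultiThreshold node still carrying domain="finn.custom_op.general" would fail to resolve once the general ops live in qonnx. A minimal sketch of that contract, with inputs and attributes abbreviated from the test above:

from onnx import helper
from qonnx.custom_op.registry import getCustomOp

# the domain attribute names the package providing the op implementation
node = helper.make_node(
    "MultiThreshold",
    ["conv_out", "thres1_param"],
    ["thres1_out"],
    domain="qonnx.custom_op.general",
    out_dtype="UINT4",
)
# resolves <domain>.<op_type> to a Python class; a stale finn.* domain
# would raise at this point
op_inst = getCustomOp(node)
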
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
index dc102a0e550544a61536ea6fbfc8b0dba0c7457b..55dc77cafb898ead28a7cbb9641e0b40db276919 100644
--- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -30,25 +30,25 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.util.basic import gen_finn_dt_tensor
 
 # conv_config  kernel_size,stride, pad
 
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
index 32c338b7189af2d16ba540df026c174f58821e05..9997f28438db113e85ce92138b3c08b223185a2c 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
@@ -33,21 +33,21 @@ import pytest
 import brevitas.onnx as bo
 import numpy as np
 import os
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.util.test import get_test_model_trained
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index 2f101fde4775e76caaf57970ab3083589789343a..fd4e3679d7f19471509f8144ac72b4964f5b4a52 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -35,19 +35,19 @@ import onnx.numpy_helper as nph
 import os
 import torch
 from pkgutil import get_data
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.util.test import get_test_model_trained
@@ -111,7 +111,7 @@ def test_convert_to_hls_layers_tfc_w1a1():
     model = model.transform(CompileCppSim())
     model = model.transform(SetExecMode("cppsim"))
 
-    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
     input_dict = {"global_in": nph.to_array(input_tensor)}
@@ -177,7 +177,7 @@ def test_convert_to_hls_layers_tfc_w1a2():
     model = model.transform(PrepareCppSim())
     model = model.transform(CompileCppSim())
     model = model.transform(SetExecMode("cppsim"))
-    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
     input_dict = {"global_in": nph.to_array(input_tensor)}
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
index 4d72dd0a8420a1e5f6b8fce7dde1905fadc433b7..79a48793e0c4f062654e43aadcaf09ebf6d7da5b 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
@@ -31,24 +31,25 @@ import pytest
 import numpy as np
 import os
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+    SortGraph,
+)
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.insert_topk import InsertTopK
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import (
-    GiveReadableTensorNames,
-    GiveUniqueNodeNames,
-    SortGraph,
-)
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.streamline.absorb import (
     AbsorbConsecutiveTransposes,
     AbsorbScalarMulAddIntoTopK,
@@ -61,7 +62,6 @@ from finn.transformation.streamline.reorder import (
     MoveAddPastMul,
     MoveScalarLinearPastInvariants,
 )
-from finn.util.basic import gen_finn_dt_tensor
 from finn.util.test import soft_verify_topk
 
 export_onnx_path = "test_output_synthetic.onnx"
@@ -127,12 +127,12 @@ def make_model(ch, ifmdim):
     model = ModelWrapper(model)
 
     # set initializers for scalar add/mul nodes
-    model.set_initializer(add0_node.input[1], np.array([0.0]))
-    model.set_initializer(add1_node.input[1], np.array([7.0]))
-    model.set_initializer(add2_node.input[1], np.array([8.0]))
-    model.set_initializer(mul1_node.input[1], np.array([2.0]))
-    model.set_initializer(mul2_node.input[1], np.array([2.0]))
-    model.set_initializer(reshape_node.input[1], np.array([1, -1]))
+    model.set_initializer(add0_node.input[1], np.array([0.0], dtype=np.float32))
+    model.set_initializer(add1_node.input[1], np.array([7.0], dtype=np.float32))
+    model.set_initializer(add2_node.input[1], np.array([8.0], dtype=np.float32))
+    model.set_initializer(mul1_node.input[1], np.array([2.0], dtype=np.float32))
+    model.set_initializer(mul2_node.input[1], np.array([2.0], dtype=np.float32))
+    model.set_initializer(reshape_node.input[1], np.array([1, -1], dtype=np.int64))
 
     return model
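
The dtype pins above fix two distinct issues: np.array([0.0]) defaults to float64 while these graphs carry float32 constants, and the ONNX Reshape operator requires its shape input to be an int64 tensor, which np.array([1, -1]) does not guarantee on every platform. A small sketch of the difference, with hypothetical variable names:

import numpy as np

# numpy defaults to float64 / the platform integer type
assert np.array([0.0]).dtype == np.float64

# pinning the dtype makes the initializers spec-conformant
scalar_init = np.array([0.0], dtype=np.float32)  # float32 graph constant
shape_init = np.array([1, -1], dtype=np.int64)   # Reshape wants tensor(int64)
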
 
diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
index 7595275c3be34e947f40415d050c0f3e4a9a7a58..ef9bd7a13dcecf7aa61ecb982ac6393d7813a4d5 100644
--- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
+++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
@@ -30,22 +30,22 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_single_maxpool_modelwrapper(
@@ -100,7 +100,7 @@ def make_single_quantavpool_modelwrapper(k, stride, ifm_ch, ifm_dim, ofm_dim, id
         "QuantAvgPool2d",
         ["inp"],
         ["outp"],
-        domain="finn.custom_op.general",
+        domain="qonnx.custom_op.general",
         stride=stride,
         kernel=k,
         ibits=idt.bitwidth(),
diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py
index 24c8b0d028222380bd7fa36887c59383a75b0229..5228ade3d0f4db3bd99f5fcccb7aee41f57ed73b 100644
--- a/tests/fpgadataflow/test_depthwise_convolution.py
+++ b/tests/fpgadataflow/test_depthwise_convolution.py
@@ -31,12 +31,15 @@ import pytest
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.convert_to_hls_layers import (
     InferConvInpGen,
@@ -47,9 +50,6 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
 
 
 def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
@@ -70,7 +70,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
         tdt = DataType["INT32"]
         thresh_node = oh.make_node(
             "MultiThreshold",
-            domain="finn.custom_op.general",
+            domain="qonnx.custom_op.general",
             inputs=["outp", "T"],
             outputs=["out_act"],
             data_layout="NHWC",
@@ -93,7 +93,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
 
     im2col_node = oh.make_node(
         "Im2Col",
-        domain="finn.custom_op.general",
+        domain="qonnx.custom_op.general",
         inputs=["inp"],
         outputs=["im2col_out"],
         kernel_size=[k, k],
@@ -133,7 +133,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
 
     w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k])
     # create sparse matrix
-    W_matrix = np.zeros((ofm_ch, ifm_ch, k, k))
+    W_matrix = np.zeros((ofm_ch, ifm_ch, k, k), dtype=np.float32)
     for ch in range(ifm_ch):
         W_matrix[ch][ch] = w_tensor[ch][0]
     W_matrix = W_matrix.astype(np.float32)
diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
index a3927cd2aa6a9e87c32068f986ab6030fbacc559..6d881f45b60384d9a78b5d9f9705581a10b48e6c 100644
--- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
@@ -30,20 +30,20 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_addstreams_modelwrapper(ch, pe, idt):
diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
index f774a4ff53c636419d8eb7dcfba866fd601f0c98..ceafda90e54004c7aea8786d003b6adf1defab35 100644
--- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
+++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
@@ -30,21 +30,21 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs):
diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e79ea2dad2aa4200f998fd8953672b9f49b2b86
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py
@@ -0,0 +1,224 @@
+# Copyright (c) 2022, Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+from pyverilator.util.axi_utils import axilite_read, axilite_write
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
+
+import finn.core.onnx_exec as oxe
+from finn.core.rtlsim_exec import rtlsim_exec
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.insert_hook import InsertHook
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+
+test_fpga_part = "xczu3eg-sbva484-1-e"
+target_clk_ns = 5
+
+
+def create_two_fc_model():
+    # create a model with two MatrixVectorActivation instances
+    wdt = DataType["INT2"]
+    idt = DataType["INT32"]
+    odt = DataType["INT32"]
+    m = 4
+    actval = 0
+    no_act = 1
+    binary_xnor_mode = 0
+    pe = 2
+    simd = 2
+
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, m])
+    mid = helper.make_tensor_value_info("mid", TensorProto.FLOAT, [1, m])
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, m])
+
+    fc0 = helper.make_node(
+        "MatrixVectorActivation",
+        ["inp", "w0"],
+        ["mid"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        MW=m,
+        MH=m,
+        SIMD=simd,
+        PE=pe,
+        inputDataType=idt.name,
+        weightDataType=wdt.name,
+        outputDataType=odt.name,
+        ActVal=actval,
+        binaryXnorMode=binary_xnor_mode,
+        noActivation=no_act,
+        mem_mode="decoupled",
+    )
+
+    fc1 = helper.make_node(
+        "MatrixVectorActivation",
+        ["mid", "w1"],
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        MW=m,
+        MH=m,
+        SIMD=simd,
+        PE=pe,
+        inputDataType=idt.name,
+        weightDataType=wdt.name,
+        outputDataType=odt.name,
+        ActVal=actval,
+        binaryXnorMode=binary_xnor_mode,
+        noActivation=no_act,
+        mem_mode="decoupled",
+    )
+
+    graph = helper.make_graph(
+        nodes=[fc0, fc1],
+        name="fclayer_graph",
+        inputs=[inp],
+        outputs=[outp],
+        value_info=[mid],
+    )
+
+    model = helper.make_model(graph, producer_name="fclayer-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("mid", idt)
+    model.set_tensor_datatype("outp", odt)
+    model.set_tensor_datatype("w0", wdt)
+    model.set_tensor_datatype("w1", wdt)
+
+    # generate weights
+    w0 = np.eye(m, dtype=np.float32)
+    w1 = np.eye(m, dtype=np.float32)
+    model.set_initializer("w0", w0)
+    model.set_initializer("w1", w1)
+
+    return model
+
+
+@pytest.mark.fpgadataflow
+def test_fpgadataflow_checksum():
+    # use a graph consisting of two fc layers to test
+    # checksum node insertion
+    model = create_two_fc_model()
+
+    # set checksum output hook
+    for n in model.graph.node:
+        n0 = getCustomOp(n)
+        n0.set_nodeattr("output_hook", "checksum")
+
+    model = model.transform(InsertHook())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferShapes())
+
+    assert (
+        len(model.get_nodes_by_op_type("CheckSum")) == 2
+    ), """Insertion of
+        checksum layers was unsuccessful"""
+
+    # to verify the functionality of the checksum layer,
+    # cppsim and rtlsim results are compared
+
+    x = gen_finn_dt_tensor(DataType["INT32"], (1, 4))
+
+    # cppsim
+    model = model.transform(SetExecMode("cppsim"))
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    inp = {"global_in": x}
+    y_cppsim = oxe.execute_onnx(model, inp, return_full_exec_context=True)
+    checksum0_cppsim = y_cppsim["CheckSum_0_out1"]
+    checksum1_cppsim = y_cppsim["CheckSum_1_out1"]
+
+    # the identity weights stream identical frames through both layers, so the checksums are equal
+    assert checksum0_cppsim == checksum1_cppsim, "CheckSums are not equal"
+
+    # rtlsim
+    model = model.transform(InsertFIFO(True))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+    model.set_metadata_prop("exec_mode", "rtlsim")
+
+    # define hook function to read out the checksum and drain registers from axilite
+    checksums = []
+    drain = []
+
+    def read_checksum_and_drain(sim):
+        chk_addr = 16
+        drain_addr = 32
+        for i in range(len(model.get_nodes_by_op_type("CheckSum"))):
+            axi_name = "s_axi_checksum_{}_".format(i)
+            checksums.append(axilite_read(sim, chk_addr, basename=axi_name))
+            drain.append(axilite_read(sim, drain_addr, basename=axi_name))
+
+    drain_value = False
+
+    def write_drain(sim):
+        addr = 32
+        for i in range(len(model.get_nodes_by_op_type("CheckSum"))):
+            axi_name = "s_axi_checksum_{}_".format(i)
+            axilite_write(sim, addr, drain_value, basename=axi_name)
+
+    rtlsim_exec(model, inp, pre_hook=write_drain, post_hook=read_checksum_and_drain)
+    checksum0_rtlsim = int(checksums[0])
+    checksum1_rtlsim = int(checksums[1])
+    checksum0_drain = int(drain[0])
+    checksum1_drain = int(drain[1])
+
+    assert (
+        checksum0_rtlsim == checksum0_cppsim
+    ), """The first checksum does not
+        match between cppsim and rtlsim"""
+    assert (
+        checksum1_rtlsim == checksum1_cppsim
+    ), """The second checksum does not
+        match between cppsim and rtlsim"""
+
+    assert (
+        checksum0_drain == 0
+    ), "Drain read doesn't match drain write for first checksum"
+    assert (
+        checksum1_drain == 0
+    ), "Drain read doesn't match drain write for second checksum"
+
+    # TODO: test for drain set to true
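
A hedged sketch of the follow-up the TODO points at, appended to the end of test_fpgadataflow_checksum and reusing the hooks and lists defined in the test body above; untested, and it assumes the same register map as the reads and writes earlier in the test:

# hypothetical drain-enabled run for the TODO above (untested sketch)
checksums.clear()
drain.clear()
drain_value = True  # write_drain reads this at call time via its closure
rtlsim_exec(model, inp, pre_hook=write_drain, post_hook=read_checksum_and_drain)
# the drain register should read back the value that was written
assert int(drain[0]) == 1
# if checksumming continues while draining (assumed), this should hold too
assert int(checksums[0]) == checksum0_cppsim
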
diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py
index 8a7d78610132ff71ff92ee6a69ad7e089604463b..dddc470ec2ed88faf078f19bd0d2a7a4a6b5b6cd 100644
--- a/tests/fpgadataflow/test_fpgadataflow_concat.py
+++ b/tests/fpgadataflow/test_fpgadataflow_concat.py
@@ -32,10 +32,12 @@ import numpy as np
 import onnx
 import torch
 from io import BytesIO
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 from torch import nn
 
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.convert_to_hls_layers import InferConcatLayer
@@ -46,8 +48,6 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_concat_model(i_shapes, idt):
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index afac8dc6f30982b63827dcd5a9ee4b70c92235ae..a196ecbb61b74843ddc8efa4ac3c5ab8197e64fe 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -30,20 +30,20 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_single_im2col_modelwrapper(
@@ -61,7 +61,7 @@ def make_single_im2col_modelwrapper(
         "Im2Col",
         ["inp"],
         ["outp"],
-        domain="finn.custom_op.general",
+        domain="qonnx.custom_op.general",
         stride=[stride, stride],
         kernel_size=[k, k],
         input_shape=str((1, ifm_dim, ifm_dim, ifm_ch)),
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
index 0d8b26632307b2b514c2aacaa96b28989286cd0d..0fc3ca82cfa919079a324160e4876377ac4dc3b4 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
@@ -30,21 +30,21 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 fpga_part = "xczu3eg-sbva484-1-e"
 
@@ -70,7 +70,7 @@ def make_single_im2col_modelwrapper(
         "Im2Col",
         ["inp"],
         ["outp"],
-        domain="finn.custom_op.general",
+        domain="qonnx.custom_op.general",
         stride=[stride_h, stride_w],
         kernel_size=[k_h, k_w],
         input_shape=str((1, ifm_dim_h, ifm_dim_w, ifm_ch)),
diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
index 838dec81d32799d5a2afa6cfda8db632b2ac3355..7ec254405d23f0a972de7f9d02d2ab021ed3d959 100644
--- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
@@ -30,22 +30,22 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_dupstreams_modelwrapper(ch, pe, idim, idt, n_dupl):
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py
index 973bfcca2e9862769b2b973365682cbfbc4b4512..bcf2a1fe3d304ac27a06b544825a84f5757830c9 100644
--- a/tests/fpgadataflow/test_fpgadataflow_dwc.py
+++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py
@@ -29,16 +29,16 @@
 import pytest
 
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype):
diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py
index 15e7f594ee4916559324f35d42b07de9acc5a2c6..b9c74185d9f104e15355a5dd6021d7e74dac641e 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fifo.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py
@@ -30,16 +30,16 @@ import pytest
 
 import os
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 build_dir = os.environ["FINN_BUILD_DIR"]
 test_fpga_part = "xc7z020clg400-1"
diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
index ce21ea0c321587b4d73b64dbd2729090f141cce8..2e2da0da7a217091d76d0a59a2a36a8e6a28af8e 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
@@ -31,21 +31,22 @@ import pytest
 import numpy as np
 import os
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor, pynq_part_map
+from finn.util.basic import pynq_part_map
 
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
 test_fpga_part = pynq_part_map[test_pynq_board]
diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
index fc622b10e9abcc3b050e30fc275ca927b89c7d9c..a37e6e3271a9f7e033e6beaa6dbed01271365101 100644
--- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
+++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
@@ -30,20 +30,20 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_accpool_modelwrapper(ch, pe, idim, idt):
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index 5fc934f1cda9715bd77cf00a39c2fb4dc1268abe..a3809e61304ef031407e7fbec0f9037382d999ad 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -31,11 +31,14 @@ import pytest
 import numpy as np
 import os
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.util.basic import gen_finn_dt_tensor
 
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
@@ -48,14 +51,7 @@ from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
 from finn.transformation.fpgadataflow.vitis_build import VitisBuild
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.util.basic import (
-    alveo_default_platform,
-    alveo_part_map,
-    gen_finn_dt_tensor,
-    pynq_part_map,
-)
+from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map
 from finn.util.pyverilator import pyverilate_stitched_ip
 from finn.util.test import load_test_checkpoint_or_skip
 
@@ -364,8 +360,6 @@ def test_fpgadataflow_ipstitch_vitis_end2end(board, period_ns, extw):
 @pytest.mark.fpgadataflow
 @pytest.mark.slow
 @pytest.mark.vivado
-# temporarily marked as xfail
-@pytest.mark.xfail
 def test_fpgadataflow_ipstitch_zynqbuild_end2end(board):
     model = create_two_fc_model()
     if model.graph.node[0].op_type == "StreamingDataflowPartition":
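
Besides the import moves, this file drops the temporary xfail marker, re-enabling test_fpgadataflow_ipstitch_zynqbuild_end2end as a regular test. For context, pytest.mark.xfail does not skip a test: the body still runs, a failure is reported as XFAIL and an unexpected pass as XPASS. A toy illustration (the test name and body are invented for this note):

    import pytest

    @pytest.mark.xfail  # still executed; failure -> XFAIL, unexpected pass -> XPASS
    def test_known_issue():
        assert 1 + 1 == 3  # deliberately failing body, for illustration only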
diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
index 2858426d1ee4b1f91f5de807ccded4ffe35a3a40..a9b98ecaf80b4c86fc1e9ccec23e6d97c5982f55 100644
--- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py
+++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
@@ -30,18 +30,18 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 from finn.util.test import soft_verify_topk
 
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_lookup.py b/tests/fpgadataflow/test_fpgadataflow_lookup.py
index 0c284a530319290eb406c6b54a80e4f52d7ed1fa..da4204c81ac55fcde317973602b73a0738e4ff01 100644
--- a/tests/fpgadataflow/test_fpgadataflow_lookup.py
+++ b/tests/fpgadataflow/test_fpgadataflow_lookup.py
@@ -31,12 +31,16 @@ import pytest
 import numpy as np
 import torch
 from brevitas.export import FINNManager
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 from torch import nn
 
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.convert_to_hls_layers import InferLookupLayer
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
@@ -45,10 +49,6 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_lookup_model(embeddings, ishape, idt, edt):
diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py
index 2b638384412f6d1198f3a18949ec7a4c695bf0ed..d1895a12675dce69070d280381a9982060e20c21 100644
--- a/tests/fpgadataflow/test_fpgadataflow_mvau.py
+++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py
@@ -29,24 +29,24 @@
 import pytest
 
 import numpy as np
+import qonnx.custom_op.general.xnorpopcount as xp
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.multithreshold import multithreshold
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-import finn.custom_op.general.xnorpopcount as xp
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.multithreshold import multithreshold
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
 
 
 def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None):
diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
index 292a7dc7a38ec3e5f18b4d3ad243a34b4909d63f..e3c79fa44fb57718d359b58d1a8716746f6668fb 100644
--- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
+++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
@@ -29,14 +29,14 @@
 import pytest
 
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveUniqueNodeNames
 
 from finn.analysis.fpgadataflow.res_estimation import (
     res_estimation,
     res_estimation_complete,
 )
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.general import GiveUniqueNodeNames
 
 
 def check_two_dict_for_equality(dict1, dict2):
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
index 7d95cba11accae4e126d69013850401add4ab9a4..706679b6809844d0b2924411440088ea892ba7a9 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
@@ -31,15 +31,18 @@ import pytest
 import numpy as np
 import os
 from onnx import TensorProto, helper
+from pyverilator.util.axi_utils import axilite_read, axilite_write
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.multithreshold import multithreshold
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.rtlsim_exec import rtlsim_exec
-from finn.custom_op.general.multithreshold import multithreshold
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
@@ -48,9 +51,6 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
-from finn.util.pyverilator import axilite_read, axilite_write
 
 test_fpga_part = "xczu3eg-sbva484-1-e"
 target_clk_ns = 5
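
axilite_read and axilite_write, previously vendored under finn.util.pyverilator, are now imported directly from the PyVerilator distribution (pyverilator.util.axi_utils), so FINN no longer carries its own copies. A hedged usage sketch — the helper below and its basename default are illustrative, not part of this patch:

    from pyverilator.util.axi_utils import axilite_read, axilite_write

    def poke_and_readback(sim, addr, val, basename="s_axilite_0_"):
        # write a register over the AXI-lite interface of a PyVerilator
        # simulation handle `sim`, then read the same byte offset back
        axilite_write(sim, addr, val, basename=basename)
        return axilite_read(sim, addr, basename=basename)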
diff --git a/tests/fpgadataflow/test_fpgadataflow_upsampler.py b/tests/fpgadataflow/test_fpgadataflow_upsampler.py
index 362d9def1028c46a8ebf1d79649971156b1d57a3..d1ef0b890a66524b7cbd055a413561961ebcb4a7 100644
--- a/tests/fpgadataflow/test_fpgadataflow_upsampler.py
+++ b/tests/fpgadataflow/test_fpgadataflow_upsampler.py
@@ -32,13 +32,18 @@ import numpy as np
 import os
 import torch
 from brevitas.export import FINNManager
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.make_input_chanlast import MakeInputChannelsLast
 from torch import nn
 
 import finn.core.onnx_exec as oxe
 import finn.transformation.streamline.absorb as absorb
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.convert_to_hls_layers import InferUpsample
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
@@ -46,11 +51,6 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.make_input_chanlast import MakeInputChannelsLast
 
 tmpdir = os.environ["FINN_BUILD_DIR"]
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py
index 75e3eab40bac1c1c7edc74c8ae0082fc55b07907..c48448787d8a3bb926c1e94850be6e99e8c106d3 100644
--- a/tests/fpgadataflow/test_fpgadataflow_vvau.py
+++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py
@@ -30,25 +30,25 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.multithreshold import multithreshold
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.multithreshold import multithreshold
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels):
-    W_sparse = np.zeros((channels, channels, k_h, k_w))
+    W_sparse = np.zeros((channels, channels, k_h, k_w), dtype=np.float32)
     for ch in range(channels):
         W_sparse[ch][ch] = W_conv[ch][0]
     W_conv = W_sparse.astype(np.float32)
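
The dtype=np.float32 addition here, and the matching set_initializer changes further down (the np_default_dtype constant in test_linear_past_eltwise and the thres1 initializer in test_move_maxpool_past_multithreshold), all fix the same latent mismatch: np.zeros and np.array default to float64, while ONNX FLOAT tensors are single precision, so an initializer built with the defaults silently disagrees with its declared tensor type. A minimal sketch of the distinction, assuming nothing beyond numpy:

    import numpy as np

    a = np.zeros((2, 2))                    # numpy default: float64
    b = np.zeros((2, 2), dtype=np.float32)  # matches ONNX TensorProto.FLOAT
    print(a.dtype, b.dtype)                 # float64 float32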
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index 494aea4dad000ff6d6bf61e9e38440b727d90dc7..55c90644dfbb23fbc2da10cf969461abe6d38bf3 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -28,16 +28,16 @@
 
 import pytest
 
-# import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
-
-# from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.convert_to_hls_layers import InferStreamingMaxPool
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
@@ -45,9 +45,6 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode):
@@ -66,7 +63,7 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_
         "MaxPoolNHWC",
         ["inp"],
         ["outp"],
-        domain="finn.custom_op.general",
+        domain="qonnx.custom_op.general",
         kernel_shape=[k_h, k_w],
         strides=[k_h, k_w],
         ceil_mode=ceil_mode,
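
The domain string is more than a label: the custom-op registry resolves a node's implementation by looking its op_type up under the node's domain, so a node still declared with the old finn.custom_op.general domain would fail to resolve now that the general ops live in qonnx. A hedged sketch mirroring the make_node call above (the kernel and stride values are placeholders):

    from onnx import helper

    node = helper.make_node(
        "MaxPoolNHWC",
        ["inp"],
        ["outp"],
        domain="qonnx.custom_op.general",  # registry key for the op implementation
        kernel_shape=[2, 2],
        strides=[2, 2],
        ceil_mode=0,
    )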
diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py
index f86c58d33568c68ba95c9cfbf31ad5b0c8c900f5..16fed5c3cb5c54a052b1a1b2ef2723d116243171 100644
--- a/tests/fpgadataflow/test_runtime_weights.py
+++ b/tests/fpgadataflow/test_runtime_weights.py
@@ -30,18 +30,18 @@ import pytest
 
 import numpy as np
 import os
+from pyverilator.util.axi_utils import axilite_read, axilite_write
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
+from qonnx.util.basic import gen_finn_dt_tensor
 
-from finn.core.datatype import DataType
 from finn.core.rtlsim_exec import rtlsim_exec
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
 from finn.util.create import hls_random_mlp_maker
-from finn.util.pyverilator import axilite_read, axilite_write
 
 test_fpga_part = "xczu3eg-sbva484-1-e"
 target_clk_ns = 5
diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py
index 63612b9cc06d353b1f944e7ad0ba72add1d9e31f..8ea0e18f2cace10b6fefae50ce1e28845ab24050 100644
--- a/tests/fpgadataflow/test_set_folding.py
+++ b/tests/fpgadataflow/test_set_folding.py
@@ -30,16 +30,16 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.general import GiveUniqueNodeNames
 
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
 from finn.transformation.fpgadataflow.set_folding import SetFolding
-from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.test import load_test_checkpoint_or_skip
 
 
diff --git a/tests/transformation/streamline/test_absorb_mul_into_topk.py b/tests/transformation/streamline/test_absorb_mul_into_topk.py
index e75f2d21db5cb2fe1b2f93e43ee0e61c7a7681c9..a6dff788dc58dba45536a280c7fe5f5c53edc4e1 100644
--- a/tests/transformation/streamline/test_absorb_mul_into_topk.py
+++ b/tests/transformation/streamline/test_absorb_mul_into_topk.py
@@ -29,13 +29,13 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.insert_topk import InsertTopK
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.streamline.absorb import AbsorbScalarMulAddIntoTopK
 
 
diff --git a/tests/transformation/streamline/test_absorb_opposite_transposes.py b/tests/transformation/streamline/test_absorb_opposite_transposes.py
index ca5ed6ba6a85935604750ab35df0ccf30e032c2c..51ea5edfc420bf935de3e196df1b150934782a91 100644
--- a/tests/transformation/streamline/test_absorb_opposite_transposes.py
+++ b/tests/transformation/streamline/test_absorb_opposite_transposes.py
@@ -31,10 +31,10 @@ import pytest
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as ox
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.absorb import AbsorbConsecutiveTransposes
 
 
diff --git a/tests/transformation/streamline/test_absorb_transp_into_flatten.py b/tests/transformation/streamline/test_absorb_transp_into_flatten.py
index 533dc693da0774e89d2dbb44aac52a6bef038990..1358d468c04c3edf08b11e7e9b858dda58965368 100644
--- a/tests/transformation/streamline/test_absorb_transp_into_flatten.py
+++ b/tests/transformation/streamline/test_absorb_transp_into_flatten.py
@@ -1,15 +1,15 @@
 import pytest
 
 import numpy as np
+import qonnx.core.data_layout as DataLayout
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
 
-import finn.core.data_layout as DataLayout
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.absorb import AbsorbTransposeIntoFlatten
 
 
diff --git a/tests/transformation/streamline/test_collapse_repeated_op.py b/tests/transformation/streamline/test_collapse_repeated_op.py
index d48d4ad3c2a30e005c1ccc02eee4f7edcaa8a57b..268e0ffc5c5cb342634ff51ac8fe02157ae8c7c6 100644
--- a/tests/transformation/streamline/test_collapse_repeated_op.py
+++ b/tests/transformation/streamline/test_collapse_repeated_op.py
@@ -31,10 +31,10 @@ import pytest
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as ox
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import CollapseRepeatedAdd, CollapseRepeatedMul
 
 
diff --git a/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py b/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py
index 2e5ed2eebfcf7ac7c39ccd8c0f105dee8fb389a8..04ab9bf0b9c092bdf2c2a6c6268974fd78020eee 100644
--- a/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py
+++ b/tests/transformation/streamline/test_factor_out_mul_sign_magnitude.py
@@ -31,10 +31,10 @@ import pytest
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as ox
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import FactorOutMulSignMagnitude
 
 
diff --git a/tests/transformation/streamline/test_linear_past_eltwise.py b/tests/transformation/streamline/test_linear_past_eltwise.py
index 0e4ad6237b3f293c2ee32dcb4963423f6e8d9f19..12633d750bb405757efca0c028dece92b289b472 100644
--- a/tests/transformation/streamline/test_linear_past_eltwise.py
+++ b/tests/transformation/streamline/test_linear_past_eltwise.py
@@ -31,15 +31,16 @@ import pytest
 import numpy as np
 import os
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd
 
 export_onnx_path = "test_linear_past_eltwise.onnx"
+np_default_dtype = np.float32
 
 # construct a synthetic graph to test:
 # moving scalar add/mul ops past an elementwise add
@@ -81,10 +82,10 @@ def make_model(shape):
     model = ModelWrapper(model)
 
     # set initializers for scalar add/mul nodes
-    model.set_initializer(add1_node.input[1], np.array([7.0]))
-    model.set_initializer(add2_node.input[1], np.array([8.0]))
-    model.set_initializer(mul1_node.input[1], np.array([3.0]))
-    model.set_initializer(mul2_node.input[1], np.array([3.0]))
+    model.set_initializer(add1_node.input[1], np.array([7.0], dtype=np_default_dtype))
+    model.set_initializer(add2_node.input[1], np.array([8.0], dtype=np_default_dtype))
+    model.set_initializer(mul1_node.input[1], np.array([3.0], dtype=np_default_dtype))
+    model.set_initializer(mul2_node.input[1], np.array([3.0], dtype=np_default_dtype))
 
     return model
 
diff --git a/tests/transformation/streamline/test_maxpool_nhwc.py b/tests/transformation/streamline/test_maxpool_nhwc.py
index 446302be94d7c5e9c06da1c1fc926de7a3bff578..aa77b5cf1a6e77d67ff8351ca5f544a63eb47f29 100644
--- a/tests/transformation/streamline/test_maxpool_nhwc.py
+++ b/tests/transformation/streamline/test_maxpool_nhwc.py
@@ -3,14 +3,14 @@ import pytest
 import onnx
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def create_maxpool(ifm_dim, ifm_ch, kernel_shape, pads, strides, ceil_mode, idt):
diff --git a/tests/transformation/streamline/test_move_add_past_mul.py b/tests/transformation/streamline/test_move_add_past_mul.py
index e0ee449734e523b1e1742c85dd6b9d1bbdd32537..0fb4dd9f7a116d0d52578d7222421f251ac17ec1 100644
--- a/tests/transformation/streamline/test_move_add_past_mul.py
+++ b/tests/transformation/streamline/test_move_add_past_mul.py
@@ -31,10 +31,10 @@ import pytest
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as ox
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import MoveAddPastMul
 
 
diff --git a/tests/transformation/streamline/test_move_chw_add_past_conv.py b/tests/transformation/streamline/test_move_chw_add_past_conv.py
index d43531fa7d48a67ed91d1e7843bbdfd726fcf14d..7eb7f9f1af67efa1a6934157b9c2b3f8a6a814c2 100644
--- a/tests/transformation/streamline/test_move_chw_add_past_conv.py
+++ b/tests/transformation/streamline/test_move_chw_add_past_conv.py
@@ -30,11 +30,11 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveAddPastConv
 
 
diff --git a/tests/transformation/streamline/test_move_flatten_past_affine.py b/tests/transformation/streamline/test_move_flatten_past_affine.py
index 1a4cecf1c46fddcb4427975cbf7e31a25628bf9a..8c3f71d1f35de1b03fb33e53e41599fae7e02304 100644
--- a/tests/transformation/streamline/test_move_flatten_past_affine.py
+++ b/tests/transformation/streamline/test_move_flatten_past_affine.py
@@ -28,18 +28,18 @@
 import pytest
 
 import numpy as np
+import qonnx.core.data_layout as DataLayout
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
-import finn.core.data_layout as DataLayout
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveFlattenPastAffine
-from finn.util.basic import gen_finn_dt_tensor
 
 
 @pytest.mark.streamline
diff --git a/tests/transformation/streamline/test_move_flatten_past_topk.py b/tests/transformation/streamline/test_move_flatten_past_topk.py
index e3d8c65434871ecfa87784e69c76d99330c3f554..83d7a28c05fbd95834e5d84ab7537ae82c285d17 100644
--- a/tests/transformation/streamline/test_move_flatten_past_topk.py
+++ b/tests/transformation/streamline/test_move_flatten_past_topk.py
@@ -27,19 +27,19 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import pytest
 
+import qonnx.core.data_layout as DataLayout
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.insert_topk import InsertTopK
+from qonnx.util.basic import gen_finn_dt_tensor
 
-import finn.core.data_layout as DataLayout
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.streamline.reorder import MoveFlattenPastTopK
-from finn.util.basic import gen_finn_dt_tensor
 
 
 @pytest.mark.streamline
diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_op.py b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
index 1d840ec15403e7a70c8da67a6f57076d8521d587..4986363ff4dba0b0126babdbd1f393faa2df5de3 100644
--- a/tests/transformation/streamline/test_move_identical_op_past_join_op.py
+++ b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
@@ -1,12 +1,39 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import pytest
 
 from onnx import TensorProto
 from onnx import helper as oh
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.streamline.reorder import MoveTransposePastJoinAdd
-from finn.util.basic import gen_finn_dt_tensor
 
 
 def create_model(perm):
diff --git a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
index 127f0fde7bc8423d7135a94f0d6f2ff1317bff76..bf25eee9e685d2536faf5bd25bc7b1aa36700463 100644
--- a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
+++ b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
@@ -1,12 +1,39 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold
 
 
@@ -19,6 +46,7 @@ def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
     thres = ((thres - bias) * steps).astype(np.float32)
     return thres
 
+
 @pytest.mark.streamline
 def test_move_maxpool_past_multithreshold():
     # generate test vectors of correct shape
@@ -54,7 +82,7 @@ def test_move_maxpool_past_multithreshold():
             "MultiThreshold",
             ["t1", "thres1"],
             ["t2"],
-            domain="finn.custom_op.general",
+            domain="qonnx.custom_op.general",
             out_dtype="BIPOLAR",
             out_bias=-1.0,
             out_scale=1.0,
@@ -66,7 +94,7 @@ def test_move_maxpool_past_multithreshold():
             "MultiThreshold",
             ["t3", "thres2"],
             ["top_out"],
-            domain="finn.custom_op.general",
+            domain="qonnx.custom_op.general",
             out_dtype="UINT4",
         )
     ]
@@ -84,7 +112,7 @@ def test_move_maxpool_past_multithreshold():
     model = model.transform(InferShapes())
     model = model.transform(InferDataTypes())
 
-    model.set_initializer("thres1", np.array([[0]]))
+    model.set_initializer("thres1", np.array([[0]], dtype=np.float32))
     model.set_initializer(
         "thres2", get_multithreshold_rand_params(*thres2_shape, seed=0)
     )
diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
index ee7f840bb4461b9b32f25048c0678da9a68526b5..401631a728412e7676fa804626601cfc58b5a5e3 100644
--- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py
+++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
@@ -1,15 +1,42 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import pytest
 
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.im2col import compute_conv_output_dim
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveMulPastDWConv
-from finn.util.basic import gen_finn_dt_tensor
 
 
 @pytest.mark.streamline
diff --git a/tests/transformation/streamline/test_move_mul_past_maxpool.py b/tests/transformation/streamline/test_move_mul_past_maxpool.py
index 5f92c514c05b8ea9d75e6c3813dfee998fd8b08b..fcc1b6513230c548bdcc04a40aad793b64c6faf2 100755
--- a/tests/transformation/streamline/test_move_mul_past_maxpool.py
+++ b/tests/transformation/streamline/test_move_mul_past_maxpool.py
@@ -1,16 +1,43 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveMulPastMaxPool
-from finn.util.basic import gen_finn_dt_tensor
 
 
 @pytest.mark.streamline
diff --git a/tests/transformation/streamline/test_move_past_fork.py b/tests/transformation/streamline/test_move_past_fork.py
index f578234d6200936502e2e00c841b49707a99656b..5064fa3fca869a245c87cf0c1680d1357e5de60b 100644
--- a/tests/transformation/streamline/test_move_past_fork.py
+++ b/tests/transformation/streamline/test_move_past_fork.py
@@ -1,11 +1,38 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveLinearPastFork
 
 
diff --git a/tests/transformation/streamline/test_move_scalar_past_conv.py b/tests/transformation/streamline/test_move_scalar_past_conv.py
index 8f725db91a4dadc938fb9296606e7214f02dcb6e..59b8b8f8b2fee99bbb77c6d354620406a108cb54 100644
--- a/tests/transformation/streamline/test_move_scalar_past_conv.py
+++ b/tests/transformation/streamline/test_move_scalar_past_conv.py
@@ -1,12 +1,39 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import pytest
 
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as ox
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import MoveAddPastConv, MoveScalarMulPastConv
 
 
diff --git a/tests/transformation/streamline/test_move_scalar_past_matmul.py b/tests/transformation/streamline/test_move_scalar_past_matmul.py
index 4d6dd95173485c234fd6d231e524d30b50ab56de..6fdaaadfaea5862b566fd3a8d060ac28acadf1cd 100644
--- a/tests/transformation/streamline/test_move_scalar_past_matmul.py
+++ b/tests/transformation/streamline/test_move_scalar_past_matmul.py
@@ -31,10 +31,10 @@ import pytest
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as ox
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import (
     MoveScalarAddPastMatMul,
     MoveScalarMulPastMatMul,
diff --git a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
index ad174a4909202f2d62fa2a3c31a7da8ead900e0b..9662ba8a908e9bb793e0c0c2b078cf26adb5cef3 100644
--- a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
+++ b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
@@ -1,15 +1,43 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import pytest
 
 import numpy as np
+import qonnx.core.data_layout as DataLayout
 from onnx import TensorProto, helper
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
 
-import finn.core.data_layout as DataLayout
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveTransposePastScalarMul
 
 
diff --git a/tests/transformation/streamline/test_round_thresholds.py b/tests/transformation/streamline/test_round_thresholds.py
index 3a533b0694fa81bae846d2d2f6e8dbcb41a8ee6c..1ec5f02e878a540a89cc37179b2e6dd76ede882c 100644
--- a/tests/transformation/streamline/test_round_thresholds.py
+++ b/tests/transformation/streamline/test_round_thresholds.py
@@ -30,10 +30,10 @@ import pytest
 
 import numpy as np
 from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
 
 import finn.core.onnx_exec as oxe
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.streamline import RoundAndClipThresholds
 
 
@@ -43,7 +43,7 @@ def test_round_thresholds():
     thresholds = helper.make_tensor_value_info("thresholds", TensorProto.FLOAT, [4, 1])
     out = helper.make_tensor_value_info("out", TensorProto.FLOAT, [1, 4])
     node_def = helper.make_node(
-        "MultiThreshold", ["v", "thresholds"], ["out"], domain="finn.custom_op.general"
+        "MultiThreshold", ["v", "thresholds"], ["out"], domain="qonnx.custom_op.general"
     )
     graph_def = helper.make_graph([node_def], "test_model", [v, thresholds], [out])
     model_def = helper.make_model(graph_def)
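Because the custom-op registry now lives in qonnx, hand-built MultiThreshold nodes must carry the renamed domain exactly as the hunk above does; a node left on the old finn domain would no longer resolve. A minimal sketch using onnx.helper:

    from onnx import helper

    # the domain string is what routes this node to the qonnx custom-op registry
    node = helper.make_node(
        "MultiThreshold",
        ["v", "thresholds"],
        ["out"],
        domain="qonnx.custom_op.general",
    )
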
diff --git a/tests/transformation/streamline/test_sign_to_thres.py b/tests/transformation/streamline/test_sign_to_thres.py
index aa9254e8d605bbcd1d8a61da4d79cc6d582a1764..839680bd7ad2d40cb622b313257e819737027a2f 100644
--- a/tests/transformation/streamline/test_sign_to_thres.py
+++ b/tests/transformation/streamline/test_sign_to_thres.py
@@ -33,11 +33,11 @@ import onnx
 import onnx.numpy_helper as nph
 import os
 from pkgutil import get_data
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import ConvertSignToThres
 from finn.util.test import get_test_model_trained
 
@@ -54,7 +54,7 @@ def test_sign_to_thres():
     new_model = model.transform(ConvertSignToThres())
     assert new_model.graph.node[3].op_type == "MultiThreshold"
     # load one of the test vectors
-    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     input_dict = {"0": nph.to_array(input_tensor)}
     assert oxe.compare_execution(model, new_model, input_dict)
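The packaged MNIST test vectors moved along with the code and now ship inside the qonnx distribution, so pkgutil lookups must target "qonnx.data" rather than "finn.data". A sketch of the lookup, reusing the resource path from the test above:

    import onnx
    import onnx.numpy_helper as nph
    from pkgutil import get_data

    # read a serialized TensorProto bundled with the qonnx package
    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)  # MNIST sample of shape (1, 1, 28, 28)
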
diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py
index f2c4921c9ae55fa2206abbbb2661fe20e6068b93..6a829250127ee289733ec8ce1b08b63de7a573c5 100644
--- a/tests/transformation/streamline/test_streamline_cnv.py
+++ b/tests/transformation/streamline/test_streamline_cnv.py
@@ -32,17 +32,17 @@ import pytest
 
 import brevitas.onnx as bo
 import numpy as np
-
-import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import (
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     RemoveStaticGraphInputs,
     RemoveUnusedTensors,
 )
-from finn.transformation.infer_shapes import InferShapes
+from qonnx.transformation.infer_shapes import InferShapes
+
+import finn.core.onnx_exec as oxe
 from finn.transformation.streamline import Streamline
 from finn.util.basic import make_build_dir
 from finn.util.test import get_test_model_trained
diff --git a/tests/transformation/streamline/test_streamline_fc.py b/tests/transformation/streamline/test_streamline_fc.py
index 875a1c46029b83f59211556dc79c9bac26ff927f..90008214352d1a75fba61130f5aedbc358e1fe74 100644
--- a/tests/transformation/streamline/test_streamline_fc.py
+++ b/tests/transformation/streamline/test_streamline_fc.py
@@ -33,17 +33,17 @@ import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 from pkgutil import get_data
-
-import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import (
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     RemoveStaticGraphInputs,
     RemoveUnusedTensors,
 )
-from finn.transformation.infer_shapes import InferShapes
+from qonnx.transformation.infer_shapes import InferShapes
+
+import finn.core.onnx_exec as oxe
 from finn.transformation.streamline import Streamline
 from finn.util.basic import make_build_dir
 from finn.util.test import get_test_model_trained
@@ -74,7 +74,7 @@ def test_streamline_fc(size, wbits, abits):
     model = model.transform(GiveReadableTensorNames())
     model = model.transform(RemoveStaticGraphInputs())
     # load one of the test vectors
-    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
     input_dict = {"global_in": nph.to_array(input_tensor)}
diff --git a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
index bdb988e2aa508ed7464aee33d30b671fa38ebacb..fd4e37807c860058a8503439a04a58879edc7954 100644
--- a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
+++ b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
@@ -26,26 +26,27 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import pytest
-
 import pkg_resources as pk
 
+import pytest
+
 import brevitas.onnx as bo
 import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 import os
 from pkgutil import get_data
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.batchnorm_to_affine import BatchNormToAffine
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.infer_shapes import InferShapes
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.batchnorm_to_affine import BatchNormToAffine
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_shapes import InferShapes
 from finn.util.test import get_test_model_trained
 
 export_onnx_path = "test_output_bn2affine.onnx"
 
+
 @pytest.mark.transform
 def test_batchnorm_to_affine_cnv_w1a1():
     lfc = get_test_model_trained("CNV", 1, 1)
@@ -80,7 +81,7 @@ def test_batchnorm_to_affine_lfc_w1a1():
     model = model.transform(FoldConstants())
     new_model = model.transform(BatchNormToAffine())
     # load one of the test vectors
-    raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+    raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     input_dict = {"0": nph.to_array(input_tensor)}
     assert oxe.compare_execution(model, new_model, input_dict)
diff --git a/tests/transformation/test_infer_data_layouts_cnv.py b/tests/transformation/test_infer_data_layouts_cnv.py
index 99f6efd4d2bb358508b592e26e691300ef5a784e..952ce306a447ba0b4d46256ec6e80e5da79be4bc 100644
--- a/tests/transformation/test_infer_data_layouts_cnv.py
+++ b/tests/transformation/test_infer_data_layouts_cnv.py
@@ -30,17 +30,17 @@ import pytest
 
 import brevitas.onnx as bo
 import os
+import qonnx.core.data_layout as DataLayout
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_data_layouts import InferDataLayouts
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 
-import finn.core.data_layout as DataLayout
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.util.test import get_test_model_trained
diff --git a/tests/transformation/test_infer_datatypes_lfc.py b/tests/transformation/test_infer_datatypes_lfc.py
index 3758485860cf0176143fe6f55b71508327ffe762..979800534951abbc77d203aa6b5bd9c797aa9028 100644
--- a/tests/transformation/test_infer_datatypes_lfc.py
+++ b/tests/transformation/test_infer_datatypes_lfc.py
@@ -30,13 +30,13 @@ import pytest
 
 import brevitas.onnx as bo
 import os
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
 
-from finn.core.datatype import DataType
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
 from finn.util.test import get_test_model_trained
 
 export_onnx_path = "test_infer_datatypes.onnx"
diff --git a/tests/transformation/test_qonnx_to_finn.py b/tests/transformation/test_qonnx_to_finn.py
index d9443e381677273d15bcb06832b009990a6ad11a..43055f6704732866569ac4770202f1b4ff6bfb22 100644
--- a/tests/transformation/test_qonnx_to_finn.py
+++ b/tests/transformation/test_qonnx_to_finn.py
@@ -38,14 +38,14 @@ import onnx
 import onnx.numpy_helper as nph
 import torch
 from pkgutil import get_data
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.fold_constants import FoldConstants
+from qonnx.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
+from qonnx.transformation.infer_shapes import InferShapes
 from qonnx.util.cleanup import cleanup
 from tempfile import TemporaryDirectory
 
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
 from finn.util.test import get_test_model_trained
 
@@ -53,7 +53,7 @@ from finn.util.test import get_test_model_trained
 def get_brev_model_and_sample_inputs(model_name, wbits, abits):
     if "FC" in model_name:
         in_shape = (1, 1, 28, 28)
-        raw_i = get_data("finn.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
+        raw_i = get_data("qonnx.data", "onnx/mnist-conv/test_data_set_0/input_0.pb")
         input_tensor = onnx.load_tensor_from_string(raw_i)
         input_tensor = nph.to_array(input_tensor)
         brev_model = get_test_model_trained(model_name, wbits, abits)
@@ -134,7 +134,7 @@ def test_QONNX_to_FINN(model_name, wbits, abits):
         ).all(), "The output of the Brevitas model and the FINN model should match."
 
     # Get the equivalent QONNX model
-    b_onnx.function.DOMAIN_STRING = "finn.custom_op.general"
+    b_onnx.function.DOMAIN_STRING = "qonnx.custom_op.general"
     _ = b_onnx.manager.BrevitasONNXManager.export(
         brev_model, in_shape, qonnx_base_path.format("raw")
     )
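The Brevitas export override follows the same rename: the exporter stamps its custom ops with whatever DOMAIN_STRING holds, and that string must now match the qonnx registry. A sketch of the call sequence, where brev_model, in_shape and export_path stand in for the test's actual values:

    import brevitas.onnx as b_onnx

    # make the exported custom ops resolvable by the qonnx custom-op registry
    b_onnx.function.DOMAIN_STRING = "qonnx.custom_op.general"
    b_onnx.manager.BrevitasONNXManager.export(brev_model, in_shape, export_path)
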
diff --git a/tests/util/test_create.py b/tests/util/test_create.py
index 655c01f06eecca84d414ce3b995cfe4d1ba58170..dc44e4bd459a7bdff72b63a8500edc3cb7331c1d 100644
--- a/tests/util/test_create.py
+++ b/tests/util/test_create.py
@@ -28,8 +28,9 @@
 
 import pytest
 
+from qonnx.core.datatype import DataType
+
 import finn.util.create as create
-from finn.core.datatype import DataType
 
 
 @pytest.mark.util
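DataType is another core module that moved wholesale into qonnx; datatypes are looked up by name on the qonnx side. A minimal sketch, with INT4 chosen as an arbitrary example:

    from qonnx.core.datatype import DataType

    dt = DataType["INT4"]
    assert dt.bitwidth() == 4
    assert dt.signed()
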
diff --git a/tests/util/test_data_packing_hls.py b/tests/util/test_data_packing_hls.py
index a29d2ae3f87eaca3e57abe638849f066c7ed671e..859b926543c153b69c17ba307a1e8f2fdfd6bc82 100644
--- a/tests/util/test_data_packing_hls.py
+++ b/tests/util/test_data_packing_hls.py
@@ -32,9 +32,10 @@ import numpy as np
 import os
 import shutil
 import subprocess
+from qonnx.core.datatype import DataType
+from qonnx.util.basic import gen_finn_dt_tensor
 
-import finn.util.basic as cutil
-from finn.core.datatype import DataType
+from finn.util.basic import make_build_dir
 from finn.util.data_packing import numpy_to_hls_code
 
 
@@ -52,8 +53,8 @@ from finn.util.data_packing import numpy_to_hls_code
 @pytest.mark.parametrize("test_shape", [(1, 2, 4), (1, 1, 64), (2, 64)])
 @pytest.mark.vivado
 def test_npy2apintstream(test_shape, dtype):
-    ndarray = cutil.gen_finn_dt_tensor(dtype, test_shape)
-    test_dir = cutil.make_build_dir(prefix="test_npy2apintstream_")
+    ndarray = gen_finn_dt_tensor(dtype, test_shape)
+    test_dir = make_build_dir(prefix="test_npy2apintstream_")
     shape = ndarray.shape
     elem_bits = dtype.bitwidth()
     packed_bits = shape[-1] * elem_bits
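The hunk above splits what used to be a single finn.util.basic import: generating test tensors for a given FINN datatype is now a qonnx utility, while build-directory management stays FINN-local. A sketch mirroring the test's calls, with the prefix chosen arbitrarily:

    from qonnx.core.datatype import DataType
    from qonnx.util.basic import gen_finn_dt_tensor

    from finn.util.basic import make_build_dir

    # random tensor whose values are drawn from the INT2 value set
    ndarray = gen_finn_dt_tensor(DataType["INT2"], (1, 2, 4))
    test_dir = make_build_dir(prefix="npy2apintstream_example_")
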
diff --git a/tutorials/fpga_flow/build.py b/tutorials/fpga_flow/build.py
index 1f8e27ef773e033933543cdc46de475c907a04eb..8b50a3114425925b4c13cdc0722f03d23dde45ed 100644
--- a/tutorials/fpga_flow/build.py
+++ b/tutorials/fpga_flow/build.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Advanced Micro Devices, Inc.
+# Copyright (c) 2022 Xilinx, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -34,11 +34,11 @@
 
 import numpy as np
 import os
+from qonnx.custom_op.registry import getCustomOp
 
 import finn.builder.build_dataflow as build
 import finn.builder.build_dataflow_config as build_cfg
 import finn.util.data_packing as dpk
-from finn.custom_op.registry import getCustomOp
 
 model_name = "tfc_w1a1"
 platform_name = "fpga"
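getCustomOp likewise resolves through the qonnx registry now. A minimal sketch of wrapping a node to inspect its attributes, with the model path and node choice as hypothetical placeholders:

    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.custom_op.registry import getCustomOp

    model = ModelWrapper("dataflow_model.onnx")  # hypothetical path
    inst = getCustomOp(model.graph.node[0])      # hypothetical node choice
    print(inst.get_nodeattr_types())             # dict of attribute name -> spec
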
diff --git a/tutorials/fpga_flow/templates/finn_testbench.template.sv b/tutorials/fpga_flow/templates/finn_testbench.template.sv
index e9476249f3e12f7c52925e5d781a40fdafdf0739..0d8c08efd729d9f08b67da4ce4192cd91b09ade1 100644
--- a/tutorials/fpga_flow/templates/finn_testbench.template.sv
+++ b/tutorials/fpga_flow/templates/finn_testbench.template.sv
@@ -1,4 +1,4 @@
-// Copyright (c) 2022 Advanced Micro Devices, Inc.
+// Copyright (c) 2022 Xilinx, Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -11,7 +11,7 @@
 //   this list of conditions and the following disclaimer in the documentation
 //   and/or other materials provided with the distribution.
 //
-// * Neither the name of AMD nor the names of its
+// * Neither the name of Xilinx nor the names of its
 //   contributors may be used to endorse or promote products derived from
 //   this software without specific prior written permission.
 //
diff --git a/tutorials/fpga_flow/templates/make_sim_proj.template.tcl b/tutorials/fpga_flow/templates/make_sim_proj.template.tcl
index e39031023e9a065551180b70787cdca720049898..9dae5a02a9921616c62606d209ba2da9ad1c49ed 100644
--- a/tutorials/fpga_flow/templates/make_sim_proj.template.tcl
+++ b/tutorials/fpga_flow/templates/make_sim_proj.template.tcl
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Advanced Micro Devices, Inc.
+# Copyright (c) 2022 Xilinx, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without