diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000000000000000000000000000000000..88ae4659c8f2ebb10c3bdb1146d2355665484259
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,46 @@
+---
+name: Bug report
+about: Something isn't working as expected
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+
+## Prerequisites
+Please make sure to check off these prerequisites before submitting a bug report.
+- [ ] Test that the bug appears on the current version of the dev branch. Make sure to include the hash of the commit you checked out.
+- [ ] Check that the issue hasn't already been reported by looking through the currently open issues.
+- [ ] If there are steps to reproduce the problem, make sure to write them down below.
+- [ ] If relevant, please include the ONNX files that were created directly before and/or after the bug occurred.
+
+## Quick summary
+Please give a brief and concise description of the bug.
+
+## Details
+Please fill in the following sections to describe the bug as accurately as possible.
+
+### Steps to Reproduce
+Describe what needs to be done to reproduce the bug. Add code examples where useful,
+and make sure to include the resulting ONNX files and the commit hash you are working on.
+
+1. Clone the FINN repository
+2. Checkout the dev branch, with commit hash: [...]
+3. Start the docker container with the command: [...]
+4. Run transformation [...] on ONNX file [...] or run the dataflow builder with the following settings: [...]
+5. [Further steps ...]
+
+### Expected behavior
+Please add a brief description of what you expected to happen.
+
+### Actual behavior
+Describe what actually happens instead.
+
+## Optional
+
+### Possible fix
+If you already know where the issue stems from, or if you have a hint, please let us know.
+
+### Additional context
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..57e3d5495215bf04e7863729a3b8de1d331a1b2f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Getting started with FINN
+    url: https://finn.readthedocs.io/en/latest/getting_started.html
+    about: Documentation about how to get up and running with FINN.
+  - name: Ask for help and get in touch with the community
+    url: https://gitter.im/xilinx-finn/community
+    about: Check out our Gitter channel if you have a question about FINN or a general problem that is likely not a bug.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd71f43087473c7972b972c5b547b10b51fc496
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,27 @@
+---
+name: Feature request
+about: Suggest an idea for FINN
+title: ''
+labels: enhancement
+assignees: ''
+
+---
+
+## Prerequisites
+Please make sure to check that the idea is not already being worked on
+by looking at the currently open issues and the [project Kanbans](https://github.com/Xilinx/finn/projects).
+
+Even if an idea is already being worked on, you can still create a feature request
+if you would like to open a discussion about the feature or want to contribute to it.
+
+## Details
+Please fill in the following sections to describe the feature as accurately as possible.
+
+### New behavior
+Please add a brief and concise description of what you would like to happen in FINN in the future.
+
+### Motivation
+Please tell us why this feature is important to the FINN community.
+
+### Parts of FINN affected
+Please describe which parts of FINN would be affected by this feature.
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
new file mode 100644
index 0000000000000000000000000000000000000000..4374111f22a12e586c5c5233a7eee096b848b86e
--- /dev/null
+++ b/.github/workflows/docker-image.yml
@@ -0,0 +1,33 @@
+name: DockerImage
+
+on:
+  push:
+    branches:
+      - 'dev'
+
+jobs:
+  docker:
+    runs-on: ubuntu-18.04
+    steps:
+      -
+        name: checkout
+        uses: actions/checkout@v2
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Build and push
+        id: docker_build
+        uses: docker/build-push-action@v2
+        with:
+          file: docker/Dockerfile.finn
+          context: .
+          push: true
+          tags: maltanar/finn:dev_latest
+      -
+        name: Image digest
+        run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2fbb9265beb49644f08a2c6e916ab9c23d4bd339
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,22 @@
+name: Pre-commit
+
+on:
+  pull_request:
+    branches: [ main, dev ]
+  push:
+    branches: [ main, dev ]
+
+jobs:
+  lint:
+    name: Lint PR or Push to DEV
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Setup Python
+        uses: actions/setup-python@v2
+
+      - name: Run Lint
+        uses: pre-commit/action@v2.0.0
diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml
index 960abd675bbb185ce2fadfab954ec2b4fd6ff94e..80ac0b61e6d1a6f469f7d5bb0f1d50ce9c56565b 100644
--- a/.github/workflows/quicktest-dev-pr.yml
+++ b/.github/workflows/quicktest-dev-pr.yml
@@ -11,13 +11,43 @@ jobs:
 
   test:
     name: Run quicktest on PR branch
-    runs-on: ubuntu-16.04
+    runs-on: ubuntu-18.04
 
     steps:
       - name: checkout
         uses: actions/checkout@v2
 
+      - name: set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+
+      - name: cache Docker layers
+        uses: actions/cache@v2
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          file: docker/Dockerfile.finn
+          context: .
+          push: false
+          load: true
+          tags: finn_gha
+          cache-from: type=local,src=/tmp/.buildx-cache
+          cache-to: type=local,dest=/tmp/.buildx-cache-new
+      -
+        # Temp fix
+        # https://github.com/docker/build-push-action/issues/252
+        # https://github.com/moby/buildkit/issues/1896
+        name: Move cache
+        run: |
+          rm -rf /tmp/.buildx-cache
+          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
+
+
       - name: DockerRunQuicktest
         run: |
-          docker build -t finn_gha -f docker/Dockerfile.finn_ci --build-arg BUILD_PATH=/tmp/finn_gha .
           docker run --init --hostname finn_gha -v $(pwd):/workspace/finn -e FINN_BUILD_DIR=/tmp/finn_gha -e FINN_INST_NAME=finn_gha finn_gha quicktest.sh
diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..6cfe1c8919fd8488c5e6f00a816e500b1f88a784
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,11 @@
+[settings]
+line_length=88
+indent='    '
+skip=.tox,.venv,build,dist
+known_standard_library=setuptools,pkg_resources
+known_test=pytest
+known_first_party=finn
+sections=FUTURE,STDLIB,TEST,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
+default_section=THIRDPARTY
+multi_line_output=3
+profile=black
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c513c5493d674b067b82fdae9e675d7f9b6eb024..143514b36ba31cb2b292f3a1961187709798efbf 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,28 +27,44 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 exclude: '^docs/conf.py'
+
 default_language_version:
     python: python3
+
 repos:
-# black
-- repo: https://github.com/ambv/black
-  rev: stable
-  hooks:
-  - id: black
-    language_version: python3
 - repo: git://github.com/pre-commit/pre-commit-hooks
-  rev: v2.2.3
+  rev: v3.2.0
   hooks:
   - id: trailing-whitespace
+    exclude: '\.dat$'
+  - id: check-added-large-files
   - id: check-ast
   - id: check-json
   - id: check-merge-conflict
   - id: check-xml
   - id: check-yaml
   - id: debug-statements
+    exclude: '^src/finn/builder/build_dataflow.py$'
   - id: end-of-file-fixer
   - id: requirements-txt-fixer
   - id: mixed-line-ending
     args: ['--fix=no']
+
+- repo: git://github.com/PyCQA/isort
+  rev: 5.5.3
+  hooks:
+  - id: isort
+
+- repo: git://github.com/psf/black
+  rev: stable
+  hooks:
+  - id: black
+    language_version: python3
+
+- repo: https://gitlab.com/pycqa/flake8
+  rev: 3.8.3
+  hooks:
   - id: flake8
-    args: ['--max-line-length=88']  # default of Black
+    # black-compatible flake8 config
+    args: ['--max-line-length=88',  # black default
+           '--extend-ignore=E203']  # E203 is not PEP8 compliant
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3601fcdccff675e6f850d4636ebbfc0726f7cd4d
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,43 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+sphinx:
+   configuration: docs/finn/conf.py
+
+python:
+   version: 3.7
+   install:
+    - method: pip
+      path: .
+      extra_requirements:
+        - docs
diff --git a/AUTHORS.rst b/AUTHORS.rst
index eb1e06e54b7eb6deedd3e7f8392bb3aa257e7dc6..533ed62e1dbda2799f74805f2100769f9c4fecfc 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -2,9 +2,14 @@
 Contributors
 ============
 
-* Yaman Umuroglu (@maltanar)
+* Yaman Umuroglu (@maltanar) (maintainer)
 * Jakoba Petri-Koenig (@auphelia)
 * Andrea Rigoni (@AndreaRigoni)
 * Hendrik Borras (@HenniOVP)
 * Lucian Petrica (@quetric)
 * Tobias Alonso (@Tobi-Alonso)
+* Felix Paul Jentzsch (@felixpj)
+* Mirza Mrahorovic (@mmrahorovic)
+* Suranga Mahesh (@surangamh)
+* Peter Lehnhardt (@pete-lennart)
+* Neil Kim Nielsen (@neilkimn)
diff --git a/README.md b/README.md
index 473885184fdc252ca16d859f14d7c42ed82ba540..10ac25cb8f9e23520830efa4f2f7a58a21370e29 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
-## <img src=https://raw.githubusercontent.com/Xilinx/finn/master/docs/img/finn-logo.png width=128/> Fast, Scalable Quantized Neural Network Inference on FPGAs
+## <img src=https://raw.githubusercontent.com/Xilinx/finn/github-pages/docs/img/finn-logo.png width=128/> Fast, Scalable Quantized Neural Network Inference on FPGAs
 
 
 
-<img align="left" src="https://raw.githubusercontent.com/Xilinx/finn/master/docs/img/finn-stack.png" alt="drawing" style="margin-right: 20px" width="250"/>
+<img align="left" src="https://raw.githubusercontent.com/Xilinx/finn/github-pages/docs/img/finn-stack.png" alt="drawing" style="margin-right: 20px" width="250"/>
 
 [![Gitter](https://badges.gitter.im/xilinx-finn/community.svg)](https://gitter.im/xilinx-finn/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
 [![ReadTheDocs](https://readthedocs.org/projects/finn/badge/?version=latest&style=plastic)](http://finn.readthedocs.io/)
@@ -24,11 +24,9 @@ Please see the [Getting Started](https://finn.readthedocs.io/en/latest/getting_s
 
 ## What's New in FINN?
 
+* **2021-06-15:** v0.6 is released, with ResNet-50 on U250 and MobileNet-v1 on ZCU104 in finn-examples showcasing new features, plus a lot more. Read more on the [v0.6 release blog post](https://xilinx.github.io/finn//2021/06/15/finn-v06-is-released.html).
 * **2020-12-17:** v0.5b (beta) is released, with a new [examples repo](https://github.com/Xilinx/finn-examples) including MobileNet-v1. Read more on the <a href="https://xilinx.github.io/finn/2020/12/17/finn-v05b-beta-is-released.html">release blog post</a>.
 * **2020-09-21:** v0.4b (beta) is released. Read more on the <a href="https://xilinx.github.io/finn/2020/09/21/finn-v04b-beta-is-released.html">release blog post</a>.
-* **2020-05-08:** v0.3b (beta) is released, with initial support for convolutions, parallel transformations, more flexible memory allocation for MVAUs, throughput testing and many other smaller improvements and bugfixes. Read more on the <a href="https://xilinx.github.io/finn/2020/05/08/finn-v03b-beta-is-released.html">release blog post</a>.
-* **2020-04-15:** FINN v0.2.1b (beta): use fixed commit versions for dependency repos, otherwise identical to 0.2b
-* **2020-02-28:** FINN v0.2b (beta) is released, which is a clean-slate reimplementation of the framework. Currently only fully-connected networks are supported for the end-to-end flow. Please see the release blog post for a summary of the key features.
 
 ## Documentation
 
diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn
new file mode 100644
index 0000000000000000000000000000000000000000..309f517e9d4d5ac412ba50467dd732aad8844011
--- /dev/null
+++ b/docker/Dockerfile.finn
@@ -0,0 +1,151 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
+LABEL maintainer="Yaman Umuroglu <yamanu@xilinx.com>"
+
+# XRT version to be installed
+ARG XRT_DEB_VERSION="xrt_202010.2.7.766_18.04-amd64-xrt"
+
+WORKDIR /workspace
+
+# some Vitis deps require a timezone to be specified, which hangs in Docker
+# use workaround from https://grigorkh.medium.com/fix-tzdata-hangs-docker-image-build-cdb52cc3360d
+ENV TZ="Europe/Dublin"
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+RUN apt-get update
+RUN apt-get -y upgrade
+RUN apt-get install -y build-essential
+RUN apt-get install -y libglib2.0-0
+RUN apt-get install -y libsm6
+RUN apt-get install -y libxext6
+RUN apt-get install -y libxrender-dev
+RUN apt-get install -y verilator
+RUN apt-get install -y nano
+RUN apt-get install -y zsh
+RUN apt-get install -y rsync
+RUN apt-get install -y git
+RUN apt-get install -y sshpass
+RUN apt-get install -y wget
+RUN apt-get install -y sudo
+RUN apt-get install -y unzip
+RUN apt-get install -y zip
+RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
+
+# install XRT
+RUN wget https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb -O /tmp/$XRT_DEB_VERSION.deb
+RUN apt install -y /tmp/$XRT_DEB_VERSION.deb
+RUN rm /tmp/$XRT_DEB_VERSION.deb
+
+# versioned Python package requirements for FINN compiler
+# these are given in requirements.txt
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+RUN rm requirements.txt
+# extra Python package dependencies (for testing and interaction)
+RUN pip install pygments==2.4.1
+RUN pip install ipykernel==5.5.5
+RUN pip install jupyter==1.0.0
+RUN pip install matplotlib==3.3.1 --ignore-installed
+RUN pip install pytest-dependency==0.5.1
+RUN pip install sphinx==3.1.2
+RUN pip install sphinx_rtd_theme==0.5.0
+RUN pip install pytest-xdist==2.0.0
+RUN pip install pytest-parallel==0.1.0
+RUN pip install "netron>=5.0.0"
+RUN pip install pandas==1.1.5
+RUN pip install scikit-learn==0.24.1
+RUN pip install tqdm==4.31.1
+RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading
+
+# git-based Python repo dependencies
+# these are installed in editable mode for easier co-development
+ARG FINN_BASE_COMMIT="ac0b86a63eb937b869bfa453a996a8a8b8506546"
+ARG FINN_EXP_COMMIT="f82c0d9868bb88ea045dfadb28508d327d287221"
+ARG BREVITAS_COMMIT="462f86cdc60f9915baf13afd1676fb21da44c2ee"
+ARG PYVERILATOR_COMMIT="e2ff74030de3992dcac54bf1b6aad2915946e8cb"
+ARG CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
+ARG HLSLIB_COMMIT="a30fb1a59ddf35952ac1184e74b1f81e8351328d"
+ARG OMX_COMMIT="1dfc4aa2f2895632742cd5751520c6b472feb74e"
+ARG AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
+# finn-base
+RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
+RUN git -C /workspace/finn-base checkout $FINN_BASE_COMMIT
+RUN pip install -e /workspace/finn-base
+# finn-experimental
+RUN git clone https://github.com/Xilinx/finn-experimental.git /workspace/finn-experimental
+RUN git -C /workspace/finn-experimental checkout $FINN_EXP_COMMIT
+RUN pip install -e /workspace/finn-experimental
+# brevitas
+RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas
+RUN git -C /workspace/brevitas checkout $BREVITAS_COMMIT
+RUN pip install -e /workspace/brevitas
+# pyverilator
+RUN git clone https://github.com/maltanar/pyverilator.git /workspace/pyverilator
+RUN git -C /workspace/pyverilator checkout $PYVERILATOR_COMMIT
+RUN pip install -e /workspace/pyverilator
+# other git-based dependencies (non-Python)
+# cnpy
+RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy
+RUN git -C /workspace/cnpy checkout $CNPY_COMMIT
+# finn-hlslib
+RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib
+RUN git -C /workspace/finn-hlslib checkout $HLSLIB_COMMIT
+# oh-my-xilinx
+RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx
+RUN git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT
+# board files
+RUN cd /tmp; \
+    wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip; \
+    wget -q https://dpoauwgwqsy2x.cloudfront.net/Download/pynq-z2.zip; \
+    unzip -q pynq-z1.zip; \
+    unzip -q pynq-z2.zip; \
+    mkdir /workspace/board_files; \
+    mv pynq-z1/ /workspace/board_files/; \
+    mv pynq-z2/ /workspace/board_files/; \
+    rm pynq-z1.zip; \
+    rm pynq-z2.zip; \
+    git clone https://github.com/Avnet/bdf.git /workspace/avnet-bdf; \
+    git -C /workspace/avnet-bdf checkout  $AVNET_BDF_COMMIT; \
+    mv /workspace/avnet-bdf/* /workspace/board_files/;
+
+
+# extra environment variables for FINN compiler
+ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"
+ENV PATH "${PATH}:/workspace/oh-my-xilinx"
+ENV OHMYXILINX "/workspace/oh-my-xilinx"
+
+WORKDIR /workspace/finn
+
+COPY docker/finn_entrypoint.sh /usr/local/bin/
+COPY docker/quicktest.sh /usr/local/bin/
+RUN chmod 755 /usr/local/bin/finn_entrypoint.sh
+RUN chmod 755 /usr/local/bin/quicktest.sh
+ENTRYPOINT ["finn_entrypoint.sh"]
+CMD ["bash"]
diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci
deleted file mode 100644
index 925b54a08ee16fd142e68340447fcde932dfdbfc..0000000000000000000000000000000000000000
--- a/docker/Dockerfile.finn_ci
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (c) 2020, Xilinx
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of FINN nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel
-MAINTAINER Yaman Umuroglu <yamanu@xilinx.com>
-
-WORKDIR /workspace
-
-RUN apt-get update
-RUN apt-get -y upgrade
-RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
-RUN apt-get install -y verilator zsh nano g++-multilib
-RUN apt-get install -y sshpass wget unzip
-RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
-
-# XRT deps
-RUN wget https://raw.githubusercontent.com/Xilinx/XRT/master/src/runtime_src/tools/scripts/xrtdeps.sh
-RUN apt-get update
-RUN bash xrtdeps.sh -docker
-RUN rm xrtdeps.sh
-
-# cloning dependency repos
-# finn-base
-RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
-# Brevitas
-RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas
-# CNPY
-RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy
-# FINN hlslib
-RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib
-# PyVerilator
-RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator
-# oh-my-xilinx
-RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx
-
-COPY requirements.txt .
-RUN pip install -r requirements.txt
-RUN rm requirements.txt
-RUN pip install pytest-dependency
-RUN pip install pytest-xdist
-RUN pip install pytest-parallel
-RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading
-
-ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"
-ENV PATH "${PATH}:/workspace/oh-my-xilinx"
-ENV OHMYXILINX "/workspace/oh-my-xilinx"
-
-# colorful terminal output
-RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >>  /root/.bashrc
-
-WORKDIR /workspace/finn
-
-COPY docker/finn_entrypoint.sh /usr/local/bin/
-COPY docker/quicktest.sh /usr/local/bin/
-RUN chmod 755 /usr/local/bin/finn_entrypoint.sh
-RUN chmod 755 /usr/local/bin/quicktest.sh
-ENTRYPOINT ["finn_entrypoint.sh"]
-CMD ["bash"]
diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev
deleted file mode 100644
index a2abaac17c3bbdfb9440644fc371ffb4461621f6..0000000000000000000000000000000000000000
--- a/docker/Dockerfile.finn_dev
+++ /dev/null
@@ -1,117 +0,0 @@
-# Copyright (c) 2020, Xilinx
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# * Redistributions of source code must retain the above copyright notice, this
-#   list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-#
-# * Neither the name of FINN nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel
-MAINTAINER Yaman Umuroglu <yamanu@xilinx.com>
-ARG GID
-ARG GNAME
-ARG UNAME
-ARG UID
-ARG PASSWD
-
-WORKDIR /workspace
-
-RUN apt-get update
-RUN apt-get -y upgrade
-RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
-RUN apt-get install -y verilator nano zsh rsync g++-multilib
-RUN apt-get -y install sshpass wget unzip
-RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
-
-COPY requirements.txt .
-RUN pip install -r requirements.txt
-RUN rm requirements.txt
-RUN pip install pygments==2.4.1
-RUN pip install jupyter==1.0.0
-RUN pip install matplotlib==3.3.1 --ignore-installed
-RUN pip install pytest-dependency==0.5.1
-RUN pip install sphinx==3.1.2
-RUN pip install sphinx_rtd_theme==0.5.0
-RUN pip install pytest-xdist==2.0.0
-RUN pip install pytest-parallel==0.1.0
-RUN pip install netron>=4.7.9
-RUN pip install pandas==1.1.5
-RUN pip install scikit-learn==0.24.1
-RUN pip install tqdm==4.31.1
-RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading
-
-# switch user
-RUN groupadd -g $GID $GNAME
-RUN useradd -M -u $UID $UNAME -g $GNAME
-RUN usermod -aG sudo $UNAME
-RUN echo "$UNAME:$PASSWD" | chpasswd
-RUN echo "root:$PASSWD" | chpasswd
-RUN ln -s /workspace /home/$UNAME
-RUN chown -R $UNAME:$GNAME /home/$UNAME
-USER $UNAME
-
-# cloning dependency repos (as user)
-# finn-base
-RUN git clone https://github.com/Xilinx/finn-base.git /workspace/finn-base
-# Brevitas
-RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas
-# CNPY
-RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy
-# FINN hlslib
-RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib
-# PyVerilator
-RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator
-# oh-my-xilinx
-RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx
-
-# for this developer-oriented Docker container we assume the FINN repo is cloned and mounted from the host
-# at /workspace/finn -- see run-docker.sh for an example of how to do this.
-ENV PATH "${PATH}:/workspace/oh-my-xilinx:/home/$UNAME/.local/bin"
-ENV OHMYXILINX "/workspace/oh-my-xilinx"
-
-WORKDIR /home/$UNAME/finn
-RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >>  /home/$UNAME/.bashrc
-RUN echo "source \$VIVADO_PATH/settings64.sh" >> /home/$UNAME/.bashrc
-
-# copy entrypoint script
-USER root
-COPY docker/finn_entrypoint.sh /usr/local/bin/
-COPY docker/quicktest.sh /usr/local/bin/
-RUN chmod 755 /usr/local/bin/finn_entrypoint.sh
-RUN chmod 755 /usr/local/bin/quicktest.sh
-# install vitis deps if required
-ARG INSTALL_XRT_DEPS
-RUN if [ "$INSTALL_XRT_DEPS" = "1" ] ; then \
-    echo "Installing XRT dependencies"; \
-    wget https://raw.githubusercontent.com/Xilinx/XRT/master/src/runtime_src/tools/scripts/xrtdeps.sh; \
-    apt-get update; \
-    bash xrtdeps.sh -docker; \
-    rm xrtdeps.sh; \
-  else \
-    echo "Skipping installation of XRT dependencies"; \
-  fi
-
-USER $UNAME
-
-ENTRYPOINT ["finn_entrypoint.sh"]
-CMD ["bash"]
diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 55ee33706bb84810ef0eead87c414d442d131f7a..a2312d025b616acd285b94f1b56b83f0c35cc0ae 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -1,95 +1,94 @@
 #!/bin/bash
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 
-export SHELL=/bin/bash
 export FINN_ROOT=/workspace/finn
+export HOME=/tmp/home_dir
+export SHELL=/bin/bash
+# colorful terminal output
+export PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '
 
+YELLOW='\033[0;33m'
 GREEN='\033[0;32m'
+RED='\033[0;31m'
 NC='\033[0m' # No Color
 
+yecho () {
+  echo -e "${YELLOW}WARNING: $1${NC}"
+}
+
 gecho () {
   echo -e "${GREEN}$1${NC}"
 }
 
-# checkout the correct dependency repo commits
-# the repos themselves are cloned in the Dockerfile
-FINN_BASE_COMMIT=94beb27de0decb58d31555823860a24da5f09c5a
-BREVITAS_COMMIT=aff49758ec445d77c75721c7de3091a2a1797ca8
-CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
-HLSLIB_COMMIT=1b0bb309256d51d10a8bcdb380caf09de679c8f7
-PYVERILATOR_COMMIT=e2ff74030de3992dcac54bf1b6aad2915946e8cb
-OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada
-
-gecho "Setting up known-good commit versions for FINN dependencies"
-# finn-base
-gecho "finn-base @ $FINN_BASE_COMMIT"
-git -C /workspace/finn-base pull --quiet
-git -C /workspace/finn-base checkout $FINN_BASE_COMMIT --quiet
-pip install --user -e /workspace/finn-base
-# Brevitas
-gecho "brevitas @ $BREVITAS_COMMIT"
-git -C /workspace/brevitas pull --quiet
-git -C /workspace/brevitas checkout $BREVITAS_COMMIT --quiet
-pip install --user -e /workspace/brevitas
-# CNPY
-gecho "cnpy @ $CNPY_COMMIT"
-git -C /workspace/cnpy pull --quiet
-git -C /workspace/cnpy checkout $CNPY_COMMIT --quiet
-# FINN hlslib
-gecho "finn-hlslib @ $HLSLIB_COMMIT"
-git -C /workspace/finn-hlslib pull --quiet
-git -C /workspace/finn-hlslib checkout $HLSLIB_COMMIT --quiet
-# PyVerilator
-gecho "PyVerilator @ $PYVERILATOR_COMMIT"
-git -C /workspace/pyverilator pull --quiet
-git -C /workspace/pyverilator checkout $PYVERILATOR_COMMIT --quiet
-pip install --user -e /workspace/pyverilator
-# oh-my-xilinx
-gecho "oh-my-xilinx @ $OMX_COMMIT"
-git -C /workspace/oh-my-xilinx pull --quiet
-git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT --quiet
-# remove old version egg-info, if any
-rm -rf $FINN_ROOT/src/FINN.egg-info
-# run pip install for finn
-pip install --user -e $FINN_ROOT
+recho () {
+  echo -e "${RED}ERROR: $1${NC}"
+}
 
-if [ ! -z "$VIVADO_PATH" ];then
-  # source Vivado env.vars
-  export XILINX_VIVADO=$VIVADO_PATH
-  source $VIVADO_PATH/settings64.sh
+if [ -f "$FINN_ROOT/setup.py" ];then
+  # run pip install for finn
+  pip install --user -e $FINN_ROOT
+else
+  recho "Unable to find FINN source code in /workspace/finn"
+  recho "Ensure you have passed -v <path-to-finn-repo>:/workspace/finn to the docker run command"
+  exit -1
 fi
 
-# download PYNQ board files if not already there
-if [ ! -d "/workspace/finn/board_files" ]; then
-    gecho "Downloading PYNQ board files for Vivado"
-    OLD_PWD=$(pwd)
-    cd /workspace/finn
-    wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip
-    wget -q https://d2m32eurp10079.cloudfront.net/Download/pynq-z2.zip
-    unzip -q pynq-z1.zip
-    unzip -q pynq-z2.zip
-    mkdir /workspace/finn/board_files
-    mv pynq-z1/ board_files/
-    mv pynq-z2/ board_files/
-    rm pynq-z1.zip
-    rm pynq-z2.zip
-    cd $OLD_PWD
-fi
-if [ ! -d "/workspace/finn/board_files/ultra96v1" ]; then
-    gecho "Downloading Avnet BDF files into board_files"
-    OLD_PWD=$(pwd)
-    cd /workspace/finn
-    git clone https://github.com/Avnet/bdf.git
-    mv /workspace/finn/bdf/* /workspace/finn/board_files/
-    rm -rf /workspace/finn/bdf
-    cd $OLD_PWD
-fi
-if [ ! -z "$VITIS_PATH" ];then
+if [ -f "$VITIS_PATH/settings64.sh" ];then
   # source Vitis env.vars
   export XILINX_VITIS=$VITIS_PATH
+  export XILINX_XRT=/opt/xilinx/xrt
   source $VITIS_PATH/settings64.sh
-  if [ ! -z "$XILINX_XRT" ];then
+  gecho "Found Vitis at $VITIS_PATH"
+  if [ -f "$XILINX_XRT/setup.sh" ];then
     # source XRT
     source $XILINX_XRT/setup.sh
+    gecho "Found XRT at $XILINX_XRT"
+  else
+    recho "XRT not found on $XILINX_XRT, did the installation fail?"
+    exit -1
+  fi
+else
+  yecho "Unable to find $VITIS_PATH/settings64.sh"
+  yecho "Functionality dependent on Vitis will not be available."
+  yecho "If you need Vitis, ensure VITIS_PATH is set correctly and mounted into the Docker container."
+  if [ -f "$VIVADO_PATH/settings64.sh" ];then
+    # source Vivado env.vars
+    export XILINX_VIVADO=$VIVADO_PATH
+    source $VIVADO_PATH/settings64.sh
+    gecho "Found Vivado at $VIVADO_PATH"
+  else
+    yecho "Unable to find $VIVADO_PATH/settings64.sh"
+    yecho "Functionality dependent on Vivado will not be available."
+    yecho "If you need Vivado, ensure VIVADO_PATH is set correctly and mounted into the Docker container."
   fi
 fi
+
+# execute the provided command(s) as root
 exec "$@"
diff --git a/docker/Dockerfile.jenkins b/docker/jenkins/Dockerfile.jenkins
similarity index 100%
rename from docker/Dockerfile.jenkins
rename to docker/jenkins/Dockerfile.jenkins
diff --git a/docker/Jenkinsfile b/docker/jenkins/Jenkinsfile
similarity index 85%
rename from docker/Jenkinsfile
rename to docker/jenkins/Jenkinsfile
index b2d3102bd4aa3c00620f41c102af5a8b385cede7..f3211941890d634b12142ed13c0f0cf49a9003d8 100644
--- a/docker/Jenkinsfile
+++ b/docker/jenkins/Jenkinsfile
@@ -2,7 +2,8 @@ pipeline {
     agent any
     parameters {
         string(name: 'FINN_CI_BRANCH', defaultValue: '', description: 'FINN branch to build')
-        string(name: 'VIVADO_PATH', defaultValue: '', description: 'Path to Vivado installation')
+        string(name: 'FINN_XILINX_PATH', defaultValue: '', description: 'Path to Xilinx tool installation')
+        string(name: 'FINN_XILINX_VERSION', defaultValue: '2020.1', description: 'Xilinx tool version')
         string(name: 'PYNQ_BOARD', defaultValue: 'Pynq-Z1', description: 'PYNQ board type')
         string(name: 'PYNQ_IP', defaultValue: '', description: 'PYNQ board IP address')
         string(name: 'PYNQ_USERNAME', defaultValue: 'xilinx', description: 'PYNQ board username')
@@ -22,6 +23,8 @@ pipeline {
         DOCKER_TAG='finn_ci:$BUILD_ID'
         DOCKER_INST_NAME='finn_ci'
         BUILD_PATH='/tmp/finn_ci'
+        VIVADO_PATH="${params.FINN_XILINX_PATH}/Vivado/${params.FINN_XILINX_VERSION}"
+        VITIS_PATH="${params.FINN_XILINX_PATH}/Vitis/${params.FINN_XILINX_VERSION}"
     }
     stages {
         stage("Clone") {
@@ -45,10 +48,11 @@ pipeline {
                 docker run --init \
                 --hostname $DOCKER_INST_NAME \
                 -v ${params.WORKSPACE_MOUNT}:/workspace/finn \
-                -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \
+                -v ${params.FINN_XILINX_PATH}:${params.FINN_XILINX_PATH}:ro \
                 -e NUM_DEFAULT_WORKERS=1 \
                 -e FINN_INST_NAME=$DOCKER_INST_NAME \
-                -e VIVADO_PATH=${params.VIVADO_PATH} \
+                -e VIVADO_PATH=$VIVADO_PATH \
+                -e VITIS_PATH=$VITIS_PATH \
                 -e PYNQ_BOARD=${params.PYNQ_BOARD} \
                 -e PYNQ_IP=${params.PYNQ_IP} \
                 -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \
@@ -65,10 +69,11 @@ pipeline {
                 docker run --init \
                 --hostname $DOCKER_INST_NAME \
                 -v ${params.WORKSPACE_MOUNT}:/workspace/finn \
-                -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \
+                -v $VIVADO_PATH:$VIVADO_PATH:ro \
                 -e NUM_DEFAULT_WORKERS=1 \
                 -e FINN_INST_NAME=$DOCKER_INST_NAME \
-                -e VIVADO_PATH=${params.VIVADO_PATH} \
+                -e VIVADO_PATH=$VIVADO_PATH \
+                -e VITIS_PATH=$VITIS_PATH \
                 -e PYNQ_BOARD=${params.PYNQ_BOARD} \
                 -e PYNQ_IP=${params.PYNQ_IP} \
                 -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \
@@ -85,10 +90,11 @@ pipeline {
                 docker run --init \
                 --hostname $DOCKER_INST_NAME \
                 -v ${params.WORKSPACE_MOUNT}:/workspace/finn \
-                -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \
+                -v $VIVADO_PATH:$VIVADO_PATH:ro \
                 -e NUM_DEFAULT_WORKERS=${params.NUM_DEFAULT_WORKERS} \
                 -e FINN_INST_NAME=$DOCKER_INST_NAME \
-                -e VIVADO_PATH=${params.VIVADO_PATH} \
+                -e VIVADO_PATH=$VIVADO_PATH \
+                -e VITIS_PATH=$VITIS_PATH \
                 -e PYNQ_BOARD=${params.PYNQ_BOARD} \
                 -e PYNQ_IP=${params.PYNQ_IP} \
                 -e PYNQ_USERNAME=${params.PYNQ_USERNAME} \
diff --git a/docker/launch-jenkins.sh b/docker/jenkins/launch-jenkins.sh
similarity index 100%
rename from docker/launch-jenkins.sh
rename to docker/jenkins/launch-jenkins.sh
diff --git a/docker/quicktest.sh b/docker/quicktest.sh
index b06feccdc578a59c8ef00531871e1211c2a407e5..b4ad37232fa69754a86e9064d7592d7474e8617e 100755
--- a/docker/quicktest.sh
+++ b/docker/quicktest.sh
@@ -5,8 +5,8 @@
 cd $FINN_ROOT
 # check if command line argument is empty or not present
 if [ -z $1 ]; then
-  echo "Running quicktest: not (vivado or slow) with pytest-xdist"
-  python setup.py test --addopts "-m 'not (vivado or slow or vitis)' --dist=loadfile -n $PYTEST_PARALLEL"
+  echo "Running quicktest: not (vivado or slow or board) with pytest-xdist"
+  python setup.py test --addopts "-m 'not (vivado or slow or vitis or board)' --dist=loadfile -n $PYTEST_PARALLEL"
 elif [ $1 = "main" ]; then
   echo "Running main test suite: not (rtlsim or end2end) with pytest-xdist"
   python setup.py test --addopts "-k 'not (rtlsim or end2end)' --dist=loadfile -n $PYTEST_PARALLEL"
diff --git a/docs/finn/conf.py b/docs/finn/conf.py
index 1bd179c3f7904ba102f7a9b4f2edc2739ba58183..47ba99fb5fca55e99b3da6403532f145ac8ebbca 100644
--- a/docs/finn/conf.py
+++ b/docs/finn/conf.py
@@ -12,14 +12,15 @@
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('../../src/'))
+
+sys.path.insert(0, os.path.abspath("../../src/"))
 
 
 # -- Project information -----------------------------------------------------
 
-project = 'FINN'
-copyright = '2020, Xilinx'
-author = 'Y. Umuroglu and J. Petri-Koenig'
+project = "FINN"
+copyright = "2020, Xilinx"
+author = "Y. Umuroglu and J. Petri-Koenig"
 
 
 # -- General configuration ---------------------------------------------------
@@ -27,17 +28,17 @@ author = 'Y. Umuroglu and J. Petri-Koenig'
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-]
-extensions.append('sphinx.ext.autodoc')
+extensions = []
+extensions.append("sphinx.ext.autodoc")
+extensions.append("sphinx.ext.autosectionlabel")
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 
 # -- Options for HTML output -------------------------------------------------
@@ -45,11 +46,11 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
-master_doc = 'index'
+master_doc = "index"
diff --git a/docs/finn/faq.rst b/docs/finn/faq.rst
index 093344e70331572a425a09d34c5a68d7313bc521..87e36e0722e4db6b2efd5de5df343b7bdf68a719 100644
--- a/docs/finn/faq.rst
+++ b/docs/finn/faq.rst
@@ -9,25 +9,25 @@ Frequently Asked Questions
 Can I install FINN out of the Docker container?
 ===============================================
 
-We do not support out of the Docker implementations at the moment. This is due 
+We do not support out of the Docker implementations at the moment. This is due
 to the high complexity of the FINN project dependencies.
 
 Since FINN uses ONNX, can I compile any model from the ONNX Model Zoo to an FPGA accelerator?
 =============================================================================================
 
-The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer 
+The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
 types and quantization annotations. Networks must be first quantized using Brevitas and exported
 to FINN-ONNX to be converted to FPGA accelerators.
 
 
-Can I deploy custom NNs with arbitrary precisions and layers using FINN? 
+Can I deploy custom NNs with arbitrary precisions and layers using FINN?
 =========================================================================
 
 Yes, though the effort required and quality of results will vary.
-Although we do support arbitrary 
-precision, the way we create the hardware isn't typically practical for more than 
-4 bits, or very large networks for a single FPGA. 
-In terms of layers, only a subset of quantized layers covered by the various FINN examples 
+Although we do support arbitrary
+precision, the way we create the hardware isn't typically practical for more than
+4 bits, or very large networks for a single FPGA.
+In terms of layers, only a subset of quantized layers covered by the various FINN examples
 are currently supported.
 It is possible to add support for new layers, though we don't have tutorials for this in place
 just yet.
@@ -35,16 +35,16 @@ just yet.
 Does FINN only work with the example networks?
 ==============================================
 
-FINN isn't restricted to the example networks; 
-rather, it's restricted to certain patterns (e.g. certain layer types and their combinations). 
-The current best practice for custom networks is to take a working network and gradually modify it. 
+FINN isn't restricted to the example networks;
+rather, it's restricted to certain patterns (e.g. certain layer types and their combinations).
+The current best practice for custom networks is to take a working network and gradually modify it.
 
 What is the expected background for using FINN?
 ===============================================
 
 Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
 is expected.
-Our goal is to make the tool in a shape and form so that no hardware/FPGA background 
+Our goal is to make the tool in a shape and form so that no hardware/FPGA background
 should be necessary, although having some knowledge would give better results.
 
 What operating systems are supported by FINN?
@@ -66,6 +66,6 @@ What board do you recommend to start working with FINN?
 Our preferred target platforms are those supported by  `PYNQ <http://www.pynq.io/board.html>`_.
 For those boards we can offer end-to-end (DNN-to-bitstream) deployment,
 see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples.
-However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO) 
+However, FINN also supports Vivado IP Integrator designs. The IPs connect using AXI stream (FIFO)
 in-and-out interfaces. This means that it can be integrated onto any Xilinx FPGA board,
 though you will have to do the system integration manually.
diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst
index bff31cde45122ebc25f515422ffc523f4f78e3be..14a1ec44a00fc9448b067bae6480091897f47472 100644
--- a/docs/finn/getting_started.rst
+++ b/docs/finn/getting_started.rst
@@ -4,91 +4,64 @@
 Getting Started
 ***************
 
-How to use the FINN compiler
-============================
-Currently, it's best to think of the FINN compiler as *compiler infrastructure*
-instead of a full *compiler* like `gcc` (although the aim is to get there).
-Although we provide a :ref:`command_line` entry for building dataflow
-accelerators, this only exposes a basic flow that works for simpler networks.
-A better way of looking at the FINN compiler is as a collection of scripts/tools that will help
-you convert a QNN into a custom FPGA accelerator that performs high-performance inference.
-
-**So where do I get started?** The best way of getting started with the FINN
-compiler is to follow the existing
-`Jupyter notebooks <tutorials>`_ and check out the prebuilt
-`examples <https://github.com/Xilinx/finn-examples>`_.
-
-**How do I compile my custom network?**
-This depends on how similar your custom network is to the examples we provide.
+Quickstart
+==========
+
+1. Install Docker and set it up to run `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_
+2. Set up ``FINN_XILINX_PATH`` and ``FINN_XILINX_VERSION`` environment variables pointing respectively to the Xilinx tools installation directory and version (e.g. ``FINN_XILINX_PATH=/opt/Xilinx`` and ``FINN_XILINX_VERSION=2020.1``)
+3. Clone the FINN compiler from the repo: ``git clone https://github.com/Xilinx/finn/`` and go into the directory where it is cloned
+4. Execute ``./run-docker.sh quicktest`` to verify your installation (see the example session below).
+5. Optionally, follow the instructions on :ref:`PYNQ board first-time setup` or :ref:`Alveo first-time setup` for board setup.
+6. All done! See :ref:`Running FINN in Docker` for the various options on how to run the FINN compiler.
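+
+For reference, steps 2-4 on a typical host might look like the following shell session (a minimal example; the installation path and tool version shown here are assumptions you should adjust to your system):
+
+::
+
+  export FINN_XILINX_PATH=/opt/Xilinx
+  export FINN_XILINX_VERSION=2020.1
+  git clone https://github.com/Xilinx/finn/
+  cd finn
+  ./run-docker.sh quicktest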
+
+
+How do I use FINN?
+==================
+
+We strongly recommend that you first watch one of the pre-recorded `FINN tutorial <https://www.youtube.com/watch?v=zw2aG4PhzmA&amp%3Bindex=2>`_
+videos, then follow the Jupyter notebook tutorials for `training and deploying an MLP for network intrusion detection <https://github.com/Xilinx/finn/tree/master/notebooks/end2end_example/cybersecurity>`_ .
+You may also want to check out the other :ref:`tutorials`, and the `FINN examples repository <https://github.com/Xilinx/finn-examples>`_ .
+
+Our aim in FINN is *not* to accelerate common off-the-shelf neural networks, but instead to provide you with a set of tools
+to train *customized* networks and create highly-efficient FPGA implementations from them.
+In general, the approach for using the FINN framework is as follows:
+
+1. Train your own quantized neural network (QNN) in `Brevitas <https://github.com/Xilinx/brevitas>`_. We have some `guidelines <https://bit.ly/finn-hls4ml-qat-guidelines>`_ on quantization-aware training (QAT).
+2. Export to FINN-ONNX by following `this tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/basics/1_brevitas_network_import.ipynb>`_ .
+3. Use FINN's ``build_dataflow`` system on the exported model by following `this tutorial <https://github.com/Xilinx/finn/blob/master/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb>`_ (see the sketch after this list).
+4. Adjust your QNN topology, quantization settings and ``build_dataflow`` configuration to get the desired results.
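+
+For reference, driving ``build_dataflow`` from Python looks roughly like the sketch below. It is adapted from the cybersecurity tutorial flow; the model filename, board name, performance targets and output types are placeholder assumptions that you should adjust for your own network.
+
+::
+
+  import finn.builder.build_dataflow as build
+  import finn.builder.build_dataflow_config as build_cfg
+
+  # quick first pass: generate resource and performance estimates only
+  cfg = build_cfg.DataflowBuildConfig(
+      output_dir="output_estimates_only",
+      target_fps=1000000,
+      synth_clk_period_ns=10.0,
+      board="Pynq-Z1",
+      shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
+      generate_outputs=[build_cfg.DataflowOutputType.ESTIMATE_REPORTS],
+  )
+  build.build_dataflow_cfg("model.onnx", cfg)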
+
+Please note that the framework is still under development, and how well this works will depend on how similar your custom network is to the examples we provide.
 If there are substantial differences, you will most likely have to write your own
 Python scripts that call the appropriate FINN compiler
 functions that process your design correctly, or adding new functions (including
 Vivado HLS layers)
 as required.
-For custom networks, we recommend making a copy of the end-to-end
-Jupyter notebook as a starting point, visualizing the model at intermediate
+The `advanced FINN tutorials <https://github.com/Xilinx/finn/tree/master/notebooks/advanced>`_ can be useful here.
+For custom networks, we recommend making a copy of the `BNN-PYNQ end-to-end
+Jupyter notebook tutorials <https://github.com/Xilinx/finn/tree/master/notebooks/end2end_example/bnn-pynq>`_ as a starting point, visualizing the model at intermediate
 steps and adding calls to new transformations as needed.
 Once you have a working flow, you can implement a command line entry for this
 by using the "advanced mode" described in the :ref:`command_line` section.
 
-
-
-
-System Requirements
-====================
-
-* Ubuntu 18.04 with ``bash`` installed
-* Docker `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_
-* A working Vivado 2019.1 or 2020.1 installation
-* A ``VIVADO_PATH`` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located)
-* *(optional)* A PYNQ board with a network connection
-   * the ``bitstring`` package must be installed on the PYNQ: ``sudo pip3 install bitstring``
-* *(optional)* An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_ below)
-
-We also recommend running the FINN compiler on a system with sufficiently
-strong hardware:
-
-* **RAM.** Depending on your target FPGA platform, your system must have sufficient RAM to be
-  able to run Vivado/Vitis synthesis for that part. See `this page <https://www.xilinx.com/products/design-tools/vivado/memory.html>`_
-  for more information. For targeting Zynq and Zynq UltraScale+ parts, at least 8 GB is recommended. Larger parts may require up to 16 GB.
-  For targeting Alveo parts with Vitis, at least 64 GB RAM is recommended.
-
-* **CPU.** FINN can parallelize HLS synthesis and several other operations for different
-  layers, so using a multi-core CPU is recommended. However, this should be balanced
-  against the memory usage as a high degree of parallelization will require more
-  memory. See the ``NUM_DEFAULT_WORKERS`` environment variable below for more on
-  how to control the degree of parallelization.
-
-* **Storage.** While going through the build steps, FINN will generate many files as part of
-  the process. For larger networks, you may need 10s of GB of space for the temporary
-  files generated during the build.
-  By default, these generated files will be placed under ``/tmp/finn_dev_<username>``.
-  You can override this location by using the ``FINN_HOST_BUILD_DIR`` environment
-  variable.
-  Mapping the generated file dir to a fast SSD will result in quicker builds.
-
-
 Running FINN in Docker
 ======================
-We use Docker extensively for developing and deploying FINN. If you are not familiar with Docker, there are many excellent `online resources <https://docker-curriculum.com/>`_ to get started. There is a Dockerfile in the root of the repository, as well as a `run-docker.sh` script that can be launched in the following modes:
+FINN runs only inside a Docker container, and comes with a script to easily build and launch the container. If you are not familiar with Docker, there are many excellent `online resources <https://docker-curriculum.com/>`_ to get started.
+You may want to review the :ref:`General FINN Docker tips` and :ref:`Environment variables` as well.
+If you want to use prebuilt images, read :ref:`Using a prebuilt image`.
+The ``run-docker.sh`` script can be launched in the following modes:
 
-Getting an interactive shell for development or experimentation
-***************************************************************
-.. warning:: Do not use ``sudo`` to launch the FINN Docker. Instead, setup Docker to run `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_
+Launch interactive shell
+************************
+Simply running ``bash ./run-docker.sh`` without any additional arguments will create a Docker container with all dependencies and give you a terminal that you can use for development and experimentation:
 
 ::
 
   bash ./run_docker.sh
 
-Simply running sh run-docker.sh without any additional arguments will clone the dependency repos, create a Docker container and give you a terminal with you can use for development for experimentation.
-If you want a new terminal on an already-running container, you can do this with `docker exec -it finn_dev_<username> bash`.
-
-.. warning:: The Docker container is spawned with the `--rm` option, so make sure that any important files you created inside the container are either in the /workspace/finn folder (which is mounted from the host computer) or otherwise backed up.
-
-.. note:: **Develop from host, run inside container:** The FINN repository directory will be mounted from the host, so that you can use a text editor on your host computer to develop and the changes will be reflected directly inside the container.
 
-Command Line Entry
-*******************
+Launch a Build with ``build_dataflow``
+**************************************
 FINN is currently more compiler infrastructure than compiler, but we do offer
 a :ref:`command_line` entry for certain use-cases. These run a predefined flow
 or a user-defined flow from the command line as follows:
@@ -99,16 +72,17 @@ or a user-defined flow from the command line as follows:
   bash ./run_docker.sh build_custom <path/to/custom_build_dir/>
 
 
-Running the Jupyter notebooks
-*****************************
+Launch Jupyter notebooks
+************************
+FINN comes with numerous Jupyter notebook tutorials, which you can launch with:
+
 ::
 
   bash ./run-docker.sh notebook
 
 This will launch the `Jupyter notebook <https://jupyter.org/>`_ server inside a Docker container, and print a link on the terminal that you can open in your browser to run the FINN notebooks or create new ones.
 .. note:: The link will look something like this (the token you get will be different):
-http://127.0.0.1:8888/?token=f5c6bd32ae93ec103a88152214baedff4ce1850d81065bfc
-
+http://127.0.0.1:8888/?token=f5c6bd32ae93ec103a88152214baedff4ce1850d81065bfc
+
 The ``run-docker.sh`` script forwards ports 8888 for Jupyter and 8081 for Netron, and launches the notebook server with appropriate arguments.
 
 
@@ -118,45 +92,125 @@ Environment variables
 Prior to running the `run-docker.sh` script, there are several environment variables you can set to configure certain aspects of FINN.
 These are summarized below:
 
-* ``VIVADO_PATH`` points to your Vivado installation on the host
-* (optional, for Vitis & Alveo only) ``VITIS_PATH``, ``PLATFORM_REPO_PATHS`` and ``XILINX_XRT`` respectively point to your Vitis installation, the Vitis platform files, and Xilinx XRT
+* (required) ``FINN_XILINX_PATH`` points to your Xilinx tools installation on the host (e.g. ``/opt/Xilinx``)
+* (required) ``FINN_XILINX_VERSION`` sets the Xilinx tools version to be used (e.g. ``2020.1``)
+* (required for Alveo) ``PLATFORM_REPO_PATHS`` points to the Vitis platform files (DSA).
+* (required for Alveo) ``XRT_DEB_VERSION`` specifies the .deb to be installed for XRT inside the container (see default value in ``run-docker.sh``).
+* (optional) ``NUM_DEFAULT_WORKERS`` (default 4) specifies the degree of parallelization for the transformations that can be run in parallel, potentially reducing build time
+* (optional) ``FINN_HOST_BUILD_DIR`` specifies which directory on the host will be used as the build directory. Defaults to ``/tmp/finn_dev_<username>``
 * (optional) ``JUPYTER_PORT`` (default 8888) changes the port for Jupyter inside Docker
 * (optional) ``JUPYTER_PASSWD_HASH`` (default "") Set the Jupyter notebook password hash. If set to empty string, token authentication will be used (token printed in terminal on launch).
 * (optional) ``LOCALHOST_URL`` (default localhost) sets the base URL for accessing e.g. Netron from inside the container. Useful when running FINN remotely.
 * (optional) ``NETRON_PORT`` (default 8081) changes the port for Netron inside Docker
-* (optional) ``NUM_DEFAULT_WORKERS`` (default 1) specifies the degree of parallelization for the transformations that can be run in parallel
 * (optional) ``PYNQ_BOARD`` or ``ALVEO_BOARD`` specifies the type of PYNQ/Alveo board used (see "supported hardware" below) for the test suite
 * (optional) ``PYNQ_IP`` and ``PYNQ_PORT`` (or ``ALVEO_IP`` and ``ALVEO_PORT``) specify ip address and port number to access the PYNQ board / Alveo target
 * (optional) ``PYNQ_USERNAME`` and ``PYNQ_PASSWORD`` (or ``ALVEO_USERNAME`` and ``ALVEO_PASSWORD``) specify the PYNQ board / Alveo host access credentials for the test suite. For PYNQ, password is always needed to run as sudo. For Alveo, you can leave the password empty and place your ssh private key in the ``finn/ssh_keys`` folder to use keypair authentication.
 * (optional) ``PYNQ_TARGET_DIR`` (or ``ALVEO_TARGET_DIR``) specifies the target dir on the PYNQ board / Alveo host for the test suite
-* (optional) ``FINN_HOST_BUILD_DIR`` specifies which directory on the host will be used as the build directory. Defaults to ``/tmp/finn_dev_<username>``
 * (optional) ``IMAGENET_VAL_PATH`` specifies the path to the ImageNet validation directory for tests.
+* (optional) ``FINN_DOCKER_PREBUILT`` (default 0) if set to 1, skips building the Docker image and uses the image tagged with ``FINN_DOCKER_TAG`` instead.
+* (optional) ``FINN_DOCKER_TAG`` (autogenerated) specifies the Docker image tag to use.
+* (optional) ``FINN_DOCKER_RUN_AS_ROOT`` (default 0) if set to 1, runs the Docker container as root; by default it runs as the current user.
+* (optional) ``FINN_DOCKER_GPU`` (autodetected) if not 0, exposes all Nvidia GPUs (or those selected by ``NVIDIA_VISIBLE_DEVICES``) to the Docker container for accelerated DNN training. Requires the `Nvidia Container Toolkit <https://github.com/NVIDIA/nvidia-docker>`_.
+* (optional) ``NVIDIA_VISIBLE_DEVICES`` (default "") specifies which Nvidia GPUs to use in the Docker container. Possible values are a comma-separated list of GPU UUID(s) or index(es), e.g. ``0,1,2``, ``all``, ``none``, or void/empty/unset.
+* (optional) ``DOCKER_BUILDKIT`` (default "1") enables `Docker BuildKit <https://docs.docker.com/develop/develop-images/build_enhancements/>`_ for faster Docker image rebuilding (recommended).
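+
+As an example, a minimal host-side setup before launching the container could look like the following (the paths, version and worker count are placeholders; adjust them to your own installation):
+
+::
+
+  export FINN_XILINX_PATH=/opt/Xilinx
+  export FINN_XILINX_VERSION=2020.1
+  export NUM_DEFAULT_WORKERS=8
+  bash ./run-docker.sh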
+
+General FINN Docker tips
+************************
+* Several folders, including the root directory of the FINN compiler and the ``FINN_HOST_BUILD_DIR``, will be mounted into the Docker container and can be used to exchange files.
+* Do not use ``sudo`` to launch the FINN Docker. Instead, set up Docker to run `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_.
+* If you want a new terminal on an already-running container, you can do this with `docker exec -it <name_of_container> bash`.
+* The container is spawned with the `--rm` option, so make sure that any important files you created inside the container are either in the /workspace/finn folder (which is mounted from the host computer) or otherwise backed up.
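+
+For example, to attach a second shell to an already-running container (use ``docker ps`` to look up the container name first):
+
+::
+
+  docker ps                                 # find the name of the running FINN container
+  docker exec -it <name_of_container> bash  # open another shell inside it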
+
+Using a prebuilt image
+**********************
+
+By default the ``run-docker.sh`` script tries to re-build the Docker image with each run. After the first run this should go quite fast thanks to Docker caching.
+If you are having trouble building the Docker image or need offline access, you can use prebuilt images by following these steps:
+
+1. Pull a prebuilt Docker image with ``docker pull maltanar/finn:<tag>``, where ``<tag>`` can be ``dev_latest`` or ``main_latest``.
+2. Set ``FINN_DOCKER_TAG`` to the name of the image you just pulled, e.g. ``FINN_DOCKER_TAG=maltanar/finn:dev_latest``.
+3. Set ``FINN_DOCKER_PREBUILT=1``.
+4. You can now launch the Docker container in all modes without re-building the image or requiring internet access, as in the example below.
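+
+Putting these steps together, using the prebuilt development image could look like this (use ``main_latest`` instead of ``dev_latest`` if you want the main branch):
+
+::
+
+  docker pull maltanar/finn:dev_latest
+  export FINN_DOCKER_TAG=maltanar/finn:dev_latest
+  export FINN_DOCKER_PREBUILT=1
+  bash ./run-docker.sh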
+
 
-Supported Hardware
-===================
+Supported FPGA Hardware
+=======================
 **Shell-integrated accelerator + driver:** For quick deployment, we target boards supported by  `PYNQ <https://pynq.io/>`_ . For these platforms, we can build a full bitfile including DMAs to move data into and out of the FINN-generated accelerator, as well as a Python driver to launch the accelerator. We support the Pynq-Z1, Pynq-Z2, Ultra96, ZCU102 and ZCU104 boards.
 As of FINN v0.4b we also have preliminary support for `Xilinx Alveo boards <https://www.xilinx.com/products/boards-and-kits/alveo.html>`_ using PYNQ and Vitis, see instructions below for Alveo setup.
 
 **Vivado IPI support for any Xilinx FPGA:** FINN generates a Vivado IP Integrator (IPI) design from the neural network with AXI stream (FIFO) in-out interfaces, which can be integrated onto any Xilinx FPGA as part of a larger system. It's up to you to take the FINN-generated accelerator (what we call "stitched IP" in the tutorials), wire it up to your FPGA design and send/receive neural network data to/from the accelerator.
 
+PYNQ board first-time setup
+****************************
+We use *host* to refer to the PC running the FINN Docker environment, which will build the accelerator+driver and package it up, and *target* to refer to the PYNQ board. To be able to access the target from the host, you'll need to set up SSH public key authentication:
+
+Start on the target side:
+
+1. Note down the IP address of your PYNQ board. This IP address must be accessible from the host.
+2. Ensure the ``bitstring`` package is installed: ``sudo pip3 install bitstring``
+
+Continue on the host side (replace the ``<PYNQ_IP>`` and ``<PYNQ_USERNAME>`` with the IP address and username of your board from the first step):
+
+1. Launch the Docker container from where you cloned finn with ``./run-docker.sh``
+2. Go into the ``ssh_keys`` directory (e.g. ``cd /workspace/finn/ssh_keys``)
+3. Run ``ssh-keygen`` to create a key pair e.g. ``id_rsa`` private and ``id_rsa.pub`` public key
+4. Run ``ssh-copy-id -i id_rsa.pub <PYNQ_USERNAME>@<PYNQ_IP>`` to install the keys on the remote system
+5. Test that you can ``ssh <PYNQ_USERNAME>@<PYNQ_IP>`` without having to enter the password. If it doesn't work, pass the ``-v`` flag to the ssh command to help you debug.
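+
+The host-side steps above, condensed into a single sketch (``<PYNQ_USERNAME>`` and ``<PYNQ_IP>`` are the same placeholders as in the steps; run these from inside the container):
+
+::
+
+  cd /workspace/finn/ssh_keys
+  ssh-keygen -f id_rsa                                   # creates id_rsa / id_rsa.pub in this folder
+  ssh-copy-id -i id_rsa.pub <PYNQ_USERNAME>@<PYNQ_IP>    # installs the public key on the board
+  ssh <PYNQ_USERNAME>@<PYNQ_IP>                          # should log in without a password prompt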
+
+
 Alveo first-time setup
 **********************
 We use *host* to refer to the PC running the FINN Docker environment, which will build the accelerator+driver and package it up, and *target* to refer to the PC where the Alveo card is installed. These two can be the same PC, or connected over the network -- FINN includes some utilities to make it easier to test on remote PCs too. Prior to first usage, you need to set up both the host and the target in the following manner:
 
 On the target side:
 
-1. Install Xilinx XRT and set up the ``XILINX_XRT`` environment variable to point to your installation, for instance ``/opt/xilinx/xrt``.
+1. Install Xilinx XRT.
 2. Install the Vitis platform files for Alveo and set up the ``PLATFORM_REPO_PATHS`` environment variable to point to your installation, for instance ``/opt/xilinx/platforms``.
 3. Create a conda environment named *finn-pynq-alveo* by following this guide `to set up PYNQ for Alveo <https://pynq.readthedocs.io/en/latest/getting_started/alveo_getting_started.html>`_. It's best to follow the recommended environment.yml (set of package versions) in this guide.
 4. Activate the environment with `conda activate finn-pynq-alveo` and install the bitstring package with ``pip install bitstring``.
 5. Done! You should now be able to e.g. ``import pynq`` in Python scripts.
-6. (optional) If you don't want to specify the ``ALVEO_PASSWORD`` environment variable, you can `set up public key authentication <https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server>`_. Copy your private key to the ``finn/ssh_keys`` folder on the host to get password-less deployment and remote execution.
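+
+A quick sanity check of the target-side environment described above (assuming the ``finn-pynq-alveo`` environment from step 3 exists):
+
+::
+
+  conda activate finn-pynq-alveo
+  pip install bitstring
+  python -c "import pynq"   # should complete without errors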
+
 
 
 On the host side:
 
 1. Install Vitis 2020.1 and set up the ``VITIS_PATH`` environment variable to point to your installation.
-2. Install Xilinx XRT and set up the ``XILINX_XRT`` environment variable to point to your installation. *This must be the same path as the target's XRT (target step 1)*
+2. Install Xilinx XRT. Ensure that the ``XRT_DEB_VERSION`` environment variable reflects which version of XRT you have installed.
 3. Install the Vitis platform files for Alveo and set up the ``PLATFORM_REPO_PATHS`` environment variable to point to your installation. *This must be the same path as the target's platform files (target step 2)*
 4. Set up the ``ALVEO_*`` environment variables accordingly for your target, see description of environment variables above.
-5. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time.
+5. `Set up public key authentication <https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server>`_. Copy your private key to the ``finn/ssh_keys`` folder on the host to get password-less deployment and remote execution.
+6. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time.
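+
+For instance, the host-side environment for a remote Alveo target might be set up as follows before launching the container (all values are placeholders for your own installation and target; the required ``FINN_XILINX_*`` variables from :ref:`Environment variables` are assumed to be set as well):
+
+::
+
+  export VITIS_PATH=/opt/Xilinx/Vitis/2020.1
+  export PLATFORM_REPO_PATHS=/opt/xilinx/platforms
+  # set XRT_DEB_VERSION if your installed XRT differs from the default in run-docker.sh
+  export ALVEO_IP=192.168.1.42
+  export ALVEO_USERNAME=alveo_user
+  export ALVEO_TARGET_DIR=/tmp/finn_alveo_deploy
+  bash ./run-docker.sh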
+
+
+
+System Requirements
+====================
+
+* Ubuntu 18.04 with ``bash`` installed
+* Docker `without root <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_
+* A working Vivado 2019.1 or 2020.1 installation
+* The ``FINN_XILINX_PATH`` and ``FINN_XILINX_VERSION`` environment variables set to match that installation (see :ref:`Environment variables`)
+* *(optional)* A PYNQ board with a network connection, see `PYNQ board first-time setup`_
+* *(optional)* An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_ )
+
+We also recommend running the FINN compiler on a system with sufficiently
+strong hardware:
+
+* **RAM.** Depending on your target FPGA platform, your system must have sufficient RAM to be
+  able to run Vivado/Vitis synthesis for that part. See `this page <https://www.xilinx.com/products/design-tools/vivado/memory.html>`_
+  for more information. For targeting Zynq and Zynq UltraScale+ parts, at least 8 GB is recommended. Larger parts may require up to 16 GB.
+  For targeting Alveo parts with Vitis, at least 64 GB RAM is recommended.
+
+* **CPU.** FINN can parallelize HLS synthesis and several other operations for different
+  layers, so using a multi-core CPU is recommended. However, this should be balanced
+  against the memory usage as a high degree of parallelization will require more
+  memory. See the ``NUM_DEFAULT_WORKERS`` environment variable above for more on
+  how to control the degree of parallelization.
+
+* **Storage.** While going through the build steps, FINN will generate many files as part of
+  the process. For larger networks, you may need 10s of GB of space for the temporary
+  files generated during the build.
+  By default, these generated files will be placed under ``/tmp/finn_dev_<username>``.
+  You can override this location by using the ``FINN_HOST_BUILD_DIR`` environment
+  variable.
+  Mapping the generated file directory to a fast SSD will result in quicker builds.
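+
+For example, to keep the intermediate build files on a fast SSD mounted at a hypothetical ``/ssd`` location:
+
+::
+
+  export FINN_HOST_BUILD_DIR=/ssd/finn_builds
+  bash ./run-docker.sh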
diff --git a/docs/finn/index.rst b/docs/finn/index.rst
index 320cd88fe91af857c5a3948ef36a587ea305040f..751b105bb4ec35c880664e85a9550207e8a1f076 100644
--- a/docs/finn/index.rst
+++ b/docs/finn/index.rst
@@ -12,20 +12,20 @@ What is FINN?
 
 'FINN' is colloquially used to refer to two separate but highly related things:
 
-* The FINN **project**, which is an experimental framework from Xilinx Research Labs
-to explore deep neural network inference on FPGAs. It specifically targets
-quantized neural networks (QNNs), with emphasis on generating dataflow-style
-architectures customized for each network.
-The key components are illustrated in the figure above;
-including tools for training
-quantized neural networks (Brevitas), the FINN compiler, and the finn-hlslib
-Vivado HLS library of FPGA components for QNNs.
-Read more on the `FINN project homepage <https://xilinx.github.io/finn/>`_.
-
-* The FINN **compiler**, which this Read the Docs website is the documentation for.
-The compiler is a central part of the FINN project (above) that maps QNNs to
-dataflow-style FPGA architectures.
-You can find the FINN compiler in this `GitHub repository <https://github.com/Xilinx/finn>`_.
+*  The FINN **project**, which is an experimental framework from Xilinx Research Labs
+   to explore deep neural network inference on FPGAs. It specifically targets
+   quantized neural networks (QNNs), with emphasis on generating dataflow-style
+   architectures customized for each network.
+   The key components are illustrated in the figure above,
+   including tools for training
+   quantized neural networks (Brevitas), the FINN compiler, and the finn-hlslib
+   Vivado HLS library of FPGA components for QNNs.
+   Read more on the `FINN project homepage <https://xilinx.github.io/finn/>`_.
+
+*  The FINN **compiler**, which this Read the Docs website is the documentation for.
+   The compiler is a central part of the FINN project (above) that maps QNNs to
+   dataflow-style FPGA architectures.
+   You can find the FINN compiler in this `GitHub repository <https://github.com/Xilinx/finn>`_.
 
 
 More FINN Resources
diff --git a/finn-rtllib/memstream/hdl/mux.v b/finn-rtllib/memstream/hdl/mux.v
index c5b89aeb4e7eb7b2858d062c18b693d9bd685fb2..f7087f9735771a73aa532ae19baf18569e9de663 100644
--- a/finn-rtllib/memstream/hdl/mux.v
+++ b/finn-rtllib/memstream/hdl/mux.v
@@ -1,44 +1,44 @@
-/*
- Copyright (c) 2020, Xilinx
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
- * Neither the name of FINN nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-module mux
-#(
-    parameter NINPUTS = 1,
-	parameter WIDTH = 16
-)
-(
-	input [NINPUTS*WIDTH-1:0] in,
-	output [WIDTH-1:0] out,
-	input [$clog2(NINPUTS)-1:0] sel
-);
-
-assign out = in >> (sel*WIDTH);
-
-endmodule
\ No newline at end of file
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+module mux
+#(
+    parameter NINPUTS = 1,
+	parameter WIDTH = 16
+)
+(
+	input [NINPUTS*WIDTH-1:0] in,
+	output [WIDTH-1:0] out,
+	input [$clog2(NINPUTS)-1:0] sel
+);
+
+assign out = in >> (sel*WIDTH);
+
+endmodule
diff --git a/finn-rtllib/memstream/sim/gen_memblocks.sh b/finn-rtllib/memstream/sim/gen_memblocks.sh
index 05962f7be8fe4afd7790640cf4d280600dcf43d1..b6e6b656ad1ea7846666108a1d4b79eae295490f 100644
--- a/finn-rtllib/memstream/sim/gen_memblocks.sh
+++ b/finn-rtllib/memstream/sim/gen_memblocks.sh
@@ -36,4 +36,4 @@ for (( i=0; i<$NBLOCKS; i++ ))
 do
     START=$(( 1 + $i * 1024 ))
     tail -n +$START $1 | head -n 1024 >> memblock_$i.dat
-done
\ No newline at end of file
+done
diff --git a/finn-rtllib/memstream/sim/tb_memstream.v b/finn-rtllib/memstream/sim/tb_memstream.v
index d63fa30046d7c5f2c50f509174b4937374e70c13..ad3efad5bd70c37a860ddb0ec5bff1c2e72c15f0 100644
--- a/finn-rtllib/memstream/sim/tb_memstream.v
+++ b/finn-rtllib/memstream/sim/tb_memstream.v
@@ -1,369 +1,369 @@
-/*
- Copyright (c) 2020, Xilinx
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
- * Neither the name of FINN nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-`timescale 1ns/10ps
-
-module tb_memstream;
-
-//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
-parameter CONFIG_EN = 1;
-parameter NSTREAMS = 4;//1 up to 6
-
-parameter MEM_DEPTH = 9216;
-parameter MEM_WIDTH = 32;
-parameter MEM_INIT = "./";
-parameter MEM_CHECK = "golden.dat";
-
-//widths per stream
-parameter STRM0_WIDTH = 32;
-parameter STRM1_WIDTH = 32;
-parameter STRM2_WIDTH = 32;
-parameter STRM3_WIDTH = 32;
-parameter STRM4_WIDTH = 1;
-parameter STRM5_WIDTH = 1;
-
-//depths per stream
-parameter STRM0_DEPTH = 2304;
-parameter STRM1_DEPTH = 2304;
-parameter STRM2_DEPTH = 2304;
-parameter STRM3_DEPTH = 2304;
-parameter STRM4_DEPTH = 1;
-parameter STRM5_DEPTH = 1;
-
-//offsets for each stream
-parameter STRM0_OFFSET = 0;
-parameter STRM1_OFFSET = 2304;
-parameter STRM2_OFFSET = 4608;
-parameter STRM3_OFFSET = 6912;
-parameter STRM4_OFFSET = 0;
-parameter STRM5_OFFSET = 0;
-
-
-reg clk;
-reg rst;
-
-reg [31:0] config_address = 0;
-reg config_ce = 0;
-reg config_we = 0;
-reg [31:0] config_d0 = 0;
-wire [31:0] config_q0;
-
-//multiple wire AXI Streams
-reg m_axis_0_afull;
-reg m_axis_0_tready;
-wire m_axis_0_tvalid;
-wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
-
-reg m_axis_1_afull;
-reg m_axis_1_tready;
-wire m_axis_1_tvalid;
-wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
-
-reg m_axis_2_afull;
-reg m_axis_2_tready;
-wire m_axis_2_tvalid;
-wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
-
-reg m_axis_3_afull;
-reg m_axis_3_tready;
-wire m_axis_3_tvalid;
-wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
-
-reg m_axis_4_afull;
-reg m_axis_4_tready;
-wire m_axis_4_tvalid;
-wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
-
-reg m_axis_5_afull;
-reg m_axis_5_tready;
-wire m_axis_5_tvalid;
-wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
-
-reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
-integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
-integer done = 0;
-reg [5:0] rng;
-
-//clock
-initial begin
-    clk = 0;
-    forever #5 clk = ~clk;
-end
-
-initial begin
-    rst = 1;
-	config_ce = 0;
-    m_axis_0_afull = 0;
-    m_axis_1_afull = 0;
-    m_axis_2_afull = 0;
-    m_axis_3_afull = 0;
-    m_axis_4_afull = 0;
-    m_axis_5_afull = 0;
-    m_axis_0_tready = 1;
-    m_axis_1_tready = 1;
-    m_axis_2_tready = 1;
-    m_axis_3_tready = 1;
-    m_axis_4_tready = 1;
-    m_axis_5_tready = 1;
-    repeat(100) @(negedge clk);
-    rst = 0;
-    #100
-    fork
-	    begin
-		    $display("Starting to generate random AFULL");
-			while(~done) begin
-			    rng = $random;
-				m_axis_0_afull = rng[0];
-				m_axis_1_afull = rng[1];
-				m_axis_2_afull = rng[2];
-				m_axis_3_afull = rng[3];
-				m_axis_4_afull = rng[4];
-				m_axis_5_afull = rng[5];
-				@(negedge clk);
-			end
-		end
-	join
-end
-
-
-//DUT
-memstream
-#(
-    CONFIG_EN,
-    NSTREAMS,
-    MEM_DEPTH,
-    MEM_WIDTH,
-    MEM_INIT,
-    
-    //widths per stream
-    STRM0_WIDTH,
-    STRM1_WIDTH,
-    STRM2_WIDTH,
-    STRM3_WIDTH,
-    STRM4_WIDTH,
-    STRM5_WIDTH,
-    
-    //depths per stream
-    STRM0_DEPTH,
-    STRM1_DEPTH,
-    STRM2_DEPTH,
-    STRM3_DEPTH,
-    STRM4_DEPTH,
-    STRM5_DEPTH,
-    
-    //offsets for each stream
-    STRM0_OFFSET,
-    STRM1_OFFSET,
-    STRM2_OFFSET,
-    STRM3_OFFSET,
-    STRM4_OFFSET,
-    STRM5_OFFSET
-)
-dut
-(
-    clk,
-    ~rst,
-
-    //optional AXI-Lite interface
-    config_address,
-    config_ce,
-    config_we,
-    config_d0,
-    config_q0,
-
-    //multiple output AXI Streams
-    m_axis_0_afull,
-    m_axis_0_tready,
-    m_axis_0_tvalid,
-    m_axis_0_tdata,
-    
-    m_axis_1_afull,
-    m_axis_1_tready,
-    m_axis_1_tvalid,
-    m_axis_1_tdata,
-    
-    m_axis_2_afull,
-    m_axis_2_tready,
-    m_axis_2_tvalid,
-    m_axis_2_tdata,
-    
-    m_axis_3_afull,
-    m_axis_3_tready,
-    m_axis_3_tvalid,
-    m_axis_3_tdata,
-    
-    m_axis_4_afull,
-    m_axis_4_tready,
-    m_axis_4_tvalid,
-    m_axis_4_tdata,
-    
-    m_axis_5_afull,
-    m_axis_5_tready,
-    m_axis_5_tvalid,
-    m_axis_5_tdata
-    
-
-);
-
-//stream checkers
-initial begin
-    ptr0 = STRM0_OFFSET;
-	ptr1 = STRM1_OFFSET;
-	ptr2 = STRM2_OFFSET;
-	ptr3 = STRM3_OFFSET;
-	ptr4 = STRM4_OFFSET;
-	ptr5 = STRM5_OFFSET;
-    fork
-		//check stream 0
-	    begin
-		    $display("Starting stream 0 checker");
-		    while(~done & (NSTREAMS > 0)) begin
-				@(negedge clk);
-				if(m_axis_0_tvalid) begin
-					if(m_axis_0_tdata != golden[ptr0]) begin
-						$display("Mismatch on stream 0");
-						$stop();
-					end
-					//increment pointer
-					ptr0 = ptr0 + 1;
-					//rewind pointer if it's reached end
-					if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
-				    ptr0 = STRM0_OFFSET;
-				end
-			end
-		end
-		//check stream 1
-	    begin
-		    $display("Starting stream 1 checker");
-		    while(~done & (NSTREAMS > 1)) begin
-				@(negedge clk);
-				if(m_axis_1_tvalid) begin
-					if(m_axis_1_tdata != golden[ptr1]) begin
-						$display("Mismatch on stream 1");
-						$stop();
-					end
-					//increment pointer
-					ptr1 = ptr1 + 1;
-					//rewind pointer if it's reached end
-					if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
-						ptr1 = STRM1_OFFSET;
-				end
-			end
-		end
-		
-		//check stream 2
-	    begin
-		    $display("Starting stream 2 checker");
-		    while(~done & (NSTREAMS > 2)) begin
-				@(negedge clk);
-				if(m_axis_2_tvalid) begin
-					if(m_axis_2_tdata != golden[ptr2]) begin
-						$display("Mismatch on stream 2");
-						$stop();
-					end
-					//increment pointer
-					ptr2 = ptr2 + 1;
-					//rewind pointer if it's reached end
-					if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
-						ptr2 = STRM2_OFFSET;
-				end
-			end
-		end
-		//check stream 3
-	    begin
-		    $display("Starting stream 3 checker");
-		    while(~done & (NSTREAMS > 3)) begin
-				@(negedge clk);
-				if(m_axis_3_tvalid) begin
-					if(m_axis_3_tdata != golden[ptr3]) begin
-						$display("Mismatch on stream 3");
-						$stop();
-					end
-					//increment pointer
-					ptr3 = ptr3 + 1;
-					//rewind pointer if it's reached end
-					if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
-						ptr3 = STRM3_OFFSET;
-				end
-			end
-		end
-		//check stream 4
-	    begin
-		    $display("Starting stream 4 checker");
-		    while(~done & (NSTREAMS > 4)) begin
-				@(negedge clk);
-				if(m_axis_4_tvalid) begin
-					if(m_axis_4_tdata != golden[ptr4]) begin
-						$display("Mismatch on stream 4");
-						$stop();
-					end
-					//increment pointer
-					ptr4 = ptr4 + 1;
-					//rewind pointer if it's reached end
-					if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
-						ptr4 = STRM4_OFFSET;
-				end
-			end
-		end
-		//check stream 5
-	    begin
-		    $display("Starting stream 5 checker");
-		    while(~done & (NSTREAMS > 5)) begin
-				@(negedge clk);
-				if(m_axis_5_tvalid) begin
-					if(m_axis_5_tdata != golden[ptr5]) begin
-						$display("Mismatch on stream 5");
-						$stop();
-					end
-					//increment pointer
-					ptr5 = ptr5 + 1;
-					//rewind pointer if it's reached end
-					if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
-						ptr5 = STRM5_OFFSET;
-				end
-			end
-		end
-	join
-end
-
-initial begin
-    done = 0;
-	$readmemh(MEM_CHECK,golden);
-//    $dumpfile("wave.vcd");
-//    $dumpvars(0,tb_memstream);
-    @(negedge rst);
-    #10000000
-	$display("Test done!");
-	done = 1;
-	#1000
-    $finish();
-end
-
-endmodule
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+`timescale 1ns/10ps
+
+module tb_memstream;
+
+//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
+parameter CONFIG_EN = 1;
+parameter NSTREAMS = 4;//1 up to 6
+
+parameter MEM_DEPTH = 9216;
+parameter MEM_WIDTH = 32;
+parameter MEM_INIT = "./";
+parameter MEM_CHECK = "golden.dat";
+
+//widths per stream
+parameter STRM0_WIDTH = 32;
+parameter STRM1_WIDTH = 32;
+parameter STRM2_WIDTH = 32;
+parameter STRM3_WIDTH = 32;
+parameter STRM4_WIDTH = 1;
+parameter STRM5_WIDTH = 1;
+
+//depths per stream
+parameter STRM0_DEPTH = 2304;
+parameter STRM1_DEPTH = 2304;
+parameter STRM2_DEPTH = 2304;
+parameter STRM3_DEPTH = 2304;
+parameter STRM4_DEPTH = 1;
+parameter STRM5_DEPTH = 1;
+
+//offsets for each stream
+parameter STRM0_OFFSET = 0;
+parameter STRM1_OFFSET = 2304;
+parameter STRM2_OFFSET = 4608;
+parameter STRM3_OFFSET = 6912;
+parameter STRM4_OFFSET = 0;
+parameter STRM5_OFFSET = 0;
+
+
+reg clk;
+reg rst;
+
+reg [31:0] config_address = 0;
+reg config_ce = 0;
+reg config_we = 0;
+reg [31:0] config_d0 = 0;
+wire [31:0] config_q0;
+
+//multiple wire AXI Streams
+reg m_axis_0_afull;
+reg m_axis_0_tready;
+wire m_axis_0_tvalid;
+wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
+
+reg m_axis_1_afull;
+reg m_axis_1_tready;
+wire m_axis_1_tvalid;
+wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
+
+reg m_axis_2_afull;
+reg m_axis_2_tready;
+wire m_axis_2_tvalid;
+wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
+
+reg m_axis_3_afull;
+reg m_axis_3_tready;
+wire m_axis_3_tvalid;
+wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
+
+reg m_axis_4_afull;
+reg m_axis_4_tready;
+wire m_axis_4_tvalid;
+wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
+
+reg m_axis_5_afull;
+reg m_axis_5_tready;
+wire m_axis_5_tvalid;
+wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
+
+reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
+integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
+integer done = 0;
+reg [5:0] rng;
+
+//clock
+initial begin
+    clk = 0;
+    forever #5 clk = ~clk;
+end
+
+initial begin
+    rst = 1;
+	config_ce = 0;
+    m_axis_0_afull = 0;
+    m_axis_1_afull = 0;
+    m_axis_2_afull = 0;
+    m_axis_3_afull = 0;
+    m_axis_4_afull = 0;
+    m_axis_5_afull = 0;
+    m_axis_0_tready = 1;
+    m_axis_1_tready = 1;
+    m_axis_2_tready = 1;
+    m_axis_3_tready = 1;
+    m_axis_4_tready = 1;
+    m_axis_5_tready = 1;
+    repeat(100) @(negedge clk);
+    rst = 0;
+    #100
+    fork
+	    begin
+		    $display("Starting to generate random AFULL");
+			while(~done) begin
+			    rng = $random;
+				m_axis_0_afull = rng[0];
+				m_axis_1_afull = rng[1];
+				m_axis_2_afull = rng[2];
+				m_axis_3_afull = rng[3];
+				m_axis_4_afull = rng[4];
+				m_axis_5_afull = rng[5];
+				@(negedge clk);
+			end
+		end
+	join
+end
+
+
+//DUT
+memstream
+#(
+    CONFIG_EN,
+    NSTREAMS,
+    MEM_DEPTH,
+    MEM_WIDTH,
+    MEM_INIT,
+
+    //widths per stream
+    STRM0_WIDTH,
+    STRM1_WIDTH,
+    STRM2_WIDTH,
+    STRM3_WIDTH,
+    STRM4_WIDTH,
+    STRM5_WIDTH,
+
+    //depths per stream
+    STRM0_DEPTH,
+    STRM1_DEPTH,
+    STRM2_DEPTH,
+    STRM3_DEPTH,
+    STRM4_DEPTH,
+    STRM5_DEPTH,
+
+    //offsets for each stream
+    STRM0_OFFSET,
+    STRM1_OFFSET,
+    STRM2_OFFSET,
+    STRM3_OFFSET,
+    STRM4_OFFSET,
+    STRM5_OFFSET
+)
+dut
+(
+    clk,
+    ~rst,
+
+    //optional AXI-Lite interface
+    config_address,
+    config_ce,
+    config_we,
+    config_d0,
+    config_q0,
+
+    //multiple output AXI Streams
+    m_axis_0_afull,
+    m_axis_0_tready,
+    m_axis_0_tvalid,
+    m_axis_0_tdata,
+
+    m_axis_1_afull,
+    m_axis_1_tready,
+    m_axis_1_tvalid,
+    m_axis_1_tdata,
+
+    m_axis_2_afull,
+    m_axis_2_tready,
+    m_axis_2_tvalid,
+    m_axis_2_tdata,
+
+    m_axis_3_afull,
+    m_axis_3_tready,
+    m_axis_3_tvalid,
+    m_axis_3_tdata,
+
+    m_axis_4_afull,
+    m_axis_4_tready,
+    m_axis_4_tvalid,
+    m_axis_4_tdata,
+
+    m_axis_5_afull,
+    m_axis_5_tready,
+    m_axis_5_tvalid,
+    m_axis_5_tdata
+
+
+);
+
+//stream checkers
+initial begin
+    ptr0 = STRM0_OFFSET;
+	ptr1 = STRM1_OFFSET;
+	ptr2 = STRM2_OFFSET;
+	ptr3 = STRM3_OFFSET;
+	ptr4 = STRM4_OFFSET;
+	ptr5 = STRM5_OFFSET;
+    fork
+		//check stream 0
+	    begin
+		    $display("Starting stream 0 checker");
+		    while(~done & (NSTREAMS > 0)) begin
+				@(negedge clk);
+				if(m_axis_0_tvalid) begin
+					if(m_axis_0_tdata != golden[ptr0]) begin
+						$display("Mismatch on stream 0");
+						$stop();
+					end
+					//increment pointer
+					ptr0 = ptr0 + 1;
+					//rewind pointer if it's reached end
+					if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
+				    ptr0 = STRM0_OFFSET;
+				end
+			end
+		end
+		//check stream 1
+	    begin
+		    $display("Starting stream 1 checker");
+		    while(~done & (NSTREAMS > 1)) begin
+				@(negedge clk);
+				if(m_axis_1_tvalid) begin
+					if(m_axis_1_tdata != golden[ptr1]) begin
+						$display("Mismatch on stream 1");
+						$stop();
+					end
+					//increment pointer
+					ptr1 = ptr1 + 1;
+					//rewind pointer if it's reached end
+					if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
+						ptr1 = STRM1_OFFSET;
+				end
+			end
+		end
+
+		//check stream 2
+	    begin
+		    $display("Starting stream 2 checker");
+		    while(~done & (NSTREAMS > 2)) begin
+				@(negedge clk);
+				if(m_axis_2_tvalid) begin
+					if(m_axis_2_tdata != golden[ptr2]) begin
+						$display("Mismatch on stream 2");
+						$stop();
+					end
+					//increment pointer
+					ptr2 = ptr2 + 1;
+					//rewind pointer if it's reached end
+					if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
+						ptr2 = STRM2_OFFSET;
+				end
+			end
+		end
+		//check stream 3
+	    begin
+		    $display("Starting stream 3 checker");
+		    while(~done & (NSTREAMS > 3)) begin
+				@(negedge clk);
+				if(m_axis_3_tvalid) begin
+					if(m_axis_3_tdata != golden[ptr3]) begin
+						$display("Mismatch on stream 3");
+						$stop();
+					end
+					//increment pointer
+					ptr3 = ptr3 + 1;
+					//rewind pointer if it's reached end
+					if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
+						ptr3 = STRM3_OFFSET;
+				end
+			end
+		end
+		//check stream 4
+	    begin
+		    $display("Starting stream 4 checker");
+		    while(~done & (NSTREAMS > 4)) begin
+				@(negedge clk);
+				if(m_axis_4_tvalid) begin
+					if(m_axis_4_tdata != golden[ptr4]) begin
+						$display("Mismatch on stream 4");
+						$stop();
+					end
+					//increment pointer
+					ptr4 = ptr4 + 1;
+					//rewind pointer if it's reached end
+					if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
+						ptr4 = STRM4_OFFSET;
+				end
+			end
+		end
+		//check stream 5
+	    begin
+		    $display("Starting stream 5 checker");
+		    while(~done & (NSTREAMS > 5)) begin
+				@(negedge clk);
+				if(m_axis_5_tvalid) begin
+					if(m_axis_5_tdata != golden[ptr5]) begin
+						$display("Mismatch on stream 5");
+						$stop();
+					end
+					//increment pointer
+					ptr5 = ptr5 + 1;
+					//rewind pointer if it's reached end
+					if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
+						ptr5 = STRM5_OFFSET;
+				end
+			end
+		end
+	join
+end
+
+initial begin
+    done = 0;
+	$readmemh(MEM_CHECK,golden);
+//    $dumpfile("wave.vcd");
+//    $dumpvars(0,tb_memstream);
+    @(negedge rst);
+    #10000000
+	$display("Test done!");
+	done = 1;
+	#1000
+    $finish();
+end
+
+endmodule
diff --git a/finn-rtllib/memstream/sim/tb_memstream_writes.v b/finn-rtllib/memstream/sim/tb_memstream_writes.v
index a6ac747e967e594ac010f25a2827ebf7a7fcaa0f..c66807454b9a7f8ff7ab7008a504938740fb03a0 100644
--- a/finn-rtllib/memstream/sim/tb_memstream_writes.v
+++ b/finn-rtllib/memstream/sim/tb_memstream_writes.v
@@ -1,486 +1,486 @@
-/*
- Copyright (c) 2020, Xilinx
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
- * Neither the name of FINN nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-`timescale 1ns/10ps
-
-module tb_memstream_writes;
-
-//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
-parameter CONFIG_EN = 1;
-parameter NSTREAMS = 2;//1 up to 6
-
-parameter MEM_DEPTH = 40;
-parameter MEM_WIDTH = 70;
-
-//widths per stream
-parameter STRM0_WIDTH = 70;
-parameter STRM1_WIDTH = 32;
-parameter STRM2_WIDTH = 32;
-parameter STRM3_WIDTH = 32;
-parameter STRM4_WIDTH = 1;
-parameter STRM5_WIDTH = 1;
-
-//depths per stream
-parameter STRM0_DEPTH = 20;
-parameter STRM1_DEPTH = 20;
-parameter STRM2_DEPTH = 2304;
-parameter STRM3_DEPTH = 2304;
-parameter STRM4_DEPTH = 1;
-parameter STRM5_DEPTH = 1;
-
-//offsets for each stream
-parameter STRM0_OFFSET = 0;
-parameter STRM1_OFFSET = 20;
-parameter STRM2_OFFSET = 4608;
-parameter STRM3_OFFSET = 6912;
-parameter STRM4_OFFSET = 0;
-parameter STRM5_OFFSET = 0;
-
-
-reg clk;
-reg rst;
-
-wire        awready;
-reg         awvalid;
-reg [31:0]  awaddr;
-reg [2:0]   awprot;
-//write data
-wire        wready;
-reg         wvalid;
-reg [31:0]  wdata;
-reg [3:0]   wstrb;
-//burst response
-reg         bready;
-wire        bvalid;
-wire [1:0]  bresp;
-
-//Read channels
-//read address
-wire        arready;
-reg         arvalid;
-reg [31:0]  araddr;
-reg [2:0]   arprot;
-//read data
-reg         rready;
-wire        rvalid;
-wire [1:0]  rresp;
-wire [31:0] rdata;
-
-//multiple wire AXI Streams
-reg m_axis_0_afull;
-reg m_axis_0_tready;
-wire m_axis_0_tvalid;
-wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
-
-reg m_axis_1_afull;
-reg m_axis_1_tready;
-wire m_axis_1_tvalid;
-wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
-
-reg m_axis_2_afull;
-reg m_axis_2_tready;
-wire m_axis_2_tvalid;
-wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
-
-reg m_axis_3_afull;
-reg m_axis_3_tready;
-wire m_axis_3_tvalid;
-wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
-
-reg m_axis_4_afull;
-reg m_axis_4_tready;
-wire m_axis_4_tvalid;
-wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
-
-reg m_axis_5_afull;
-reg m_axis_5_tready;
-wire m_axis_5_tvalid;
-wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
-
-reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
-reg [MEM_WIDTH-1:0] gword;
-integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
-integer done = 0;
-integer i, j;
-reg [5:0] rng;
-
-parameter NFOLDS_PER_WORD = (MEM_WIDTH+31)/32;
-
-task axi_write;
-    input [MEM_WIDTH-1:0] data;
-    input [31:0] adr;
-    begin
-        for(j=0; j<(1<<$clog2(NFOLDS_PER_WORD)); j=j+1) begin
-            @(negedge clk);
-            awvalid = 1;
-            wvalid = 1;
-            wdata = data>>(j*32);
-            awaddr = (adr*(1<<$clog2(NFOLDS_PER_WORD))+j)*4;
-            fork
-                begin
-                    @(posedge awready);
-                    @(posedge clk) awvalid = 0;
-                end
-                begin
-                    @(posedge wready);
-                    @(posedge clk) wvalid = 0;
-                end
-            join
-            @(posedge clk);
-        end
-    end
-endtask
-
-task axi_read;
-    input [31:0] adr;
-    output [MEM_WIDTH-1:0] data;
-    begin
-        data = 0;
-        for(j=0; j<NFOLDS_PER_WORD; j=j+1) begin
-            @(negedge clk);
-            arvalid = 1;
-            araddr = (adr*(1<<$clog2(NFOLDS_PER_WORD))+j)*4;
-            rready = 1;
-            fork
-                begin
-                    @(posedge arready);
-                    @(posedge clk) arvalid = 0;
-                end
-                begin
-                    @(posedge rvalid);
-                    @(posedge clk) rready = 0;
-                    data = data | (rdata<<(32*j));
-                end
-            join
-            @(posedge clk);
-        end
-    end
-endtask
-
-//clock
-initial begin
-    clk = 0;
-    forever #5 clk = ~clk;
-end
-
-initial begin
-    rst = 1;
-    awvalid = 0;
-    arvalid = 0;
-    wvalid = 0;
-    rready = 1;
-    bready = 1;
-    m_axis_0_afull = 1;
-    m_axis_1_afull = 1;
-    m_axis_2_afull = 1;
-    m_axis_3_afull = 1;
-    m_axis_4_afull = 1;
-    m_axis_5_afull = 1;
-    m_axis_0_tready = 0;
-    m_axis_1_tready = 0;
-    m_axis_2_tready = 0;
-    m_axis_3_tready = 0;
-    m_axis_4_tready = 0;
-    m_axis_5_tready = 0;
-    repeat(100) @(negedge clk);
-    rst = 0;
-    #100
-    //random initialization of golden data
-    for(i=0; i<MEM_DEPTH; i=i+1) begin
-        gword = 0;
-        repeat(NFOLDS_PER_WORD)
-            gword = (gword << 32) | $random;
-        golden[i] = gword;
-        axi_write(golden[i],i);
-        axi_read(i,gword);
-    end
-    //re-reset
-    repeat(100) @(negedge clk);
-    rst = 1;
-    #100
-    repeat(100) @(negedge clk);
-    rst = 0;
-    #100
-    @(negedge clk);
-    //start reads
-    m_axis_0_afull = 0;
-    m_axis_1_afull = 0;
-    m_axis_2_afull = 0;
-    m_axis_3_afull = 0;
-    m_axis_4_afull = 0;
-    m_axis_5_afull = 0;
-    m_axis_0_tready = 1;
-    m_axis_1_tready = 1;
-    m_axis_2_tready = 1;
-    m_axis_3_tready = 1;
-    m_axis_4_tready = 1;
-    m_axis_5_tready = 1;
-    fork
-	    begin
-		    $display("Starting to generate random AFULL");
-			while(~done) begin
-			    rng = $random;
-				m_axis_0_afull = rng[0];
-				m_axis_1_afull = rng[1];
-				m_axis_2_afull = rng[2];
-				m_axis_3_afull = rng[3];
-				m_axis_4_afull = rng[4];
-				m_axis_5_afull = rng[5];
-				@(negedge clk);
-			end
-		end
-	join
-end
-
-
-//DUT
-memstream
-#(
-    CONFIG_EN,
-    NSTREAMS,
-    MEM_DEPTH,
-    MEM_WIDTH,
-    ".",
-    "auto",
-    //widths per stream
-    STRM0_WIDTH,
-    STRM1_WIDTH,
-    STRM2_WIDTH,
-    STRM3_WIDTH,
-    STRM4_WIDTH,
-    STRM5_WIDTH,
-    //depths per stream
-    STRM0_DEPTH,
-    STRM1_DEPTH,
-    STRM2_DEPTH,
-    STRM3_DEPTH,
-    STRM4_DEPTH,
-    STRM5_DEPTH,
-    //offsets for each stream
-    STRM0_OFFSET,
-    STRM1_OFFSET,
-    STRM2_OFFSET,
-    STRM3_OFFSET,
-    STRM4_OFFSET,
-    STRM5_OFFSET
-)
-dut
-(
-    clk,
-    ~rst,
-
-    //optional AXI-Lite interface
-    awready,
-    awvalid,
-    awaddr,
-    awprot,
-    //write data
-    wready,
-    wvalid,
-    wdata,
-    wstrb,
-    //burst response
-    bready,
-    bvalid,
-    bresp,
-
-    //Read channels
-    //read address
-    arready,
-    arvalid,
-    araddr,
-    arprot,
-    //read data
-    rready,
-    rvalid,
-    rresp,
-    rdata,
-
-    //multiple output AXI Streams
-    m_axis_0_afull,
-    m_axis_0_tready,
-    m_axis_0_tvalid,
-    m_axis_0_tdata,
-    m_axis_1_afull,
-    m_axis_1_tready,
-    m_axis_1_tvalid,
-    m_axis_1_tdata,
-    m_axis_2_afull,
-    m_axis_2_tready,
-    m_axis_2_tvalid,
-    m_axis_2_tdata,
-    m_axis_3_afull,
-    m_axis_3_tready,
-    m_axis_3_tvalid,
-    m_axis_3_tdata,
-    m_axis_4_afull,
-    m_axis_4_tready,
-    m_axis_4_tvalid,
-    m_axis_4_tdata,
-    m_axis_5_afull,
-    m_axis_5_tready,
-    m_axis_5_tvalid,
-    m_axis_5_tdata
-
-);
-
-//stream checkers
-initial begin
-    ptr0 = STRM0_OFFSET;
-	ptr1 = STRM1_OFFSET;
-	ptr2 = STRM2_OFFSET;
-	ptr3 = STRM3_OFFSET;
-	ptr4 = STRM4_OFFSET;
-	ptr5 = STRM5_OFFSET;
-    fork
-		//check stream 0
-	    begin
-		    $display("Starting stream 0 checker");
-		    while(~done & (NSTREAMS > 0)) begin
-				@(negedge clk);
-				if(m_axis_0_tvalid & m_axis_0_tready) begin
-					if(m_axis_0_tdata != golden[ptr0]) begin
-						$display("Mismatch on stream 0");
-						$stop();
-					end
-					//increment pointer
-					ptr0 = ptr0 + 1;
-					//rewind pointer if it's reached end
-					if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
-				        ptr0 = STRM0_OFFSET;
-				end
-			end
-		end
-		//check stream 1
-	    begin
-		    $display("Starting stream 1 checker");
-		    while(~done & (NSTREAMS > 1)) begin
-				@(negedge clk);
-				if(m_axis_1_tvalid & m_axis_1_tready) begin
-					if(m_axis_1_tdata != golden[ptr1]) begin
-						$display("Mismatch on stream 1");
-						$stop();
-					end
-					//increment pointer
-					ptr1 = ptr1 + 1;
-					//rewind pointer if it's reached end
-					if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
-						ptr1 = STRM1_OFFSET;
-				end
-			end
-		end
-		//check stream 2
-	    begin
-		    $display("Starting stream 2 checker");
-		    while(~done & (NSTREAMS > 2)) begin
-				@(negedge clk);
-				if(m_axis_2_tvalid & m_axis_2_tready) begin
-					if(m_axis_2_tdata != golden[ptr2]) begin
-						$display("Mismatch on stream 2");
-						$stop();
-					end
-					//increment pointer
-					ptr2 = ptr2 + 1;
-					//rewind pointer if it's reached end
-					if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
-						ptr2 = STRM2_OFFSET;
-				end
-			end
-		end
-		//check stream 3
-	    begin
-		    $display("Starting stream 3 checker");
-		    while(~done & (NSTREAMS > 3)) begin
-				@(negedge clk);
-				if(m_axis_3_tvalid & m_axis_3_tready) begin
-					if(m_axis_3_tdata != golden[ptr3]) begin
-						$display("Mismatch on stream 3");
-						$stop();
-					end
-					//increment pointer
-					ptr3 = ptr3 + 1;
-					//rewind pointer if it's reached end
-					if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
-						ptr3 = STRM3_OFFSET;
-				end
-			end
-		end
-		//check stream 4
-	    begin
-		    $display("Starting stream 4 checker");
-		    while(~done & (NSTREAMS > 4)) begin
-				@(negedge clk);
-				if(m_axis_4_tvalid & m_axis_4_tready) begin
-					if(m_axis_4_tdata != golden[ptr4]) begin
-						$display("Mismatch on stream 4");
-						$stop();
-					end
-					//increment pointer
-					ptr4 = ptr4 + 1;
-					//rewind pointer if it's reached end
-					if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
-						ptr4 = STRM4_OFFSET;
-				end
-			end
-		end
-		//check stream 5
-	    begin
-		    $display("Starting stream 5 checker");
-		    while(~done & (NSTREAMS > 5)) begin
-				@(negedge clk);
-				if(m_axis_5_tvalid & m_axis_5_tready) begin
-					if(m_axis_5_tdata != golden[ptr5]) begin
-						$display("Mismatch on stream 5");
-						$stop();
-					end
-					//increment pointer
-					ptr5 = ptr5 + 1;
-					//rewind pointer if it's reached end
-					if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
-						ptr5 = STRM5_OFFSET;
-				end
-			end
-		end
-	join
-end
-
-initial begin
-    done = 0;
-    @(negedge rst);
-    $dumpfile("wave.vcd");
-    $dumpvars(0,tb_memstream_writes);
-    #50000
-	$display("Test done!");
-	done = 1;
-	#1000
-    $finish();
-end
-
-endmodule
+/*
+ Copyright (c) 2020, Xilinx
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name of FINN nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+`timescale 1ns/10ps
+
+module tb_memstream_writes;
+
+//parameters to enable/disable axi-mm, set number of streams, set readmemh for memory, set per-stream offsets in memory, set per-stream widths
+parameter CONFIG_EN = 1;
+parameter NSTREAMS = 2;//1 up to 6
+
+parameter MEM_DEPTH = 40;
+parameter MEM_WIDTH = 70;
+
+//widths per stream
+parameter STRM0_WIDTH = 70;
+parameter STRM1_WIDTH = 32;
+parameter STRM2_WIDTH = 32;
+parameter STRM3_WIDTH = 32;
+parameter STRM4_WIDTH = 1;
+parameter STRM5_WIDTH = 1;
+
+//depths per stream
+parameter STRM0_DEPTH = 20;
+parameter STRM1_DEPTH = 20;
+parameter STRM2_DEPTH = 2304;
+parameter STRM3_DEPTH = 2304;
+parameter STRM4_DEPTH = 1;
+parameter STRM5_DEPTH = 1;
+
+//offsets for each stream
+parameter STRM0_OFFSET = 0;
+parameter STRM1_OFFSET = 20;
+parameter STRM2_OFFSET = 4608;
+parameter STRM3_OFFSET = 6912;
+parameter STRM4_OFFSET = 0;
+parameter STRM5_OFFSET = 0;
+
+
+reg clk;
+reg rst;
+
+wire        awready;
+reg         awvalid;
+reg [31:0]  awaddr;
+reg [2:0]   awprot;
+//write data
+wire        wready;
+reg         wvalid;
+reg [31:0]  wdata;
+reg [3:0]   wstrb;
+//burst response
+reg         bready;
+wire        bvalid;
+wire [1:0]  bresp;
+
+//Read channels
+//read address
+wire        arready;
+reg         arvalid;
+reg [31:0]  araddr;
+reg [2:0]   arprot;
+//read data
+reg         rready;
+wire        rvalid;
+wire [1:0]  rresp;
+wire [31:0] rdata;
+
+//multiple wire AXI Streams
+reg m_axis_0_afull;
+reg m_axis_0_tready;
+wire m_axis_0_tvalid;
+wire [STRM0_WIDTH-1:0] m_axis_0_tdata;
+
+reg m_axis_1_afull;
+reg m_axis_1_tready;
+wire m_axis_1_tvalid;
+wire [STRM1_WIDTH-1:0] m_axis_1_tdata;
+
+reg m_axis_2_afull;
+reg m_axis_2_tready;
+wire m_axis_2_tvalid;
+wire [STRM2_WIDTH-1:0] m_axis_2_tdata;
+
+reg m_axis_3_afull;
+reg m_axis_3_tready;
+wire m_axis_3_tvalid;
+wire [STRM3_WIDTH-1:0] m_axis_3_tdata;
+
+reg m_axis_4_afull;
+reg m_axis_4_tready;
+wire m_axis_4_tvalid;
+wire [STRM4_WIDTH-1:0] m_axis_4_tdata;
+
+reg m_axis_5_afull;
+reg m_axis_5_tready;
+wire m_axis_5_tvalid;
+wire [STRM5_WIDTH-1:0] m_axis_5_tdata;
+
+reg [MEM_WIDTH-1:0] golden[MEM_DEPTH-1:0];
+reg [MEM_WIDTH-1:0] gword;
+integer ptr0, ptr1, ptr2, ptr3, ptr4, ptr5;
+integer done = 0;
+integer i, j;
+reg [5:0] rng;
+
+parameter NFOLDS_PER_WORD = (MEM_WIDTH+31)/32;
+
+task axi_write;
+    input [MEM_WIDTH-1:0] data;
+    input [31:0] adr;
+    begin
+        for(j=0; j<(1<<$clog2(NFOLDS_PER_WORD)); j=j+1) begin
+            @(negedge clk);
+            awvalid = 1;
+            wvalid = 1;
+            wdata = data>>(j*32);
+            awaddr = (adr*(1<<$clog2(NFOLDS_PER_WORD))+j)*4;
+            fork
+                begin
+                    @(posedge awready);
+                    @(posedge clk) awvalid = 0;
+                end
+                begin
+                    @(posedge wready);
+                    @(posedge clk) wvalid = 0;
+                end
+            join
+            @(posedge clk);
+        end
+    end
+endtask
+
+task axi_read;
+    input [31:0] adr;
+    output [MEM_WIDTH-1:0] data;
+    begin
+        data = 0;
+        for(j=0; j<NFOLDS_PER_WORD; j=j+1) begin
+            @(negedge clk);
+            arvalid = 1;
+            araddr = (adr*(1<<$clog2(NFOLDS_PER_WORD))+j)*4;
+            rready = 1;
+            fork
+                begin
+                    @(posedge arready);
+                    @(posedge clk) arvalid = 0;
+                end
+                begin
+                    @(posedge rvalid);
+                    @(posedge clk) rready = 0;
+                    data = data | (rdata<<(32*j));
+                end
+            join
+            @(posedge clk);
+        end
+    end
+endtask
+
+//clock
+initial begin
+    clk = 0;
+    forever #5 clk = ~clk;
+end
+
+initial begin
+    rst = 1;
+    awvalid = 0;
+    arvalid = 0;
+    wvalid = 0;
+    rready = 1;
+    bready = 1;
+    m_axis_0_afull = 1;
+    m_axis_1_afull = 1;
+    m_axis_2_afull = 1;
+    m_axis_3_afull = 1;
+    m_axis_4_afull = 1;
+    m_axis_5_afull = 1;
+    m_axis_0_tready = 0;
+    m_axis_1_tready = 0;
+    m_axis_2_tready = 0;
+    m_axis_3_tready = 0;
+    m_axis_4_tready = 0;
+    m_axis_5_tready = 0;
+    repeat(100) @(negedge clk);
+    rst = 0;
+    #100
+    //random initialization of golden data
+    for(i=0; i<MEM_DEPTH; i=i+1) begin
+        gword = 0;
+        repeat(NFOLDS_PER_WORD)
+            gword = (gword << 32) | $random;
+        golden[i] = gword;
+        axi_write(golden[i],i);
+        axi_read(i,gword);
+    end
+    //re-reset
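+    //apply reset a second time before enabling the output streams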
+    repeat(100) @(negedge clk);
+    rst = 1;
+    #100
+    repeat(100) @(negedge clk);
+    rst = 0;
+    #100
+    @(negedge clk);
+    //start reads
+    m_axis_0_afull = 0;
+    m_axis_1_afull = 0;
+    m_axis_2_afull = 0;
+    m_axis_3_afull = 0;
+    m_axis_4_afull = 0;
+    m_axis_5_afull = 0;
+    m_axis_0_tready = 1;
+    m_axis_1_tready = 1;
+    m_axis_2_tready = 1;
+    m_axis_3_tready = 1;
+    m_axis_4_tready = 1;
+    m_axis_5_tready = 1;
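+    //randomly toggle the almost-full inputs every clock cycle to exercise stream back-pressure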
+    fork
+        begin
+            $display("Starting to generate random AFULL");
+            while(!done) begin
+                rng = $random;
+                m_axis_0_afull = rng[0];
+                m_axis_1_afull = rng[1];
+                m_axis_2_afull = rng[2];
+                m_axis_3_afull = rng[3];
+                m_axis_4_afull = rng[4];
+                m_axis_5_afull = rng[5];
+                @(negedge clk);
+            end
+        end
+    join
+end
+
+
+//DUT
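+//parameters and ports are connected positionally, so the order below must match the memstream module declaration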
+memstream
+#(
+    CONFIG_EN,
+    NSTREAMS,
+    MEM_DEPTH,
+    MEM_WIDTH,
+    ".",
+    "auto",
+    //widths per stream
+    STRM0_WIDTH,
+    STRM1_WIDTH,
+    STRM2_WIDTH,
+    STRM3_WIDTH,
+    STRM4_WIDTH,
+    STRM5_WIDTH,
+    //depths per stream
+    STRM0_DEPTH,
+    STRM1_DEPTH,
+    STRM2_DEPTH,
+    STRM3_DEPTH,
+    STRM4_DEPTH,
+    STRM5_DEPTH,
+    //offsets for each stream
+    STRM0_OFFSET,
+    STRM1_OFFSET,
+    STRM2_OFFSET,
+    STRM3_OFFSET,
+    STRM4_OFFSET,
+    STRM5_OFFSET
+)
+dut
+(
+    clk,
+    ~rst,
+
+    //optional AXI-Lite interface
+    awready,
+    awvalid,
+    awaddr,
+    awprot,
+    //write data
+    wready,
+    wvalid,
+    wdata,
+    wstrb,
+    //write response
+    bready,
+    bvalid,
+    bresp,
+
+    //Read channels
+    //read address
+    arready,
+    arvalid,
+    araddr,
+    arprot,
+    //read data
+    rready,
+    rvalid,
+    rresp,
+    rdata,
+
+    //multiple output AXI Streams
+    m_axis_0_afull,
+    m_axis_0_tready,
+    m_axis_0_tvalid,
+    m_axis_0_tdata,
+    m_axis_1_afull,
+    m_axis_1_tready,
+    m_axis_1_tvalid,
+    m_axis_1_tdata,
+    m_axis_2_afull,
+    m_axis_2_tready,
+    m_axis_2_tvalid,
+    m_axis_2_tdata,
+    m_axis_3_afull,
+    m_axis_3_tready,
+    m_axis_3_tvalid,
+    m_axis_3_tdata,
+    m_axis_4_afull,
+    m_axis_4_tready,
+    m_axis_4_tvalid,
+    m_axis_4_tdata,
+    m_axis_5_afull,
+    m_axis_5_tready,
+    m_axis_5_tvalid,
+    m_axis_5_tdata
+
+);
+
+//stream checkers
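+//each checker walks its stream's window of the golden memory, wrapping around at OFFSET+DEPTH,
+//and stops simulation on the first data mismatch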
+initial begin
+    ptr0 = STRM0_OFFSET;
+    ptr1 = STRM1_OFFSET;
+    ptr2 = STRM2_OFFSET;
+    ptr3 = STRM3_OFFSET;
+    ptr4 = STRM4_OFFSET;
+    ptr5 = STRM5_OFFSET;
+    fork
+        //check stream 0
+        begin
+            $display("Starting stream 0 checker");
+            while(!done && (NSTREAMS > 0)) begin
+                @(negedge clk);
+                if(m_axis_0_tvalid & m_axis_0_tready) begin
+                    if(m_axis_0_tdata != golden[ptr0]) begin
+                        $display("Mismatch on stream 0");
+                        $stop();
+                    end
+                    //increment pointer
+                    ptr0 = ptr0 + 1;
+                    //rewind pointer if it's reached end
+                    if(ptr0 == (STRM0_OFFSET + STRM0_DEPTH))
+                        ptr0 = STRM0_OFFSET;
+                end
+            end
+        end
+        //check stream 1
+        begin
+            $display("Starting stream 1 checker");
+            while(!done && (NSTREAMS > 1)) begin
+                @(negedge clk);
+                if(m_axis_1_tvalid & m_axis_1_tready) begin
+                    if(m_axis_1_tdata != golden[ptr1]) begin
+                        $display("Mismatch on stream 1");
+                        $stop();
+                    end
+                    //increment pointer
+                    ptr1 = ptr1 + 1;
+                    //rewind pointer if it's reached end
+                    if(ptr1 == (STRM1_OFFSET + STRM1_DEPTH))
+                        ptr1 = STRM1_OFFSET;
+                end
+            end
+        end
+        //check stream 2
+        begin
+            $display("Starting stream 2 checker");
+            while(!done && (NSTREAMS > 2)) begin
+                @(negedge clk);
+                if(m_axis_2_tvalid & m_axis_2_tready) begin
+                    if(m_axis_2_tdata != golden[ptr2]) begin
+                        $display("Mismatch on stream 2");
+                        $stop();
+                    end
+                    //increment pointer
+                    ptr2 = ptr2 + 1;
+                    //rewind pointer if it's reached end
+                    if(ptr2 == (STRM2_OFFSET + STRM2_DEPTH))
+                        ptr2 = STRM2_OFFSET;
+                end
+            end
+        end
+        //check stream 3
+        begin
+            $display("Starting stream 3 checker");
+            while(!done && (NSTREAMS > 3)) begin
+                @(negedge clk);
+                if(m_axis_3_tvalid & m_axis_3_tready) begin
+                    if(m_axis_3_tdata != golden[ptr3]) begin
+                        $display("Mismatch on stream 3");
+                        $stop();
+                    end
+                    //increment pointer
+                    ptr3 = ptr3 + 1;
+                    //rewind pointer if it's reached end
+                    if(ptr3 == (STRM3_OFFSET + STRM3_DEPTH))
+                        ptr3 = STRM3_OFFSET;
+                end
+            end
+        end
+        //check stream 4
+        begin
+            $display("Starting stream 4 checker");
+            while(!done && (NSTREAMS > 4)) begin
+                @(negedge clk);
+                if(m_axis_4_tvalid & m_axis_4_tready) begin
+                    if(m_axis_4_tdata != golden[ptr4]) begin
+                        $display("Mismatch on stream 4");
+                        $stop();
+                    end
+                    //increment pointer
+                    ptr4 = ptr4 + 1;
+                    //rewind pointer if it's reached end
+                    if(ptr4 == (STRM4_OFFSET + STRM4_DEPTH))
+                        ptr4 = STRM4_OFFSET;
+                end
+            end
+        end
+        //check stream 5
+        begin
+            $display("Starting stream 5 checker");
+            while(!done && (NSTREAMS > 5)) begin
+                @(negedge clk);
+                if(m_axis_5_tvalid & m_axis_5_tready) begin
+                    if(m_axis_5_tdata != golden[ptr5]) begin
+                        $display("Mismatch on stream 5");
+                        $stop();
+                    end
+                    //increment pointer
+                    ptr5 = ptr5 + 1;
+                    //rewind pointer if it's reached end
+                    if(ptr5 == (STRM5_OFFSET + STRM5_DEPTH))
+                        ptr5 = STRM5_OFFSET;
+                end
+            end
+        end
+    join
+end
+
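+//simulation control: dump a VCD, run for a fixed time, then stop the checkers and finish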
+initial begin
+    done = 0;
+    @(negedge rst);
+    $dumpfile("wave.vcd");
+    $dumpvars(0,tb_memstream_writes);
+    #50000
+    $display("Test done!");
+    done = 1;
+    #1000
+    $finish();
+end
+
+endmodule
diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
index 4130f35d7a371711fe1f6bf494358e3c93d8c136..b49c5f3c3eb68961f08041a2c51a46bf66452c81 100644
--- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
@@ -71,14 +71,25 @@
    "source": [
     "## 1. Brevitas Export, FINN Import and Tidy-Up\n",
     "\n",
-    "Similar to what we did in the TFC-w1a1 end-to-end notebook, we will start by exporting the [pretrained CNV-w1a1 network](https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq) to ONNX, importing that into FINN and running the \"tidy-up\" transformations to have a first look at the topology."
+    "Similar to what we did in the TFC-w1a1 end-to-end notebook, we will start by exporting the [pretrained CNV-w1a1 network](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/bnn_pynq) to ONNX, importing that into FINN and running the \"tidy-up\" transformations to have a first look at the topology."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/workspace/brevitas/src/brevitas_examples/bnn_pynq/models/CNV.py:106: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+      "  x = 2.0 * x - torch.tensor([1.0], device=x.device)\n",
+      "/workspace/brevitas/src/brevitas/quant_tensor/__init__.py:74: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+      "  training = torch.tensor(training, dtype=torch.bool)\n"
+     ]
+    }
+   ],
    "source": [
     "import onnx\n",
     "from finn.util.test import get_test_model_trained\n",
@@ -108,7 +119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -125,17 +136,17 @@
        "        <iframe\n",
        "            width=\"100%\"\n",
        "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
+       "            src=\"http://localhost:8081/\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f25b19194a8>"
+       "<IPython.lib.display.IFrame at 0x7f912af76550>"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -157,19 +168,19 @@
    "source": [
     "### Adding Pre- and Postprocessing <a id='prepost'></a>\n",
     "\n",
-    "TODO"
+    "Preprocessing and postprocessing steps can be added directly in the ONNX graph. In this case, the preprocessing step divides the input `uint8` data by 255 so the inputs to the CNV-w1a1 network are bounded between [0, 1]. The postprocessing step takes the output of the network and returns the index (0-9) of the image category with the highest probability (top-1). "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/workspace/finn/src/finn/transformation/infer_data_layouts.py:113: UserWarning: Assuming 4D input is NCHW\n",
+      "/workspace/finn-base/src/finn/transformation/infer_data_layouts.py:114: UserWarning: Assuming 4D input is NCHW\n",
       "  warnings.warn(\"Assuming 4D input is NCHW\")\n"
      ]
     }
@@ -197,14 +208,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from finn.transformation.insert_topk import InsertTopK\n",
+    "from finn.transformation.infer_datatypes import InferDataTypes\n",
+    "\n",
+    "# postprocessing: insert Top-1 node at the end\n",
+    "model = model.transform(InsertTopK(k=1))\n",
+    "chkpt_name = build_dir+\"/end2end_cnv_w1a1_pre_post.onnx\"\n",
+    "# tidy-up again\n",
+    "model = model.transform(InferShapes())\n",
+    "model = model.transform(FoldConstants())\n",
+    "model = model.transform(GiveUniqueNodeNames())\n",
+    "model = model.transform(GiveReadableTensorNames())\n",
+    "model = model.transform(InferDataTypes())\n",
+    "model = model.transform(RemoveStaticGraphInputs())\n",
+    "model.save(chkpt_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
       "Stopping http://0.0.0.0:8081\n",
       "Serving '/workspace/finn/end2end_cnv_w1a1_pre_post.onnx' at http://0.0.0.0:8081\n"
      ]
@@ -216,37 +248,22 @@
        "        <iframe\n",
        "            width=\"100%\"\n",
        "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
+       "            src=\"http://localhost:8081/\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f25b1919518>"
+       "<IPython.lib.display.IFrame at 0x7f8ffd85a760>"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "from finn.transformation.insert_topk import InsertTopK\n",
-    "from finn.transformation.infer_datatypes import InferDataTypes\n",
-    "\n",
-    "# postprocessing: insert Top-1 node at the end\n",
-    "model = model.transform(InsertTopK(k=1))\n",
-    "chkpt_name = build_dir+\"/end2end_cnv_w1a1_pre_post.onnx\"\n",
-    "# tidy-up again\n",
-    "model = model.transform(InferShapes())\n",
-    "model = model.transform(FoldConstants())\n",
-    "model = model.transform(GiveUniqueNodeNames())\n",
-    "model = model.transform(GiveReadableTensorNames())\n",
-    "model = model.transform(InferDataTypes())\n",
-    "model = model.transform(RemoveStaticGraphInputs())\n",
-    "model.save(chkpt_name)\n",
-    "\n",
     "showInNetron(build_dir+\"/end2end_cnv_w1a1_pre_post.onnx\")"
    ]
   },
@@ -268,7 +285,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -311,14 +328,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
       "Stopping http://0.0.0.0:8081\n",
       "Serving '/workspace/finn/end2end_cnv_w1a1_streamlined.onnx' at http://0.0.0.0:8081\n"
      ]
@@ -330,17 +346,17 @@
        "        <iframe\n",
        "            width=\"100%\"\n",
        "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
+       "            src=\"http://localhost:8081/\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f25b19a9470>"
+       "<IPython.lib.display.IFrame at 0x7f91ac6e6f70>"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -360,9 +376,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 9,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/workspace/finn/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py:591: UserWarning: Clipping some thresholds in \n",
+      "  warnings.warn(\"Clipping some thresholds in %s\" % self.onnx_node.name)\n"
+     ]
+    }
+   ],
    "source": [
     "import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls\n",
     "from finn.transformation.fpgadataflow.create_dataflow_partition import (\n",
@@ -409,7 +434,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 10,
    "metadata": {
     "scrolled": false
    },
@@ -418,7 +443,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
       "Stopping http://0.0.0.0:8081\n",
       "Serving '/workspace/finn/end2end_cnv_w1a1_dataflow_parent.onnx' at http://0.0.0.0:8081\n"
      ]
@@ -430,17 +454,17 @@
        "        <iframe\n",
        "            width=\"100%\"\n",
        "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
+       "            src=\"http://localhost:8081/\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f25b18b7668>"
+       "<IPython.lib.display.IFrame at 0x7f8ffd85ae20>"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -458,14 +482,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
       "Stopping http://0.0.0.0:8081\n",
       "Serving '/workspace/finn/end2end_cnv_w1a1_dataflow_model.onnx' at http://0.0.0.0:8081\n"
      ]
@@ -477,17 +500,17 @@
        "        <iframe\n",
        "            width=\"100%\"\n",
        "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
+       "            src=\"http://localhost:8081/\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f25b18fe860>"
+       "<IPython.lib.display.IFrame at 0x7f8ffd832280>"
       ]
      },
-     "execution_count": 33,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -505,7 +528,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -549,14 +572,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
       "Stopping http://0.0.0.0:8081\n",
       "Serving '/workspace/finn/end2end_cnv_w1a1_folded.onnx' at http://0.0.0.0:8081\n"
      ]
@@ -568,17 +590,17 @@
        "        <iframe\n",
        "            width=\"100%\"\n",
        "            height=\"400\"\n",
-       "            src=\"http://0.0.0.0:8081/\"\n",
+       "            src=\"http://localhost:8081/\"\n",
        "            frameborder=\"0\"\n",
        "            allowfullscreen\n",
        "        ></iframe>\n",
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f252e5a6278>"
+       "<IPython.lib.display.IFrame at 0x7f8ff1243af0>"
       ]
      },
-     "execution_count": 35,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -605,11 +627,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 14,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/workspace/finn/src/finn/transformation/fpgadataflow/floorplan.py:107: UserWarning: 32 nodes have no entry in the provided floorplan, SLR was set to -1\n",
+      "  warnings.warn(\n",
+      "/workspace/finn/src/finn/transformation/fpgadataflow/insert_fifo.py:154: UserWarning: Overriding input FIFO depth to 32\n",
+      "  warnings.warn(\"Overriding input FIFO depth to 32\")\n",
+      "/workspace/finn/src/finn/transformation/fpgadataflow/insert_fifo.py:200: UserWarning: Overriding output FIFO depth to 32\n",
+      "  warnings.warn(\"Overriding output FIFO depth to 32\")\n"
+     ]
+    }
+   ],
    "source": [
-    "test_pynq_board = \"Pynq-Z1\"\n",
+    "test_pynq_board = \"Pynq-Z2\"\n",
     "target_clk_ns = 10\n",
     "\n",
     "from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild\n",
@@ -624,17 +659,31 @@
    "source": [
     "## 5. Deployment and Remote Execution\n",
     "\n",
-    "Now that we're done with the hardware generation, we can generate a Python driver for accelerator and copy the necessary files onto our PYNQ board."
+    "Now that we're done with the hardware generation, we can generate a Python driver for accelerator and copy the necessary files onto our PYNQ board.\n",
+    "\n",
+    "**Make sure you've [set up the SSH keys for your PYNQ board](https://finn-dev.readthedocs.io/en/latest/getting_started.html#pynq-board-first-time-setup) before executing this step.**"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 15,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Welcome to PYNQ Linux, based on Ubuntu 18.04 (GNU/Linux 4.19.0-xilinx-v2019.1 armv7l)\r\n",
+      "\r\n",
+      " * Super-optimized for small spaces - read how we shrank the memory\r\n",
+      "   footprint of MicroK8s to make it the smallest full K8s around.\r\n",
+      "\r\n",
+      "   https://ubuntu.com/blog/microk8s-memory-optimisation\r\n"
+     ]
+    }
+   ],
    "source": [
     "import os\n",
-    "from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ\n",
     "\n",
     "# set up the following values according to your own environment\n",
     "# FINN will use ssh to deploy and run the generated accelerator\n",
@@ -643,6 +692,20 @@
     "password = os.getenv(\"PYNQ_PASSWORD\", \"xilinx\")\n",
     "port = os.getenv(\"PYNQ_PORT\", 22)\n",
     "target_dir = os.getenv(\"PYNQ_TARGET_DIR\", \"/home/xilinx/finn_cnv_end2end_example\")\n",
+    "# set up ssh options to only allow publickey authentication\n",
+    "options = \"-o PreferredAuthentications=publickey -o PasswordAuthentication=no\"\n",
+    "\n",
+    "# test access to PYNQ board\n",
+    "! ssh {options} {username}@{ip} -p {port} cat /var/run/motd.dynamic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ\n",
     "\n",
     "model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_synth.onnx\")\n",
     "model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))\n",
@@ -651,16 +714,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "'/home/xilinx/finn_dev_maltanar/pynq_deployment_obskagv5'"
+       "'/home/xilinx/finn_dev_jduarte/pynq_deployment_yrxnwrak'"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -672,24 +735,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "total 4216\r\n",
-      "-rw-r--r-- 1 xilinx xilinx    8508 Sep 21 13:19 driver.py\r\n",
-      "drwxr-xr-x 4 xilinx xilinx    4096 Sep 21 13:19 finn\r\n",
-      "-rw-r--r-- 1 xilinx xilinx 4045671 Sep 21 13:19 resizer.bit\r\n",
-      "-rw-r--r-- 1 xilinx xilinx  246205 Sep 21 13:19 resizer.hwh\r\n",
-      "-rw-r--r-- 1 xilinx xilinx    1727 Sep 21 13:19 validate.py\r\n"
+      "total 4240\r\n",
+      "-rw-rw-r-- 1 xilinx xilinx   18616 Jun 28 20:42 driver_base.py\r\n",
+      "-rw-r--r-- 1 xilinx xilinx    4868 Jun 28 20:42 driver.py\r\n",
+      "drwxr-xr-x 4 xilinx xilinx    4096 Jun 28 20:42 finn\r\n",
+      "-rw-r--r-- 1 xilinx xilinx 4045671 Jun 28 20:42 resizer.bit\r\n",
+      "-rw-r--r-- 1 xilinx xilinx  247083 Jun 28 20:42 resizer.hwh\r\n",
+      "drwxr-xr-x 2 xilinx xilinx    4096 Jun 28 20:42 runtime_weights\r\n",
+      "-rw-rw-r-- 1 xilinx xilinx    4107 Jun 28 20:42 validate.py\r\n"
      ]
     }
    ],
    "source": [
-    "! sshpass -p {password} ssh {username}@{ip} -p {port} 'ls -l {target_dir_pynq}'"
+    "! ssh {options} {username}@{ip} -p {port} 'ls -l {target_dir_pynq}'"
    ]
   },
   {
@@ -701,16 +766,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.image.AxesImage at 0x7f89a07e6eb8>"
+       "<matplotlib.image.AxesImage at 0x7f917faeb6d0>"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -747,7 +812,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -764,7 +829,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -773,7 +838,7 @@
        "array([[3.]], dtype=float32)"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -795,7 +860,7 @@
    "source": [
     "### Validating the Accuracy on a PYNQ Board <a id='validation'></a>\n",
     "\n",
-    "All the command line prompts here are meant to be executed with `sudo` on the PYNQ board, so we'll use a workaround (`sshpass` and `echo password | sudo -S command`) to get that working from this notebook running on the host computer.\n",
+    "All the command line prompts here are meant to be executed with `sudo` on the PYNQ board, so we'll use a workaround (`echo password | sudo -S command`) to get that working from this notebook running on the host computer.\n",
     "\n",
     "**Ensure that your PYNQ board has a working internet connecting for the next steps, since some there is some downloading involved.**\n",
     "\n",
@@ -809,7 +874,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -819,12 +884,12 @@
       "[sudo] password for xilinx: Requirement already satisfied: dataset_loading from git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading in /usr/local/lib/python3.6/dist-packages\n",
       "Requirement already satisfied: Pillow in /usr/lib/python3/dist-packages (from dataset_loading)\n",
       "Requirement already satisfied: scipy in /usr/lib/python3/dist-packages (from dataset_loading)\n",
-      "Connection to 192.168.2.99 closed.\n"
+      "Connection to 99.121.248.96 closed.\n"
      ]
     }
    ],
    "source": [
-    "! sshpass -p {password} ssh -t {username}@{ip} -p {port} 'echo {password} | sudo -S pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading'"
+    "! ssh {options} -t {username}@{ip} -p {port} 'echo {password} | sudo -S pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading'"
    ]
   },
   {
@@ -840,7 +905,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -850,30 +915,30 @@
       "[sudo] password for xilinx: Tar File found in dest_dir. Not Downloading again\n",
       "Extracting Python CIFAR10 data.\n",
       "Files extracted\n",
-      "batch 0 / 10 : total OK 851 NOK 149\n",
-      "batch 1 / 10 : total OK 1683 NOK 317\n",
-      "batch 2 / 10 : total OK 2522 NOK 478\n",
-      "batch 3 / 10 : total OK 3370 NOK 630\n",
-      "batch 4 / 10 : total OK 4207 NOK 793\n",
-      "batch 5 / 10 : total OK 5044 NOK 956\n",
-      "batch 6 / 10 : total OK 5887 NOK 1113\n",
-      "batch 7 / 10 : total OK 6728 NOK 1272\n",
-      "batch 8 / 10 : total OK 7570 NOK 1430\n",
-      "batch 9 / 10 : total OK 8419 NOK 1581\n",
+      "batch 1 / 10 : total OK 851 NOK 149\n",
+      "batch 2 / 10 : total OK 1683 NOK 317\n",
+      "batch 3 / 10 : total OK 2522 NOK 478\n",
+      "batch 4 / 10 : total OK 3370 NOK 630\n",
+      "batch 5 / 10 : total OK 4207 NOK 793\n",
+      "batch 6 / 10 : total OK 5044 NOK 956\n",
+      "batch 7 / 10 : total OK 5887 NOK 1113\n",
+      "batch 8 / 10 : total OK 6728 NOK 1272\n",
+      "batch 9 / 10 : total OK 7570 NOK 1430\n",
+      "batch 10 / 10 : total OK 8419 NOK 1581\n",
       "Final accuracy: 84.190000\n",
-      "Connection to 192.168.2.99 closed.\n"
+      "Connection to 99.121.248.96 closed.\n"
      ]
     }
    ],
    "source": [
-    "! sshpass -p {password} ssh -t {username}@{ip} -p {port} 'cd {target_dir_pynq}; echo {password} | sudo -S python3.6 validate.py --dataset cifar10 --batchsize 1000'"
+    "! ssh {options} -t {username}@{ip} -p {port} 'cd {target_dir_pynq}; echo {password} | sudo -S python3.6 validate.py --dataset cifar10 --batchsize 1000'"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We see that the final top-1 accuracy is 84.19%, which is very close to the 84.22% reported on the [BNN-PYNQ accuracy table in Brevitas](https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq). "
+    "We see that the final top-1 accuracy is 84.19%, which is very close to the 84.22% reported on the [BNN-PYNQ accuracy table in Brevitas](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/bnn_pynq). "
    ]
   },
   {
@@ -900,7 +965,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
index 8cbff4fcea58d452b1e35c0dab647a8f922dc2c0..5ed4b170b4eeee4b438d9539d2317a7d5eab5df2 100644
--- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
@@ -1337,7 +1337,43 @@
    "source": [
     "### Deployment and Remote Execution <a id='deploy'></a>\n",
     "\n",
-    "We'll now use the `DeployToPYNQ` transformation to create a deployment folder with the bitfile and driver file(s), and copy that to the PYNQ board. You can change the default IP address, username, password and target folder for the PYNQ below."
+    "We'll now use the `DeployToPYNQ` transformation to create a deployment folder with the bitfile and driver file(s), and copy that to the PYNQ board. You can change the default IP address, username, password and target folder for the PYNQ below.\n",
+    "\n",
+    "**Make sure you've [set up the SSH keys for your PYNQ board](https://finn-dev.readthedocs.io/en/latest/getting_started.html#pynq-board-first-time-setup) before executing this step.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Welcome to PYNQ Linux, based on Ubuntu 18.04 (GNU/Linux 5.4.0-xilinx-v2020.1 armv7l)\r\n",
+      "\r\n",
+      " * Pure upstream Kubernetes 1.21, smallest, simplest cluster ops!\r\n",
+      "\r\n",
+      "     https://microk8s.io/\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "# set up the following values according to your own environment\n",
+    "# FINN will use ssh to deploy and run the generated accelerator\n",
+    "ip = os.getenv(\"PYNQ_IP\", \"192.168.2.99\")\n",
+    "username = os.getenv(\"PYNQ_USERNAME\", \"xilinx\")\n",
+    "password = os.getenv(\"PYNQ_PASSWORD\", \"xilinx\")\n",
+    "port = os.getenv(\"PYNQ_PORT\", 22)\n",
+    "target_dir = os.getenv(\"PYNQ_TARGET_DIR\", \"/home/xilinx/finn_tfc_end2end_example\")\n",
+    "# set up ssh options to only allow publickey authentication\n",
+    "options = \"-o PreferredAuthentications=publickey -o PasswordAuthentication=no\"\n",
+    "\n",
+    "# test access to PYNQ board\n",
+    "! ssh {options} {username}@{ip} -p {port} cat /var/run/motd.dynamic"
    ]
   },
   {
@@ -1347,11 +1383,7 @@
    "outputs": [],
    "source": [
     "from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ\n",
-    "ip = \"192.168.2.99\"\n",
-    "port = \"22\"\n",
-    "username = \"xilinx\"\n",
-    "password = \"xilinx\"\n",
-    "target_dir = \"/home/xilinx/finn_tfc_end2end_example\"\n",
+    "\n",
     "model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))\n",
     "model.save(build_dir + \"/tfc_w1_a1_pynq_deploy.onnx\")"
    ]
@@ -1456,7 +1488,7 @@
     }
    ],
    "source": [
-    "! sshpass -p {password} ssh {username}@{ip} -p {port} 'ls -l {target_dir_pynq}'"
+    "! ssh {options} {username}@{ip} -p {port} 'ls -l {target_dir_pynq}'"
    ]
   },
   {
@@ -1578,7 +1610,7 @@
    "source": [
     "### Validating the Accuracy on a PYNQ Board <a id='validation'></a>\n",
     "\n",
-    "All the command line prompts here are meant to be executed with `sudo` on the PYNQ board, so we'll use a workaround (`sshpass` and `echo password | sudo -S command`) to get that working from this notebook running on the host computer.\n",
+    "All the command line prompts here are meant to be executed with `sudo` on the PYNQ board, so we'll use a workaround (`echo password | sudo -S command`) to get that working from this notebook running on the host computer.\n",
     "\n",
     "**Ensure that your PYNQ board has a working internet connecting for the next steps, since some there is some downloading involved.**\n",
     "\n",
@@ -1587,7 +1619,7 @@
     "\n",
     "Command to execute on PYNQ:\n",
     "\n",
-    "```pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading```"
+    "```sudo pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading```"
    ]
   },
   {
@@ -1609,7 +1641,7 @@
     }
    ],
    "source": [
-    "! sshpass -p {password} ssh -t {username}@{ip} -p {port} 'echo {password} | sudo -S pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading'"
+    "! ssh {options} -t {username}@{ip} -p {port} 'echo {password} | sudo -S pip3 install git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading'"
    ]
   },
   {
@@ -1656,7 +1688,7 @@
     }
    ],
    "source": [
-    "! sshpass -p {password} ssh -t {username}@{ip} -p {port} 'cd {target_dir_pynq}; echo {password} | sudo -S python3.6 validate.py --dataset mnist --batchsize 1000'"
+    "! ssh {options} -t {username}@{ip} -p {port} 'cd {target_dir_pynq}; echo {password} | sudo -S python3.6 validate.py --dataset mnist --batchsize 1000'"
    ]
   },
   {
diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
index ff4c5704002219ca18bb07eeb8c768f860f3ffbf..e0ce00c1beefe8172ac5fd2aeaaa076b9bb574c1 100644
--- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
+++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb
@@ -103,23 +103,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2021-02-24 16:57:33--  https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n",
+      "--2021-05-10 18:14:00--  https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n",
       "Resolving zenodo.org (zenodo.org)... 137.138.76.77\n",
       "Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.\n",
       "HTTP request sent, awaiting response... 200 OK\n",
       "Length: 13391907 (13M) [application/octet-stream]\n",
-      "Saving to: 'unsw_nb15_binarized.npz'\n",
+      "Saving to: ‘unsw_nb15_binarized.npz’\n",
       "\n",
-      "unsw_nb15_binarized 100%[===================>]  12.77M  2.17MB/s    in 8.9s    \n",
+      "unsw_nb15_binarized 100%[===================>]  12.77M  3.96MB/s    in 3.4s    \n",
       "\n",
-      "2021-02-24 16:57:44 (1.44 MB/s) - 'unsw_nb15_binarized.npz' saved [13391907/13391907]\n",
+      "2021-05-10 18:14:04 (3.77 MB/s) - ‘unsw_nb15_binarized.npz’ saved [13391907/13391907]\n",
       "\n"
      ]
     }
@@ -422,7 +422,9 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Training loss = 0.132480 test accuracy = 0.797989: 100%|██████████| 10/10 [00:58<00:00,  5.70s/it]\n"
+      "Training loss:   0%|          | 0/10 [00:00<?, ?it/s]/opt/conda/lib/python3.8/site-packages/torch/autograd/__init__.py:130: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at  /opt/conda/conda-bld/pytorch_1607370172916/work/c10/cuda/CUDAFunctions.cpp:100.)\n",
+      "  Variable._execution_engine.run_backward(\n",
+      "Training loss = 0.131708 test accuracy = 0.805398: 100%|██████████| 10/10 [01:04<00:00,  6.42s/it]\n"
      ]
     }
    ],
@@ -457,7 +459,7 @@
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAjcUlEQVR4nO3de5QcZ33m8e/TPTfNrceyRrdu2ZJBMsiewQZhwhIICRBsQmwSjokJIWQ3OQ5ZHEggm5gkBxJnOccLWTbZXbOLN5jsbiCObQirJAYTwj0JoDG+yJKxLcsXjWRdLWlGl7n/9o+uGfeMeqSRND3VPf18zukzVW9Vdf+mj6RHVW+9bykiMDMzmymTdgFmZladHBBmZlaWA8LMzMpyQJiZWVkOCDMzK8sBYWZmZTkgzGYh6cuS3jPf+55lDa+X1D/f72s2Fw1pF2A2nyQdK1ltBYaB8WT91yPic3N9r4i4phL7mtUKB4QtKhHRPrks6Wng1yLiazP3k9QQEWMLWZtZrfElJqsLk5dqJP2epL3AZyVdIOnvJR2QdDhZLpQc801Jv5Ys/4qk70r602TfpyRdc477rpP0bUmDkr4m6TZJfzXH3+OlyWcdkbRN0rUl294iaXvyvrsl/U7Sviz53Y5Iel7SdyT5776dkf+QWD1ZCSwFLgZupPjn/7PJ+kXASeC/n+b4VwGPAcuAjwOfkaRz2PfzwA+AC4E/At49l+IlNQJ/B3wVWA78JvA5SZcmu3yG4mW0DuBy4OtJ+4eAfqAbWAH8PuA5duyMHBBWTyaAj0bEcEScjIhDEfGFiDgREYPAx4CfOM3xz0TE/4qIceB/A6so/oM7530lXQS8EvhIRIxExHeBzXOs/8eAduDW5NivA38PvDPZPgpslNQZEYcj4ocl7auAiyNiNCK+E56EzebAAWH15EBEDE2uSGqV9GlJz0gaAL4NdEnKznL83smFiDiRLLaf5b6rgedL2gB2zbH+1cCuiJgoaXsGyCfLbwfeAjwj6VuSXp20fwLYAXxV0k5JN8/x86zOOSCsnsz8X/OHgEuBV0VEJ/C6pH22y0bz4TlgqaTWkrY1czx2D7BmRv/BRcBugIjYEhHXUbz89CXgrqR9MCI+FBGXANcCH5T0hvP7NaweOCCsnnVQ7Hc4Imkp8NFKf2BEPAP0AX8kqSn5X/7PzvHw7wMngN+V1Cjp9cmxdybv9S5JuYgYBQYoXlJD0lslvTjpAzlK8bbfibKfYFbCAWH17M+AJcBB4HvAVxboc98FvBo4BPxH4G8ojtc4rYgYoRgI11Cs+VPAL0fEj5Jd3g08nVwue2/yOQDrga8Bx4B/BT4VEd+Yt9/GFi25r8osXZL+BvhRRFT8DMbsbPgMwmyBSXqlpBdJyki6GriOYp+BWVXxSGqzhbcS+CLFcRD9wG9ExAPplmR2Kl9iMjOzsnyJyczMylo0l5iWLVsWa9euTbsMM7Oacv/99x+MiO5y2xZNQKxdu5a+vr60yzAzqymSnpltmy8xmZlZWQ4IMzMrywFhZmZlVTQgJF0t6TFJO043g6Skt0sKSZtK2j6cHPeYpDdXsk4zMztVxTqpkymTbwPeRHEw0BZJmyNi+4z9OoAPUJyIbLJtI3ADcBnFKY6/JmlDMre+mZktgEqeQVwF7IiInckkY3dSnFJgpj8B/hMwVNJ2HXBn8mCXpyjOZX9VBWs1M7MZKhkQeaY/CKWfFx5sAoCklwNrIuIfzvbY5PgbJfVJ6jtw4MD8VG1mZkCKndTJQ08+SfGhLeckIm6PiE0Rsam7u+w4jzM6cmKEP//aE2ztP3quZZiZLUqVHCi3m+lPyiokbZMmH6z+zeRZ7iuBzZKuncOx8yabEf/la4+TEfQUcpX4CDOzmlTJM4gtwHpJ6yQ1Uex0nno4e0QcjYhlEbE2ItZSfGDLtRHRl+x3g6RmSesoPvDkB5UosqOlkUu629i622cQZmalKnYGERFjkm4C7gOywB0RsU3SLUBfRGw+zbHbJN0FbAfGgPdV8g6m3nyO7z/1fKXe3sysJlV0LqaIuBe4d0bbR2bZ9/Uz1j8GfKxixZW4PJ/jSw/uYf/gEMs7WhbiI83Mqp5HUgO9hS4AHvFlJjOzKQ4I4LLVnUjwsO9kMjOb4oAA2pobeHF3u88gzMxKOCASPYWczyDMzEo4IBI9+Rz7B4fZNzB05p3NzOqAAyLRmwyS81mEmVmRAyKxcVWOjGBr/5G0SzEzqwoOiMSSpiwbVnR4RLWZWcIBUaInn2Pr7qNERNqlmJmlzgFRoqeQ4+CxEZ476o5qMzMHRImevDuqzcwmOSBKvHRVJw0ZsXX3kbRLMTNLnQOiREvjZEf1QNqlmJmlzgExQ08+x9b+I+6oNrO654CYoaeQ4/CJUfoPn0y7FDOzVDkgZpgcUe3xEGZW7xwQM1y6soPGrHwnk5nVPQfEDM0NWV6ystNTf5tZ3XNAlHF5PsfD7qg2szrngCijt5BjYGiMZ58/kXYpZmapcUCU4RHVZmYOiLI2rOigqSHjfggzq2sOiDKaGjK8dFWnzyDMrK45IGbRky/eyTQx4Y5qM6tPDohZ9Oa7GBwe4+lDx9MuxcwsFQ6IWfR4RLWZ1TkHxCzWL2+nuSHDVvdDmFmdckDMoiGbYePqTh72GYSZ1amKBoSkqyU9JmmHpJvLbH+vpK2SHpT0XUkbk/a1kk4m7Q9K+p+VrHM2vfkc23YfZdwd1WZWhyoWEJKywG3ANcBG4J2TAVDi8xHRExFXAB8HPlmy7cmIuCJ5vbdSdZ5OT6GL4yPjPHXwWBofb2aWqkqeQVwF7IiInRExAtwJXFe6Q0SUPrqtDaiq/6pPTv3t8RBmVo8qGRB5YFfJen/SNo2k90l6kuIZxPtLNq2T9ICkb0l6bbkPkHSjpD5JfQcOHJjP2gF4UXc7SxqzvpPJzOpS6p3UEXFbRLwI+D3gD5Pm54CLIuJK4IPA5yV1ljn29ojYFBGburu75722bEZctrrTdzKZWV2qZEDsBtaUrBeSttncCbwNICKGI+JQsnw/8CSwoTJlnl5PIce2PQOMjU+k8fFmZqmpZEBsAdZLWiepCbgB2Fy6g6T1Jas/AzyRtHcnndxIugRYD+ysYK2z6i3kODk6zpMHPKLazOpLQ6XeOCLGJN0E3AdkgTsiYpukW4C+iNgM3CTpjcAocBh4T3L464BbJI0CE8B7I+L5StV6Oj35LgAe7j/CpSs70ijBzCwVFQsIgIi4F7h3RttHSpY/MMtxXwC+UMna5uqSZW20NWV5ZPdRrt+05swHmJktEql3Ule7TEZcls95RLWZ1R0HxBz05nNs3zPAqDuqzayOOCDmoKeQY3hsgif2eUS1mdUPB8Qc9Ba6ANi6+0iqdZiZLSQHxBxcvLSVjpYGj6g2s7rigJiDTEZcvjrnEdVmVlccEHPUW8jx6HODjIy5o9rM6oMDYo56CjlGxid4fN9g2qWYmS0IB8Qc9SYjqt0PYWb1wgExR2uWLiG3pNHPhjCzuuGAmCNJ9ORzvtXVzOqGA+
Is9BRyPLZ3kKHR8bRLMTOrOAfEWejN5xgdDx7b645qM1v8HBBnoSd5RrU7qs2sHjggzkK+awkXtDZ6wJyZ1QUHxFmQRE+hy1N/m1ldcECcpd58jsf3uaPazBY/B8RZ6inkGJ8Itj83kHYpZmYV5YA4S71JR/UjvsxkZoucA+IsrexsYVl7k0dUm9mi54A4S1Mjqh0QZrbIOSDOQU+hiyf2D3JiZCztUszMKsYBcQ568zkmArbvcUe1mS1eDohz4BHVZlYPHBDnYEVnC8s7mt0PYWaLmgPiHPUWch5RbWaLmgPiHPXku3jywDGODbuj2swWJwfEOeot5IiAbT6LMLNFqqIBIelqSY9J2iHp5jLb3ytpq6QHJX1X0saSbR9OjntM0psrWee5uDzvjmozW9wqFhCSssBtwDXARuCdpQGQ+HxE9ETEFcDHgU8mx24EbgAuA64GPpW8X9Xo7mhmVa7FAWFmi1YlzyCuAnZExM6IGAHuBK4r3SEiSgcStAGRLF8H3BkRwxHxFLAjeb+q4hHVZraYVTIg8sCukvX+pG0aSe+T9CTFM4j3n82xaest5Nh58DgDQ6Npl2JmNu9S76SOiNsi4kXA7wF/eDbHSrpRUp+kvgMHDlSmwNPoKXQBntnVzBanSgbEbmBNyXohaZvNncDbzubYiLg9IjZFxKbu7u7zq/Yc9OQ99beZLV6VDIgtwHpJ6yQ1Uex03ly6g6T1Jas/AzyRLG8GbpDULGkdsB74QQVrPSdL25rIdy3x1N9mtig1VOqNI2JM0k3AfUAWuCMitkm6BeiLiM3ATZLeCIwCh4H3JMduk3QXsB0YA94XEVX5jM/eQs53MpnZolSxgACIiHuBe2e0faRk+QOnOfZjwMcqV9386Cnk+PIjezl6YpRca2Pa5ZiZzZvUO6lr3VQ/xB6fRZjZ4uKAOE+TAeF+CDNbbBwQ56mrtYmLlraydfeRtEsxM5tXDoh50FPI+QzCzBYdB8Q86M3n6D98ksPHR9Iuxcxs3jgg5kGPZ3Y1s0XIATEPLnNAmNki5ICYB7kljaxb1sbD/UfSLsXMbN44IOaJp/42s8XGATFPegs59hwd4uCx4bRLMTObFw6IeeJHkJrZYuOAmCeXre5EwpeZzGzRcEDMk46WRi5Z1uYBc2a2aMwpICS1ScokyxskXSvJU5fO0Fvo8pQbZrZozPUM4ttAi6Q88FXg3cBfVqqoWtWTz7FvYJj9A0Npl2Jmdt7mGhCKiBPAzwOfiojrgcsqV1Zt6im4o9rMFo85B4SkVwPvAv4hactWpqTatXFVJxl56m8zWxzmGhC/BXwY+NvkcaCXAN+oWFU1qq25gRcvb/cZhJktCnN65GhEfAv4FkDSWX0wIt5fycJqVU++i289foCIQFLa5ZiZnbO53sX0eUmdktqAR4Dtkv5DZUurTT35Tg4eG2avO6rNrMbN9RLTxogYAN4GfBlYR/FOJpuhp9AFeMCcmdW+uQZEYzLu4W3A5ogYBaJiVdWwjas6yWbkfggzq3lzDYhPA08DbcC3JV0MDFSqqFq2pCnL+uXtvpPJzGrenAIiIv5rROQj4i1R9AzwkxWurWb1FnJs3X2UCJ9kmVntmmsndU7SJyX1Ja//TPFswsroyed4/vgIe466o9rMatdcLzHdAQwC70heA8BnK1VUrXuho/pIqnWYmZ2PuQbEiyLioxGxM3n9MXBJJQurZS9Z2UFDRu6HMLOaNteAOCnpxydXJL0GOFmZkmpfS2OWS1d2+E4mM6tpcxpJDbwX+D+Scsn6YeA9lSlpcegt5Lh3616PqDazmjXXu5geioiXAb1Ab0RcCfzUmY6TdLWkxyTtkHRzme0flLRd0sOS/im5fXZy27ikB5PX5rP4narC5fkcR0+O0n/YJ1pmVpvO6olyETGQjKgG+ODp9pWUBW4DrgE2Au+UtHHGbg8AmyKiF7gH+HjJtpMRcUXyuvZs6qwGvfkuwDO7mlntOp9Hjp7puslVwI6kU3sEuBO4rnSHiPhG8pwJgO8BhfOop6psWNlOUzbDw37CnJnVqPMJiDONAssDu0rW+5O22fwqxXmeJrUkYy6+J+lt5Q6QdOPk2IwDBw7MpeYF09yQ5SWrOjwnk5nVrNN2UksapHwQCFgyX0VI+iVgE/ATJc0XR8Tu5NkTX5e0NSKeLD0uIm4HbgfYtGlT1Q1bvjyf4+8e2uOOajOrSac9g4iIjojoLPPqiIgz3QG1G1hTsl5I2qaR9EbgD4BrI2K45LN3Jz93At8ErpzTb1RFevM5BofGeObQiTPvbGZWZc7nEtOZbAHWS1onqQm4AZh2N5KkKylOBHhtROwvab9AUnOyvAx4DbC9grVWxOQzqh/2eAgzq0EVC4iIGANuAu4DHgXuSh5XeoukybuSPgG0A3fPuJ31pUCfpIcoPtr01oiouYDYsKKDpoaMp9wws5o014Fy5yQi7gXundH2kZLlN85y3L8APZWsbSE0ZjNsXNXpW13NrCZV8hKTUZzZddueASYmqq4P3czstBwQFdZTyHFseIynDh1PuxQzs7PigKiw3qSj2uMhzKzWOCAq7MXd7bQ0ZtwPYWY1xwFRYQ3ZDJetzrHVU26YWY1xQCyAnnyOR3YPMO6OajOrIQ6IBdCTz3FydJydB46lXYqZ2Zw5IBbAZEe1+yHMrJY4IBbAJd3ttDZl/QhSM6spDogFkM2Iy1Z38rCn3DCzGuKAWCA9+S62PzfA2PhE2qWYmc2JA2KB9BZyDI1OsMMd1WZWIxwQC6THHdVmVmMcEAtk3YVttDc3eMoNM6sZDogFkpnsqPadTGZWIxwQC6i3kOPR5wYYdUe1mdUAB8QC6il0MTI2weP7BtMuxczsjBwQC6g376m/zax2OCAW0MUXttLR0uB+CDOrCQ6IBSSJnnzOZxBmVhMcEAusp5DjR3sHGB4bT7sUM7PTckAssN58F6PjweN7PaLazKqbA2KBTU397SfMmVmVc0AssMIFS+hqbXQ/hJlVPQfEApvsqPacTGZW7RwQKejJ53h83yBDo+6oNrPq5YBIQW8hx9hE8KO9HlFtZtXLAZGCnkIXAFv9hDkzq2IVDQhJV0t6TNIOSTeX2f5BSdslPSzpnyRdXLLtPZKeSF7vqWSdC211roWlbU3uhzCzqlaxgJCUBW4DrgE2Au+UtHHGbg8AmyKiF7gH+Hhy7FLgo8CrgKuAj0q6oFK1LrSpEdWecsPMqlglzyCuAnZExM6IGAHuBK4r3SEivhERJ5LV7wGFZPnNwD9GxPMRcRj4R+DqCta64HoLOZ7Yf4yTI+6oNrPqVMmAyAO7Stb7k7bZ/Crw5bM5VtKNkvok9R04cOA8y11YPfkc4xPB9ucG0i7FzKysquiklvRLwCbgE2dzXETcHhGbImJTd3d3ZYqrkF53VJtZlatkQOwG1pSsF5K2aSS9EfgD4NqIGD6bY2vZis5mlrU3e+pvM6talQyILcB6SeskNQE3AJtLd5B0JfBpiuGwv2TTfcBPS7og6Zz+6aRt0ZBEb8FTf5tZ9apYQETEGHATxX/YHwXui
ohtkm6RdG2y2yeAduBuSQ9K2pwc+zzwJxRDZgtwS9K2qPTkczx54BjHh8fSLsXM7BQNlXzziLgXuHdG20dKlt94mmPvAO6oXHXp6y3kmAjY/twAr1y7NO1yzMymqYpO6nrVkzyj2gPmzKwaOSBStLyzhRWdzb6TycyqkgMiZT35Lo+oNrOq5IBIWW8hx86DxxkcGk27FDOzaRwQKesp5IiAbXs8otrMqosDImWTHdUeD2Fm1cYBkbJl7c2szrV4RLWZVR0HRBXoKeR4xAFhZlXGAVEFegtdPHXwOEdPuqPazKqHA6IKTPZDbPNZhJlVEQdEFZgaUe2AMLMq4oCoAhe0NVG4YInvZDKzquKAqBK9BT+j2syqiwOiSvTku3j2+RPsHxhKuxQzM8ABUTVe/aILkeAnPvFNPnjXg3xv5yEiIu2yzKyOVfR5EDZ3V6zp4kv//jXcuWUXf/fQHr74w91cfGEr17+iwNtfUWBVbknaJZpZndFi+V/qpk2boq+vL+0y5sXJkXG+su057trSz7/uPIQEr13fzTs2FXjTxhU0N2TTLtHMFglJ90fEprLbHBDV7dlDJ7jn/l3cc38/e44O0dXayHUvW831m9ZweXJ7rJnZuXJALALjE8G/PHmQu/r6uW/bXkbGJti4qpN3bCpw3RV5LmhrSrtEM6tBDohF5siJETY/tIe7+/rZuvsoTdkMb9q4gus3FXjt+m6yGaVdopnVCAfEIrZ9zwB337+LLz2wm8MnRlnZ2cLbX5Hn+lesYe2ytrTLM7Mq54CoA8Nj4/zTo/u5u28X33r8ABMBV61byjs2reEtPStpbfINa2Z2KgdEndl7dIgv/LCfu/t28fShE7Q1ZXlr72re8coCL7/oAiRfgjKzIgdEnYoI+p45zF1bdvEPW5/jxMg4l3S3cf0r1vD2l+dZ3tmSdolmljIHhHFseIx7H36Ou+/fxZanD5PNiNdv6Ob6TQV+6iUraGrwoHqzeuSAsGl2HjjG3ff384X7+9k/OMzStiZ+7so879i0hktXdqRdnpktIAeElTU2PsF3njjIXX27+Nqj+xgdD3oLOa67Is/lqzvZsKLD4yvMFjkHhJ3RoWPDfOnBPdzdt4sf7R2cau/uaGbDinbWL+/g0pUdxeUVHXS2NKZYrZnNFweEzVlEsOfoEI/vG+SJfYM8vu9YsnyMk6PjU/utyrWwfkUHG5a3s2FFBxtWdrB+eTttzb6d1qyWnC4gKvq3WdLVwJ8DWeAvIuLWGdtfB/wZ0AvcEBH3lGwbB7Ymq89GxLWVrNWKJJHvWkK+awk/eenyqfaJiWD3kZM8vm+Qx5LAeHzfIP935yGGxyam9st3LWHDinY2rOxgw/IONqzo4MXL21nS5AkGzWpNxQJCUha4DXgT0A9skbQ5IraX7PYs8CvA75R5i5MRcUWl6rOzk8mINUtbWbO0lTe8dMVU+/hE8OzzJ0454/jnHYcYGS8GhwQXLW1l/fLiJapLV3awfnkHl3S30dLo4DCrVpU8g7gK2BEROwEk3QlcB0wFREQ8nWybKPcGVv2yGbFuWRvrlrXx5stWTrWPjU/w9KET00Lj8X2DfPOx/YxNFC9rZgRrL2wrXqJK+jY2rOhg3bI233ZrVgUqGRB5YFfJej/wqrM4vkVSHzAG3BoRX5q5g6QbgRsBLrroonOv1OZdQzbDi5e38+Ll7VzT80L7yNgETx86zmN7S8449g/y1e17SXKDhpLQWZlrYUXn5Kt5armzpcEjws0qrJp7FC+OiN2SLgG+LmlrRDxZukNE3A7cDsVO6jSKtLPT1JBJzhimj7cYGh1n54HjPLF/MDnbOMbTh47z/aee5+jJ0VPep6UxUxIcLazoaGZlroXlJcsrOlt8CcvsPFQyIHYDa0rWC0nbnETE7uTnTknfBK4EnjztQVazWhqzbFzdycbVnadsGxodZ//AMHsHhtg37VVs29p/hH8cGGJo9NQrlZ0tDVNhsbyjhZW55pLl4lnJsvZmGrO+pGU2UyUDYguwXtI6isFwA/CLczlQ0gXAiYgYlrQMeA3w8YpValWtpTHLRRe2ctGFrbPuExEMDI2xf2AoCZLhaWGyd2CYHfsPsn9wmPGJ6SebEixrby5ewupoYUWupfizs5kVuRaWdxRDZWlrExk/a8PqSMUCIiLGJN0E3EfxNtc7ImKbpFuAvojYLOmVwN8CFwA/K+mPI+Iy4KXAp5PO6wzFPojts3yUGZLILWkkt6SR9Stmny5kfCI4dHy4eEZydIh9g0mYJMt7jg7xwK4jPH985JRjGzKiu6OZ5R3NdCcBsryjheWdzS8sdzRzYXuzH9pki4IHypmVMTxWvKy1b2CI/YPD7B8YYt9gMVj2Dw5N/Tx84tT+kUxyRrI8OSNZXhIky0uCZVl7Ew2+tGUpS22gnFmtam7ITo37OJ3hsXEODA4nITI9PPYNDLPn6BAP9R/h4LFTz0gkuLCteEYyPUyaWd7ZMvWzu73Zt/1aKhwQZuehuSFL4YJWChecPkhGxyc4eGx4+llJcmZSXB5i254BDh0bZqLMSf3Stia6WouX0DpbGulc0khnS0Nxfaqt4ZTtnUsa3QFv58wBYbYAGrMZVuWWsCq35LT7jU8Eh44Vw+OFy1vD7Bsc4uiJUQaGRjlyYoRnnz/B0ZOjDJwcnRp4OJvWpuysATIzYCaXJ/fraGlwx3wdc0CYVZFsRsXLS50tXJ7PnXH/iODk6DgDJ8cYGBqdCo2BodFi28mkbXJ9aJS9A0M8vn9wav103ZAStDc3TAuVxmyGTEY0ZERGIpuBhkyxLSvIZjJkM8XfJZsRWSnZJrLZ5OeMbQ3JekaiITv5vi/sM7Wc7NOYFUsas7Q0ZWltytLa2MCSpmzx1Zj1TQLzxAFhVsMk0drUQGtTcbzH2ZqYCI6PjCXBMpYEyWSojE0Lm6MnRxkcGuXk6DjjEzH1mohgbCKYmAjGIxgbL7ZN7VO6XLJ/Je+PaW7IsKQpS2tjdio4JkOkNQmR6csNxeVkffpycVtL4wv718tZlQPCrI5lMqKjpZGOlsbizeYLKJLgGJsZMmcIlvGJYgidHB3n5Mg4J0fHOTEyzsmRsZLl5Ofo5HJx25ETI+w5Utw2NPrCPmerpTFTDJbGYnA0NWRobszSnM3Q3JihuSFDc0O2+LOxZHlyv4aSfU67//TtTQ2ZBT07ckCYWSqUXE5qSHk2lImJYHhsghMjY9OCoxgeY5wcmZgKmFODZ5yR8QmGR8cZHptgeGycY8NjHDpWXC62lW4//3lJG7M6JXAuz+f4b++8ch6+jekcEGZW1zIZTV2GurDCnxURjI7HrOExbXlsnOHRkuWxiWR9/JTta5ae/uaHc+WAMDNbIJJoahBNDRlmH+9fPXyDtJmZleWAMDOzshwQZmZWlgPCzMzKckCYmVlZDggzMyvLAWFmZmU5IMzMrKxF80Q5SQeAZ87jLZYBB+epnFrn72I6fx/T+ft4wWL4Li6OiO5yGxZNQJwvSX2zPXav3vi7mM7fx3T+Pl6w2L8LX2IyM7OyHBBmZlaWA+IF
t6ddQBXxdzGdv4/p/H28YFF/F+6DMDOzsnwGYWZmZTkgzMysrLoPCElXS3pM0g5JN6ddT5okrZH0DUnbJW2T9IG0a0qbpKykByT9fdq1pE1Sl6R7JP1I0qOSXp12TWmS9NvJ35NHJP21pJa0a5pvdR0QkrLAbcA1wEbgnZI2pltVqsaAD0XERuDHgPfV+fcB8AHg0bSLqBJ/DnwlIl4CvIw6/l4k5YH3A5si4nIgC9yQblXzr64DArgK2BEROyNiBLgTuC7lmlITEc9FxA+T5UGK/wDk060qPZIKwM8Af5F2LWmTlANeB3wGICJGIuJIqkWlrwFYIqkBaAX2pFzPvKv3gMgDu0rW+6njfxBLSVoLXAl8P+VS0vRnwO8CEynXUQ3WAQeAzyaX3P5CUlvaRaUlInYDfwo8CzwHHI2Ir6Zb1fyr94CwMiS1A18AfisiBtKuJw2S3grsj4j7066lSjQALwf+R0RcCRwH6rbPTtIFFK82rANWA22SfindquZfvQfEbmBNyXohaatbkhophsPnIuKLadeTotcA10p6muKlx5+S9FfplpSqfqA/IibPKO+hGBj16o3AUxFxICJGgS8C/yblmuZdvQfEFmC9pHWSmih2Mm1OuabUSBLFa8yPRsQn064nTRHx4YgoRMRain8uvh4Ri+5/iHMVEXuBXZIuTZreAGxPsaS0PQv8mKTW5O/NG1iEnfYNaReQpogYk3QTcB/FuxDuiIhtKZeVptcA7wa2Snowafv9iLg3vZKsivwm8LnkP1M7gX+bcj2piYjvS7oH+CHFu/8eYBFOu+GpNszMrKx6v8RkZmazcECYmVlZDggzMyvLAWFmZmU5IMzMrCwHhFlC0rHk51pJvzjP7/37M9b/ZT7f36wSHBBmp1oLnFVAJBO2nc60gIiIRTfq1hYfB4TZqW4FXivpwWTO/6ykT0jaIulhSb8OIOn1kr4jaTPJqGJJX5J0f/KcgBuTtlspzvr5oKTPJW2TZytK3vsRSVsl/ULJe3+z5PkLn0tG7CLp1uSZHQ9L+tMF/3asbtT1SGqzWdwM/E5EvBUg+Yf+aES8UlIz8M+SJmfufDlweUQ8laz/u4h4XtISYIukL0TEzZJuiogrynzWzwNXUHy+wrLkmG8n264ELqM4jfQ/A6+R9Cjwc8BLIiIkdc3vr272Ap9BmJ3ZTwO/nEw/8n3gQmB9su0HJeEA8H5JDwHfozgR5HpO78eBv46I8YjYB3wLeGXJe/dHxATwIMVLX0eBIeAzkn4eOHGev5vZrBwQZmcm4Dcj4orkta5k7v/jUztJr6c4y+erI+JlFOfnOZ/HUA6XLI8DDRExRvFBV/cAbwW+ch7vb3ZaDgizUw0CHSXr9wG/kUyFjqQNszwsJwccjogTkl5C8bGtk0Ynj5/hO8AvJP0c3RSf2vaD2QpLntWRSyZQ/G2Kl6bMKsJ9EGanehgYTy4V/SXFZzGvBX6YdBQfAN5W5rivAO9N+gkeo3iZadLtwMOSfhgR7ypp/1vg1cBDQAC/GxF7k4AppwP4f5JaKJ7ZfPCcfkOzOfBsrmZmVpYvMZmZWVkOCDMzK8sBYWZmZTkgzMysLAeEmZmV5YAwM7OyHBBmZlbW/wc3oVuWUfor/QAAAABJRU5ErkJggg==\n",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAApFUlEQVR4nO3df5xddX3n8dd77vzK/LyTzJCfMyRAAAPi3O6AP6i0KrbAtsB2qYK/oGtLbYvVRa2ou9pl5bFWWrXuUhdWxVqpqKglVRCsAtJWMIGEQIKBGAJJSMjk9ySTzM/P/nHPTO5MJsncZG7unZn38/G4j3vP9/y4n3Mheeec7znfo4jAzMxsvMqKXYCZmU0uDg4zM8uLg8PMzPLi4DAzs7w4OMzMLC8ODjMzy4uDw+w4SLpf0rUTvWyeNfympE0TvV2zYykvdgFmJ4ukfTmTNUAPMJBM/3FE3DXebUXEpYVY1mwycHDYtBERdUOfJW0A/jAi/mX0cpLKI6L/ZNZmNpn4VJVNe0OnfCR9VNJW4E5JTZJ+IKlT0q7k84KcdR6W9IfJ5+sk/aukv06WfUHSpce57CJJP5PUJelfJN0m6Rvj3I9XJd+1W9JqSZfnzLtM0ppku5slfThpb072bbeknZIeleS/F+yo/D+IWdYcYCZwKnA92T8bdybTbcAB4P8cZf3XAmuBZuCzwFck6TiW/UfgF8As4C+Bd4+neEkVwD8DDwKnAO8H7pJ0VrLIV8iejqsHzgV+mrR/CNgEtACzgY8DHofIjsrBYZY1CHwqInoi4kBE7IiI70ZEd0R0AbcAv3GU9V+MiP8XEQPA3wNzyf5FPO5lJbUB5wOfjIjeiPhXYOk4638dUAd8Jln3p8APgGuS+X3AEkkNEbErIp7MaZ8LnBoRfRHxaHgAOzsGB4dZVmdEHByakFQj6XZJL0raC/wMSEtKHWH9rUMfIqI7+ViX57LzgJ05bQAbx1n/PGBjRAzmtL0IzE8+/2fgMuBFSY9Ien3SfiuwDnhQ0npJN43z+2wac3CYZY3+V/aHgLOA10ZEA3BR0n6k008TYQswU1JNTlvrONd9GWgd1T/RBmwGiIhlEXEF2dNY/wR8O2nviogPRcRpwOXAjZLecmK7YVOdg8NsbPVk+zV2S5oJfKrQXxgRLwLLgb+UVJkcFfzuOFd/HOgG/kJShaTfTNa9O9nWOyU1RkQfsJfsqTkk/Y6kM5I+lj1kL08eHPMbzBIODrOxfQGYAWwHHgN+dJK+953A64EdwKeBb5G93+SoIqKXbFBcSrbmvwPeExG/TBZ5N7AhOe32vuR7ABYD/wLsA34O/F1EPDRhe2NTktwPZla6JH0L+GVEFPyIx2y8fMRhVkIknS/pdEllki4BriDbJ2FWMnznuFlpmQN8j+x9HJuAP4mIFcUtyWwkn6oyM7O8+FSVmZnlZVqcqmpubo6FCxcWuwwzs0nliSee2B4RLaPbp0VwLFy4kOXLlxe7DDOzSUXSi2O1+1SVmZnlxcFhZmZ5cXCYmVleHBxmZpYXB4eZmeXFwWFmZnlxcJiZWV4cHEdx78rNfOOxMS9jNjObthwcR/HA6q3c/rNfFbsMM7OS4uA4ivbWNBt3HmD7vmM+R8fMbNooaHBIukTSWknrJN00xvz3SXpa0kpJ/yppSc68jyXrrZX02+Pd5kTKtDUBsPKl3YX8GjOzSaVgwSEpBdxG9lGWS4BrcoMh8Y8R8eqIaAc+C3wuWXcJcDVwDnAJ8HeSUuPc5oQ5d14j5WVixcZdhfoKM7NJp5BHHBcA6yJiffI85LvJPs1sWETszZmsBYYeDnIFcHdE9ETEC8C6ZHvH3OZEmlGZ4lVzG1jhIw4zs2GFDI75wMac6U1J2wiS/kzSr8gecfz5MdYd1zaT7V4vabmk5Z2dnce9E5m2NE9t3M3AoB94ZWYGJdA5HhG3RcTpwEeB/zaB270jIjoioqOl5bDh5Mct05Zmf+8Az2/rmqjSzMwmtUIGx2agNWd6QdJ2JHcDVx5j3Xy3ecLaW91BbmaWq5DBsQxYLGmRpEqynd1LcxeQtDhn8j8CzyeflwJXS6qStAhYDPxiPNucaAtn1ZCuqXA/h5lZomBPAIyIfkk3AA8AKeCrEbFa0s3A8ohYCtwg6WKgD9gFXJusu1rSt4E1QD/wZxExADDWNgu1D8n3kWlN+8oqM7NEQR8dGxH3AfeNavtkzucPHGXdW4BbxrPNQsu0NfHwc53sPdhHQ3XFyfxqM7OSU/TO8ckg05YmAlZt3FPsUszMis7BMQ6vaU0jwYqXfLrKzMzBMQ4N1RWc3lLHyo27i12KmVnROTjGKdtBvpsI3whoZtObg2OcMm1N7Nzfy0s7u4tdiplZUTk4xinTlgbw/RxmNu05OMbpzNn11FSm3EFuZtOeg2OcUmXivAWNrHAHuZlNcw6OPGTamljz8l4O9g0UuxQzs6JxcOQh05qmfzBY/bJvBDSz6cvBkYd2d5CbmTk48nFKfTULmmY4OMxsWnNw5CnT1uQrq8xsWnNw5Km9Nc3Lew7yyt6DxS7FzKwoHBx58o2AZjbdOTjydM68BipTZX6wk5lNWw6OPFWVp1gyr8FHHGY2bTk4jkOmLc2qTbvpHxgsdilmZiedg+M4ZNqaONg3yC+3dhW7FDOzk87BcRwyrWkAP9jJzKalggaHpEskrZW0TtJNY8y/UdIaSask/UTSqUn7myStzHkdlHRlMu9rkl7ImddeyH0Yy4KmGTTXVbqfw8ympfJCbVhSCrgNeCuwCVgmaWlErMlZbAXQERHdkv4E+Czw9oh4CGhPtjMTWAc8mLPeRyLinkLVfiySaG9t8pVVZjYtFfKI4wJgXUSsj4he4G7gitwFIuKhiBh6pN5jwIIxtnMVcH/OciUh05Zmfed+dnf3FrsUM7OTqpDBMR/YmDO9KWk7kvcC94/RfjXwzVFttySntz4vqWqsjUm6XtJyScs7OzvzqXtchm4EdD+HmU03JdE5LuldQAdw66j2ucCrgQdymj8GnA2cD8wEPjrWNiPijojoiIiOlpaWCa/5vAVpJN9BbmbTTyGDYzPQmjO9IGkbQdLFwCeAyyOiZ9TstwHfj4i+oYaI2BJZPcCdZE+JnXR1VeWcNbveRxxmNu0UMjiWAYslLZJUSfaU09LcBSRlgNvJhsa2MbZxDaNOUyVHIUgScCXwzMSXPj6ZtjQrN+5mcDCKVYKZ2UlXsOCIiH7gBrKnmZ4Fvh0RqyXdLOnyZLFbgTrgO8mltcPBImkh2SOWR0Zt+i5JTwNPA83Apwu1D8eSaW1iz4E+Xtixv1glmJmddAW7HBcgIu4D7hvV9smczxcfZd0NjNGZHhFvnsAST0juSLmnt9QVtxgzs5OkJDrHJ6vTW+qoryr3g53MbFpxcJyAsjLxmta0r6wys2nFwXGCMm1p1r7SRXdvf7FLMTM7KRwcJyjTlmZgMHh6055il2JmdlI4OE5Qe2sTACt8P4eZTRMOjhM0s7aShbNq3EFuZtOGg2MCZNqaWPHSbiJ8I6
CZTX0OjgnQ3ppmW1cPW/YcLHYpZmYF5+CYALk3ApqZTXUOjglw9pwGqsrL3M9hZtOCg2MCVJaX8er5jb6yysymBQfHBMm0pXl68x56+weLXYqZWUE5OCZIe2sTvf2DPLtlb7FLMTMrKAfHBPGjZM1sunBwTJC5jdXMbqhyB7mZTXkOjgkiiUxrkzvIzWzKc3BMoExbmhd3dLNj3+hHp5uZTR0OjgmUacsOeOh+DjObyhwcE+jV8xtJlcl3kJvZlFbQ4JB0iaS1ktZJummM+TdKWiNplaSfSDo1Z96ApJXJa2lO+yJJjyfb/JakykLuQz5mVKY4e069jzjMbEorWHBISgG3AZcCS4BrJC0ZtdgKoCMizgPuAT6bM+9ARLQnr8tz2v8K+HxEnAHsAt5bqH04Hpm2NCs37mZg0CPlmtnUVMgjjguAdRGxPiJ6gbuBK3IXiIiHIqI7mXwMWHC0DUoS8GayIQPw98CVE1n0icq0NrGvp59fde4rdilmZgVRyOCYD2zMmd6UtB3Je4H7c6arJS2X9JikK5O2WcDuiBh6wPextnnSHRop1/dzmNnUVBKd45LeBXQAt+Y0nxoRHcA7gC9IOj3PbV6fBM/yzs7OCaz26BY119I4o8Id5GY2ZRUyODYDrTnTC5K2ESRdDHwCuDwihm+AiIjNyft64GEgA+wA0pLKj7bNZL07IqIjIjpaWlpOfG/GSRLtrWl3kJvZlFXI4FgGLE6ugqoErgaW5i4gKQPcTjY0tuW0N0mqSj43AxcCayL7bNaHgKuSRa8F7i3gPhyXTFuata90sa+n/9gLm5lNMgULjqQf4gbgAeBZ4NsRsVrSzZKGrpK6FagDvjPqsttXAcslPUU2KD4TEWuSeR8FbpS0jmyfx1cKtQ/HK9PWRASs8lGHmU1B5cde5PhFxH3AfaPaPpnz+eIjrPfvwKuPMG892Su2Slb7gjQAKzbu5g1nNBe3GDOzCVYSneNTTWNNBae31PrKKjObkhwcBdLe2sSKl3aT7ZYxM5s6HBwFkmlLs2N/L5t2HSh2KWZmE8rBUSBDNwI+6dNVZjbFODgK5KzZ9cyoSPlGQDObchwcBVKeKuO8BY1+IqCZTTkOjgLKtDWx5uU9HOwbKHYpZmYTxsFRQO2tafoGgtUv7y12KWZmE8bBUUBDHeQet8rMphIHRwHNbqhmfnqGbwQ0synFwVFg7W1pX1llZlOKg6PAMq1pNu8+wLa9B4tdipnZhHBwFFimrQnAl+Wa2ZTh4Ciwc+Y1UJGST1eZ2ZTh4Ciw6ooUS+Y2sHKjO8jNbGpwcJwEmbYmVm3aQ//AYLFLMTM7YQ6OkyDTlqa7d4DnXtlX7FLMzE6Yg+MkyLQOdZD7dJWZTX4OjpOgdeYMZtZWuoPczKYEB8dJIIlMa9pDj5jZlFDQ4JB0iaS1ktZJummM+TdKWiNplaSfSDo1aW+X9HNJq5N5b89Z52uSXpC0Mnm1F3IfJkqmLc26bfvYc6Cv2KWYmZ2QggWHpBRwG3ApsAS4RtKSUYutADoi4jzgHuCzSXs38J6IOAe4BPiCpHTOeh+JiPbktbJQ+zCRhm4EfMpHHWY2yRXyiOMCYF1ErI+IXuBu4IrcBSLioYjoTiYfAxYk7c9FxPPJ55eBbUBLAWstuPMWNCLhfg4zm/TGFRySaiWVJZ/PlHS5pIpjrDYf2JgzvSlpO5L3AveP8d0XAJXAr3Kab0lOYX1eUtURar5e0nJJyzs7O49RauHVV1dw5in1vrLKzCa98R5x/AyoljQfeBB4N/C1iSpC0ruADuDWUe1zgX8A/iAihu6e+xhwNnA+MBP46FjbjIg7IqIjIjpaWkrjYKU96SCPiGKXYmZ23MYbHEpOKf0e8HcR8fvAOcdYZzPQmjO9IGkbuWHpYuATwOUR0ZPT3gD8EPhERDw21B4RWyKrB7iT7CmxSSHTlmZ3dx8bdnQfe2EzsxI17uCQ9HrgnWT/MgdIHWOdZcBiSYskVQJXA0tHbTQD3E42NLbltFcC3we+HhH3jFpn7lBBwJXAM+Pch6IbHinXD3Yys0lsvMHxQbKniL4fEaslnQY8dLQVIqIfuAF4AHgW+Hay7s2SLk8WuxWoA76TXFo7FCxvAy4Crhvjstu7JD0NPA00A58e5z4U3Rmn1FFXVe4OcjOb1JTv+fakk7wuIvYWpqSJ19HREcuXLy92GQC888uPsedAHz94/xuLXYqZ2VFJeiIiOka3j/eqqn+U1CCpluypoTWSPjLRRU4HmdYmnt3SxYHegWKXYmZ2XMZ7qmpJcoRxJdlLZheRvbLK8tTemmZgMHh6855il2JmdlzGGxwVyX0bVwJLI6IP8DWlx6G9LQ3gBzuZ2aQ13uC4HdgA1AI/S8aUmjR9HKWkua6Ktpk17iA3s0lrXMEREV+MiPkRcVlyD8WLwJsKXNuUlWlLOzjMbNIab+d4o6TPDQ3hIelvyB592HHItKbZuvcgW/YcKHYpZmZ5G++pqq8CXWTvr3gb2dNUdxaqqKmuffhGwN3FLcTM7DiMNzhOj4hPJSPdro+I/wGcVsjCprIlcxuoLC/zg53MbFIab3AckPTrQxOSLgR8nuU4VZaXce68Bg89YmaTUvk4l3sf8HVJjcn0LuDawpQ0PWTamvjGYy/SNzBIRcpP8DWzyWO8V1U9FRGvAc4DzouIDPDmglY2xWXa0vT0D/LLLV3FLsXMLC95/VM3IvbmjFF1YwHqmTaGR8r1jYBmNsmcyDkSTVgV09C8xmpa6qt8ZZWZTTonEhwecuQESCKTPBHQzGwyOWrnuKQuxg4IATMKUtE0kmlr4sE1r7Brfy9NtZXFLsfMbFyOesQREfUR0TDGqz4ixntFlh1BZnjAw91FrcPMLB++DrSIzlvQSJn8KFkzm1wcHEVUU1nO2XMaWOEjDjObRBwcRdbelmblS7sZHPS1BmY2OTg4iizTmqarp5/12/cVuxQzs3EpaHBIukTSWknrJN00xvwbJa2RtErST5IHRA3Nu1bS88nr2pz2/yDp6WSbX5Q0qe8nGboR8Enfz2Fmk0TBgkNSCrgNuBRYAlwjacmoxVYAHRFxHnAP8Nlk3ZnAp4DXAhcAn5LUlKzzJeCPgMXJ65JC7cPJcFpzLQ3V5b4R0MwmjUIecVwArEuGYe8F7gauyF0gIh6KiO5k8jFgQfL5t4EfR8TOiNgF/Bi4RNJcoCEiHouIAL5O9jnok1ZZmWhva/KVVWY2aRQyOOYDG3OmNyVtR/Je4P5jrDs/+XzMbUq6fuiJhZ2dnXmWfnK1t6Z57pUu9vX0F7sUM7NjKonOcUnvAjqAWydqmxFxR0R0RERHS0vLRG22IDJtaQYDVm3aXexSzMyOqZDBsRlozZlekLSNIOli4BPA5RHRc4x1N3PodNYRtznZtC9IA76D3Mwmh0IGxzJgsaRFkiqBq4GluQtIygC3kw2NbTmzHgB+S1JT0in+W8ADEbEF2CvpdcnVVO8B7i3gPpwUTbWVnNZc6w5yM5sUCjbeVET0S7qBbAikgK9GxGpJNwPLI2Ip2VNTdcB3kqtqX
4qIyyNip6T/STZ8AG6OiJ3J5z8FvkZ2kMX7OdQvMqm1t6X52XPbiQgm+RXGZjbFFXSgwoi4D7hvVNsncz5ffJR1vwp8dYz25cC5E1hmSci0NfG9JzezadcBWmfWFLscM7MjKonOccveQQ543CozK3kOjhJx1px6qivKWOl+DjMrcQ6OElGRKuO8+Wk/g9zMSp6Do4Rk2tKs3ryXnv6BYpdiZnZEDo4SkmlL0zswyJqX9xa7FDOzI3JwlJChkXJ9P4eZlTIHRwmZ3VDN3MZqX1llZiXNwVFiMm1pVrqD3MxKmIOjxGRam9i48wCdXT3HXtjMrAgcHCUm05YGPOChmZUuB0eJOXd+I+Vl8oOdzKxkOThKTHVFilfNbfCVVWZWshwcJSjTlmbVpt0MDEaxSzEzO4yDowRl2tLs7x3g+W1dxS7FzOwwDo4SlGn1jYBmVrocHCXo1Fk1NNVUuIPczEqSg6MESSLT1uQjDjMrSQ6OEtXemmZd5z72HuwrdilmZiM4OEpUpi1NBKzauKfYpZiZjVDQ4JB0iaS1ktZJummM+RdJelJSv6SrctrfJGllzuugpCuTeV+T9ELOvPZC7kOxvKY1jYT7Ocys5JQXasOSUsBtwFuBTcAySUsjYk3OYi8B1wEfzl03Ih4C2pPtzATWAQ/mLPKRiLinULWXgobqCs5oqfNIuWZWcgp5xHEBsC4i1kdEL3A3cEXuAhGxISJWAYNH2c5VwP0R0V24UktTpi3Nipd2EeEbAc2sdBQyOOYDG3OmNyVt+boa+OaotlskrZL0eUlVY60k6XpJyyUt7+zsPI6vLb5MWxO7uvt4cce0y0wzK2El3TkuaS7wauCBnOaPAWcD5wMzgY+OtW5E3BERHRHR0dLSUvBaC6G9NQ3ACj+fw8xKSCGDYzPQmjO9IGnLx9uA70fE8DWpEbElsnqAO8meEpuSzpxdT01lipW+n8PMSkghg2MZsFjSIkmVZE85Lc1zG9cw6jRVchSCJAFXAs+ceKmlKVUmXrMg7Q5yMyspBQuOiOgHbiB7mulZ4NsRsVrSzZIuB5B0vqRNwO8Dt0taPbS+pIVkj1geGbXpuyQ9DTwNNAOfLtQ+lIJMW5o1L+/lYN9AsUsxMwMKeDkuQETcB9w3qu2TOZ+XkT2FNda6GxijMz0i3jyxVZa2TFsT/YPBM5v30LFwZrHLMTMr7c5xO9RBvmyDO8jNrDQ4OEpcS30V58xr4K8fXMv/uv9ZDvT6lJWZFZeDYxK46w9fy1W/toDbH1nPWz//CA+v3VbsksxsGnNwTALpmkr+6qrz+Nb1r6OyvIzr7lzG+7+5gs6unmKXZmbTkINjEnntabO4/wNv5IMXL+aBZ7bylr95mG/+4iUG/WxyMzuJHByTTFV5ig9efCb3f/CNvGpuAx/73tO8/Y6f8/wrfj65mZ0cDo5J6vSWOu6+/nV89qrzeH7bPi774qP8zYNrfb+HmRWcg2MSk8TbOlr5yY2/we+eN4///dN1XPq3j/Lv67YXuzQzm8IcHFPArLoqPvf2dr7x3tcyGME7vvw4N357JTv39xa7NDObghwcU8ivL27mgQ9exJ+96XSWrnyZt/zNw9zzxCY/z8PMJpSDY4qprkjxkd8+mx/++Rs5raWOD3/nKd7x/x5nfee+YpdmZlOEg2OKOmtOPd/549dzy386l2de3sMlf/soX/zJ8/T2H+1hi2Zmx+bgmMLKysQ7X3sqP7nxN3jrktl87sfPcdkXH2XZhp3FLs3MJjEHxzRwSkM1t73j17jzuvM50DvA7//fn3PTd1exp7vv2CubmY3i4JhG3nT2Kfz4xou4/qLT+M4Tm3jL5x7m3pWb3XluZnlxcEwzNZXlfPyyV7H0hguZl57BB+5eybV3LmPjzu5il2Zmk4SDY5o6Z14j3//TC/nU7y7hiQ07eevnH+FLD/+KvgF3npvZ0Tk4prFUmfiDCxfxLx/6DS5a3MJf/eiX/O7//ldWvOSHRpnZkTk4jLmNM7jjPR3c/u7/wO7uPn7vS//OJ+99hr0H3XluZodzcNiw3z5nDj++8SKuff1C/uGxF3nr5x7h/qe3uPPczEYoaHBIukTSWknrJN00xvyLJD0pqV/SVaPmDUhambyW5rQvkvR4ss1vSaos5D5MN/XVFfzl5efwT396ITNrq/iTu57kj76+nM27DxS7NDMrEQULDkkp4DbgUmAJcI2kJaMWewm4DvjHMTZxICLak9flOe1/BXw+Is4AdgHvnfDijde0pvnnGy7k45edzb+t28FbP/cIX350PXsO+PSV2XRXXsBtXwCsi4j1AJLuBq4A1gwtEBEbknnjupRHkoA3A+9Imv4e+EvgSxNVtB1Snirj+otO59Jz5/Lf732GT//wWT79w2eZ21jNmbPrOWtOffZ9dj1nnFLHjMpUsUs2s5OgkMExH9iYM70JeG0e61dLWg70A5+JiH8CZgG7I6I/Z5vzx1pZ0vXA9QBtbW35VW4jtM6s4c7rzufn63ewatMe1m7tYu3WLn6+fsfw2FcSLJxVy5mz6zhrdj1nzskGysLmWipS7kozm0oKGRwn6tSI2CzpNOCnkp4G9ox35Yi4A7gDoKOjw727J0gSbzi9mTec3jzc1j8wyIs7u3luaxdrX+niuVeygfLjNa8w9Bj0ipQ4vaVu+AjlrOR9fnoGZWUq0t6Y2YkoZHBsBlpzphckbeMSEZuT9/WSHgYywHeBtKTy5Kgjr23axCpPlXF6Sx2nt9Rx6avnDrcf7BvgV537kiDJvj/x4i6WPvXy8DI1lSkWz67nrNkjQ6WlvorsGUkzK1WFDI5lwGJJi8j+5X41h/omjkpSE9AdET2SmoELgc9GREh6CLgKuBu4Fri3INXbcauuSHHOvEbOmdc4or3rYB/Pb9s3fISydmsXP/3lNr69fNPwMk01FSP7T+bUc+Yp9TTWVJzs3TCzI1Ahr9GXdBnwBSAFfDUibpF0M7A8IpZKOh/4PtAEHAS2RsQ5kt4A3A4Mkr3y6wsR8ZVkm6eRDY2ZwArgXRHRc7Q6Ojo6Yvny5QXZRztx2/f18NwrXUmg7Bv+3NXTP7zMnIbqpN8ke4RyWkstp86qZVZtpY9QzApE0hMR0XFY+3S4ucvBMflEBFv2HBw+Mhk6Snl+274RD6OqrypnYXNt9jWrhoWzDn2e6VAxOyFHCo5S7hy3aUwS89IzmJeewZvOOmW4fWAweGlnNxu27+eF7ft5ccd+XtjRzVMbd/PDVS8Pd8oD1FeXs6g5e2SyaFYNC4c+N9fSVFPhUDE7Tg4Om1RSZWJRc/Yv/zeNmtfbP8imXd1s2LGfF7Z3Z0Nl+35Wbtx1WKg0VCdHKskRyqLmmiRgammq9WAEZkfj4LApo7K8jNNa6jitpe6web39g2zclXukkg2YJ1/axQ9GhUrjjIrsaa/hYMmeAlvUXEu6xqFi5uCwaaGy/NClw6P19A+wcecBNmzfz4YdyWt79/AlxLndgOmaiuFTX22zapnXWM3sxmrmNlYzt2EGDTPKfQrMpjwHh017VeUpzjiljjNOOVKo
dI849bVhx36WbdjFvaNCBWBGRYo5jdXMaciGyZyhV0M1cxtnMKexmlm1lb750SY1B4fZUWRDpZ4zTqk/bF7fwCDbunrYuucAW/YcZGvy2rI3+/74Czt5Ze9B+gdHpktFSpxSnw2W2Y3VzG3Ihks2WKqY0ziDU+qrPFSLlSwHh9lxqkiVMT89g/npGUdcZnAw2L6/ZzhUtu49yJY9B3llT/Z9zct7+cmzr3Cwb+Q4nxK01FWNOnpJgqVhxvDRTHWFB5a0k8/BYVZAZWXZo4tT6qs5b8HYy0QEew/0s2XvyCOXoaOXDTv289j6Hew92H/YuumaCprrqqitTFFbVU5NZTm1Vanse9I2PF2VorayPFkuNfxel6xXWe4jHBsfB4dZkUmisaaCxpoKzp7TcMTl9vf0s3XvwVFHLwfYub+X/T0DdPf28/LuA+zv7R+e7u4dGHcdFSmNCJyaquznmspy6qpSI6Zrq5JQqjwUQo0zKjilvoqZtZWU+zTblObgMJskaqvKj3hl2JEMDgbdfQN09/Szv3eA/T3ZMNnf08/+3n66eway770D7OvpH16uOwmf/T397Oo+MDw9njCSYGZNJS31VbTUV9Fcl31vqauiub6SlrrqpL2SphpfKDAZOTjMprCyMlFXVU5d1cT9UR8YDA6MCqN9Pf3s7u6jc18P27t66NzXQ2dXD9v39fDC9v10dvXQ03/489pSZaK5rnJUuGTfRwdPQ7UvdS4VDg4zy0vqOMIoItjX009n11Cg9NLZdTAJmt7hoPnlli627+s57Eo0yN6LMzJYKocDpjnnvb66nBmVKarLUz6aKRAHh5kVnCTqqyuor64Y887+XIODwZ4Dhx+9DL939bB59wFWbtzNjv09h91Lk6uqvGw4RGZUpqiuSDGjIts2oyI7nW3LnX9omdHzD61TdmidaRhQDg4zKyllZaKptpKm2krOnH34/TO5BgaDnft7h4Nle1cP+3v7OdA7wIG+7Otg7wAH+wYPTfcNcKB3gN3dfRzsy5mXrHM8hgIqN4yqysuoLC+jKnlVlpdRmSqjqjyV/ZzTPrqtamjZijIqU6kRy45YL5VKlik7qeHl4DCzSStVpuG+kIkQEfT0D3Kgd4CD/YfCJBs2g4c+5wTQUCD19A2OnO4fpLc/2we0c/8gvf2DSdsgvQOD9PQN0DswSN/AxDzaorxMo8IlGzhffk8HC5trJ+Q7hr9rQrdmZjaJSRo+YjhZBgcjGyT9g/T0D2SD5bCQGaR3YCB5H1p2MGfZgZHrDLUNDDKjcuL3xcFhZlZEZWWiumworCbHI5J9l46ZmeXFwWFmZnkpaHBIukTSWknrJN00xvyLJD0pqV/SVTnt7ZJ+Lmm1pFWS3p4z72uSXpC0Mnm1F3IfzMxspIL1cUhKAbcBbwU2AcskLY2INTmLvQRcB3x41OrdwHsi4nlJ84AnJD0QEbuT+R+JiHsKVbuZmR1ZITvHLwDWRcR6AEl3A1cAw8ERERuSeSPGIoiI53I+vyxpG9AC7C5gvWZmNg6FPFU1H9iYM70pacuLpAuASuBXOc23JKewPi9pYi7gNjOzcSnpznFJc4F/AP4gIoaOSj4GnA2cD8wEPnqEda+XtFzS8s7OzpNSr5nZdFDI4NgMtOZML0jaxkVSA/BD4BMR8dhQe0Rsiawe4E6yp8QOExF3RERHRHS0tLQc1w6YmdnhCtnHsQxYLGkR2cC4GnjHeFaUVAl8H/j66E5wSXMjYouy4ytfCTxzrO098cQT2yW9mGf9Q5qB7ce57lTk3+MQ/xYj+fcYaSr8HqeO1ag42tCSJ0jSZcAXgBTw1Yi4RdLNwPKIWCrpfLIB0QQcBLZGxDmS3kX2aGJ1zuaui4iVkn5KtqNcwErgfRGxr4D7sDwiOgq1/cnGv8ch/i1G8u8x0lT+PQoaHFPBVP6Pfzz8exzi32Ik/x4jTeXfo6Q7x83MrPQ4OI7tjmIXUGL8exzi32Ik/x4jTdnfw6eqzMwsLz7iMDOzvDg4zMwsLw6OozjW6L7ThaRWSQ9JWpOMWPyBYtdUCiSlJK2Q9INi11JsktKS7pH0S0nPSnp9sWsqFkn/Nflz8oykb0qqLnZNE83BcQQ5o/teCiwBrpG0pLhVFU0/8KGIWAK8Dvizafxb5PoA8GyxiygRfwv8KCLOBl7DNP1dJM0H/hzoiIhzyd7DdnVxq5p4Do4jGx7dNyJ6gaHRfaedZJiXJ5PPXWT/Ush7wMqpRNIC4D8CXy52LcUmqRG4CPgKQET05jwCYToqB2ZIKgdqgJeLXM+Ec3Ac2YSM7jvVSFoIZIDHi1xKsX0B+Atg8BjLTQeLgE7gzuTU3Zcl1Ra7qGKIiM3AX5N91tAWYE9EPFjcqiaeg8PGTVId8F3ggxGxt9j1FIuk3wG2RcQTxa6lRJQDvwZ8KSIywH5gWvYJSmoie2ZiETAPqE2GUJpSHBxHdkKj+041kirIhsZdEfG9YtdTZBcCl0vaQPYU5pslfaO4JRXVJmBTRAwdhd5DNkimo4uBFyKiMyL6gO8BbyhyTRPOwXFkw6P7JqP1Xg0sLXJNRZGMRPwV4NmI+Fyx6ym2iPhYRCyIiIVk/7/4aURMuX9VjldEbAU2SjoraXoLOU/6nGZeAl4nqSb5c/MWpuCFAoUcVn1Si4h+STcAD3BodN/Vx1htqroQeDfwtKSVSdvHI+K+4pVkJeb9wF3JP7LWA39Q5HqKIiIel3QP8CTZqxFXMAWHHvGQI2ZmlhefqjIzs7w4OMzMLC8ODjMzy4uDw8zM8uLgMDOzvDg4zI5B0r7kfaGkd0zwtj8+avrfJ3L7ZoXg4DAbv4VAXsGRDHR3NCOCIyKm3F3GNvU4OMzG7zPAGyWtTJ65kJJ0q6RlklZJ+mMASb8p6VFJS0nuoJb0T5KeSJ7TcH3S9hmyo6iulHRX0jZ0dKNk289IelrS23O2/XDOsy/uSu5QRtJnkmemrJL01yf917Fpw3eOm43fTcCHI+J3AJIA2BMR50uqAv5N0tBIqL8GnBsRLyTT/yUidkqaASyT9N2IuEnSDRHRPsZ3/R7QTvbZFs3JOj9L5mWAc8gO1/1vwIWSngX+E3B2RISk9MTuutkhPuIwO36/BbwnGYblcWAWsDiZ94uc0AD4c0lPAY+RHTxzMUf368A3I2IgIl4BHgHOz9n2pogYBFaSPYW2BzgIfEXS7wHdJ7hvZkfk4DA7fgLeHxHtyWtRzrMX9g8vJP0m2VFTXx8RryE7ftGJPE60J+fzAFAeEf1kHz52D/A7wI9OYPtmR+XgMBu/LqA+Z/oB4E+SIeeRdOYRHmDUCOyKiG5JZ5N9/O6QvqH1R3kUeHvSj9JC9gl7vzhSYcmzUhqTgSf/K9lTXGYF4T4Os/FbBQwkp5y+RvY52wuBJ5MO6k7gyjHW+xHwvqQfYi3Z01VD7gBWSXoyIt6Z0/594PXAU0AAfxERW5PgGUs9cK+karJHQjce1x6ajYNHxzUzs7z4VJWZmeXFwWFmZnlxcJiZWV4cHGZmlhcHh5mZ5cXBYWZmeXFwmJlZXv4//lzH8IM
QHB8AAAAASUVORK5CYII=\n",
       "text/plain": [
        "<Figure size 432x288 with 1 Axes>"
       ]
@@ -483,7 +485,7 @@
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAo3UlEQVR4nO3deXxddZ3/8dcnW5OmbbokXUiXtHRnLYRSBCq0LBWQVaEgiz8XRkeY0XEZdNRxYFTGUdFRRgcRF0CQRZjKIFvCJhZoS8vSJt2XJKVJuiZps+fz++Oe0tv2Jr1tc3PuTd7Px+M+mrPdfO5Fzzvn+z3n+zV3R0RE5EBpYRcgIiLJSQEhIiIxKSBERCQmBYSIiMSkgBARkZgUECIiEpMCQkREYlJASMozs4aoV4eZNUYtf+II3u8lM/tMImoVSSUZYRcgcrTcfcDen81sA/AZd38hvIoSy8wy3L0t7Dqk99MVhPRaZpZmZreZ2Voz22Zmj5jZ0GBbtpk9EKzfaWaLzGyEmX0XOBv4eXAF8vNO3vtRM9tiZrvM7BUzOy5qW46Z/cjMNgbb/2pmOcG2s8zsb8HvrDCzTwbr97tqMbNPmtlfo5bdzL5gZquB1cG6nwbvUWdmS8zs7Kj9083sG8Fnrw+2jzGzu83sRwd8lgVm9qWj/8alt1FASG92K3A58GHgGGAHcHew7SYgDxgDDAM+BzS6+78ArwK3uPsAd7+lk/f+CzAJGA68BTwYte2HwKnAh4ChwNeADjMbFxz3M6AAOBlYdhif53LgdGB6sLwoeI+hwB+AR80sO9j2T8C1wEXAIOBTwB7gd8C1ZpYGYGb5wHnB8SL7UROT9GafI3KirwQws+8Am8zsBqCVSDBMdPd3gCWH88buft/en4P33WFmeUA9kZPxLHevCnb5W7DfdcAL7v5QsH5b8IrX9919e1QND0Rt+5GZfROYArwNfAb4mruvDLa/vfd3mtkuYC7wPDAfeMndqw+jDukjdAUhvdk44ImgOWcnUAa0AyOA+4FngYfNbLOZ/cDMMuN506D55s6g+aYO2BBsyg9e2cDaGIeO6WR9vCoOqOMrZlYWNGPtJHJFlB/H7/odcH3w8/VEvguRgyggpDerAD7i7oOjXtnuXuXure7+b+4+nUhT0CXAjcFxhxri+DrgMiJNM3lAUbDegK1AE3BsJ/XEWg+wG+gftTwyxj4f1BX0N3wNuBoY4u6DgV1BDYf6XQ8Al5nZScA04MlO9pM+TgEhvdkvge8Gbf+YWYGZXRb8fK6ZnWBm6UAdkSanjuC4amBCF+87EGgm0jzUH/je3g3u3gHcB/zYzI4JrjbOMLN+RPopzjOzq80sw8yGmdnJwaHLgCvNrL+ZTQQ+fYjPNhBoA2qBDDP7NpG+hr3uBe4ws0kWcaKZDQtqrCTSf3E/8Li7Nx7id0kfpYCQ3uynwALgOTOrB14n0skLkb/QHyMSDmXAy+xravkp8DEz22Fm/xXjfX8PbASqgBXB+0b7CvAukZPwduA/gDR330Sk0/jLwfplwEnBMXcBLUTC6Xfs3+kdy7PAM8CqoJYm9m+C+jHwCPBc8Bl/DeREbf8dcAJqXpIumCYMEul7zGw2kaamca6TgHRCVxAifUzQGf+PwL0KB+mKAkKkDzGzacBOYBTwk1CLkaSnJiYREYlJVxAiIhJTr3mSOj8/34uKisIuQ0QkpSxZsmSruxfE2tZrAqKoqIjFixeHXYaISEoxs42dbVMTk4iIxKSAEBGRmBQQIiISU0IDwszmmdlKM1tjZrfF2D7WzF40s6Vm9o6ZXRS17evBcSvN7MJE1ikiIgdLWCd1MAja3cD5QCWwyMwWuPuKqN2+CTzi7r8ws+nA00BR8PN84DgiE728YGaT3b09UfWKiMj+EnkFMRNY4+7r3L0FeJjIEMnRnH0jUOYBm4OfLwMedvdmd18PrAneT0REekgiA6KQ/UeXrAzWRfsOcL2ZVRK5erj1MI4VEZEECvs5iGuB37r7j8zsDOB+Mzs+3oPN7GbgZoCxY8cmqEQRkeTS1t5B5Y5G1m/dzbqtu8nJTOe607v/HJjIgKgiMu3hXqODddE+DcwDcPeFwYTr+XEei7vfA9wDUFxcrEGlRKTXcHdqG5pZX7v7gyBYV7ub9Vsb2LR9D63t+055p4wdnHIBsQiYZGbjiZzc5xOZqjHaJiKTp/82GGUym8gMWQuAP5jZj4l0Uk8C3kxgrSIioWhobmPDBwHQwPqtkUBYX7ub+ua2D/bLykijaFh/Jg4fwAXHjWR8fi4T8nOZUDCAIf3jmk79sCUsINy9zcxuITLzVTpwn7svN7PbgcXuvoDIzFq/MrMvEemw/mQwPv1yM3uEyGxdbcAXdAeTiKSq1vYOKrbviVwJ1EbCYP3WSBhU1zV/sJ8ZHJOXw4SCXK48pZDx+bmMLxjAhPxcjhmcQ3qadfFbul+vGe67uLjYNRaTiITF3ampbw4CoGG/pqFN2/fQ3rHvXDs0Nyty8g9exxbkMj5/AOOG9Sc7M71H6zazJe5eHGtb2J3UIiIpoaWtg227m6mt3/favKspuCqIXA3sadnX0JGdmUbRsFymjRrIxSeMCq4GIs1Cg/tnhfhJ4qeAEJE+q73D2bGnZb+Tfm1DM1uDf6PX7dzTetDxaQajh/RnfH4uM8cPZUJ+5EpgfEEuowZlk9bDTULdTQEhIr2Ku1PX1Lbfyb22vpmtDc0HBcG2hmY6YrSy52SmM3xQP/IH9OPYggHMmjCMgoH9Iq8B/ciP+jkro/cOaaeAEJGU0tDcxmtrtlJT10RtQ0vMv/xb2joOOi4z3cgfEDmxj8rL5sTReR+c9PeuLwj+ze2nUyMoIEQkRayqruf+hRv501uV7A7a+s1gWG7WByf4Ywty9zvRf/DvwH7k5WRiltpNPj1NASEiSau1vYPnlldz/+sbeH3ddrLS07jkxFFcfdoYJuTnMjQ3i4z03tvEEzYFhIgknZq6Jv7w5iYeenMT1XXNFA7O4Z/nTeXq4tEMG9Av7PL6DAWEiCQFd+eN9du5f+FGnl2+hbYO58OTC/ju5eM4d+rwHn9ITBQQIhKyhuY2nnirkvtf38iq6gYGZWfwyQ8Vcf2scRTl54ZdXp+mgBCRUKyuruf+1zfyp7eqaGhu4/jCQfzgqhP56EnHkJPVs08TS2wKCBHpMa3tHTy/oprfL9y/0/n6M8YxY8xg3WWUZBQQIpJwNXVNPPRmBX94c+MHnc5fmzeFa4rHqNM5iSkgRCQh3J0312/n969v5Nn3Ip3Os9XpnFIUECLSrRqa23hiaRUPLNzIyup6BmVncFPQ6Txenc4pRQEhIt1iTU3kSefHg07n444ZxH9cdQKXnlSoTucUpYAQSXItbR0sXLeN5tZ2BuVkMig7k0E5GeTlZJKblRHqiKFtH3Q6b2Thum1kpadx8YmjuH7WOE4Zq07nVKeAEElCHR3Okk07eGJpFU+/+37MoaYhMtz0wKjAGJQdeeXlRNZFwqST5exMsjPTjugkvrfT+aE3N7Glro
nCwTl89cIpXHPaGPLV6dxrKCBEksiamnqeXLqZJ5dVUbmjkZzMdC48bgSXnVxIwcB+1DW1UtfYSl1jG3VNrexqDJab2qhrjCyv29pAXWMbuxpbaWzteqbezHT7IFAG5mQyKDtjvwDZGyqRgMmko8N5/K1Kngk6nc+elM8dlx/PHHU690oKCJGQ1dQ1seDtSCi8V1VHmsHZkwr4ygVTOH/6iKMaerqlrYP6vUESFSKRoDk4ZHY1tlK1o/GD9a3tB0+WsLfT+ROnj2VCwYCj+eiS5BQQIiFoaG7jueVbeGJpFa+t2UqHw4mj8/j2JdO55KRRDB+Y3S2/JysjjWED+h3RswbuTlNrx34h0tjazqnjhtA/S6eOvkD/lUV6SGt7B39dvZUnllbx3IotNLV2MGZoDrecO5HLZhRybJL9NW5m5GSlk5OVzohB3RNYkloUECIJ5O4sq9jJk0ureOqd99m2u4XB/TP52KmjuWJGIaeMHaI7fSRpKSBEEmDD1t08uayKJ5dWsWHbHvplpHHe9BFccXIhsycX9Op5jKX3UECIdJNtDc089c77PLG0imUVOzGDMyYM4+/Pnci840cyKDsz7BJFDosCQuQoNLa083xZNU8ureLlVbW0dzjTRg3iGxdN5dKTChmZp7Z7SV0KCJHD1N7h/G1tpLP52fe2sLulnVF52Xz27AlcPuMYpo4cFHaJIt1CASESB3dn+eY6nlhaxZ/f3kxNfTMDszP46EnHcPmMQmYWDQ11yAuRRFBAiHTh/V2N/OmtKp5YWsWamgYy041zpwznihmFnDt1ONmZGoROei8FhEgMDc1t/PeLa7j3r+tpaetgZtFQvnfFCVx0wkgG988KuzyRHqGAEInS3uE8tqSC/3x2FVsbmrlyRiFfPG8yY4f1D7s0kR6ngBAJLFy7jTueWsGK9+s4ddwQ7r2pmJPHDA67LJHQKCCkz9uwdTffe7qM51ZUUzg4h59dO4NLThylJ5ylz1NASJ9V19TKz0vX8JvX1pOZnsZXL5zCp88ar45nkYACQvqctvYOHl5UwY+fX8WOPS18/NTRfOWCKQzXgHQi+1FASJ/y6upa7nhqBauqGzh9/FC+dcl0ji/MC7sskaSU0IAws3nAT4F04F53v/OA7XcB5waL/YHh7j442NYOvBts2+TulyayVund1tQ08L2nyygtr2Hs0P788vpTuPC4kepnEOlCwgLCzNKBu4HzgUpgkZktcPcVe/dx9y9F7X8rMCPqLRrd/eRE1Sd9w849LfzkhdU88PpGsjPT+fpHpvLJM4vol6F+BpFDSeQVxExgjbuvAzCzh4HLgBWd7H8t8K8JrEf6kNb2Dh58fSN3vbCa+qZW5s8cyz+dP5n8I5hZTaSvSmRAFAIVUcuVwOmxdjSzccB4oDRqdbaZLQbagDvd/ckYx90M3AwwduzY7qlaUpq78+LKGr77f2Wsrd3NWRPz+eYl0zSAnsgRSJZO6vnAY+7eHrVunLtXmdkEoNTM3nX3tdEHufs9wD0AxcXFB8+uLn3Kqup67nhqBa+u3sqE/Fx+fVMxc6YOVz+DyBFKZEBUAWOilkcH62KZD3wheoW7VwX/rjOzl4j0T6w9+FDp67Y1NHPXC6v4wxubGNAvg29dMp0bZo3TrG0iRymRAbEImGRm44kEw3zgugN3MrOpwBBgYdS6IcAed282s3zgTOAHCaxVUlBLWwe/X7iBn5asZk9LOzfMGscXz5vMkFwNpifSHRIWEO7eZma3AM8Suc31Pndfbma3A4vdfUGw63zgYXePbiKaBvyPmXUAaUT6IDrr3JY+xt15bkU133+6jA3b9nDOlAK+efE0Jg4fGHZpIr2K7X9eTl3FxcW+ePHisMuQBFuxuY47nlrBwnXbmDh8AN+8eBrnTBkedlkiKcvMlrh7caxtydJJLdKl2vpmfvTcSv64uILBOZncftlxXDdzLBnp6mcQSRQFhCS1ptZ27nttPf/94lqaWtv59JnjuXXOJPL6Z4Zdmkivp4CQpOTuPP3uFr7/lzIqdzRy3rQRfOOiqUwoGBB2aSJ9hgJCksr23S08ubSKRxZXUL6lnqkjB/LgZ07nzIn5YZcm0ucoICR07R3Oq6treXRxJc+vqKalvYMTR+fxg6tO5KpTR5OepgfdRMKggJDQbNq2h0eXVPDYkkre39XEkP6ZfGLWWK4uHsO0URoaQyRsCgjpUU2t7Tzz3hb+uKiCheu2YQazJxXwzYunc9704RplVSSJKCAk4dydd6t28cdFFSx4ezP1TW2MGZrDl8+fzFWnjuaYwTlhlygiMSggJGEO7HDul5HGRSeM4uPFo5k1fhhp6lsQSWoKCOlWezucH1lcwfMrqmltd04ance/X348Hz3pGPJy9PyCSKpQQEi3iNXhfMOsIq4+bbTmYhBJUQoIOWKNLe08s/x9/riogtfXbSfNYPbkAr59yXTmThuh4bZFUpwCQg6Lu/NO5S4eWVzBgmWbqW9uY+zQ/nzlgkiH86g8dTiL9BYKCInL9t0tPLG0ikeDDufszDQuOn4UHy8ew+njh6rDWaQXUkBIp9o7nFdW1/JodIfzmMF894pIh/OgbHU4i/RmCgg5yMZtu3l0cSWPLalkS10TQ3OzuPGMIq4uHsOUkZqUR6SvUEDIfp55730+98BbpBl8eHIB37l0OnOmqsNZpC9SQMh+HllcSeHgHB7//IcYmZcddjkiEiL9WSgfaGxp57U1W7nguBEKBxFRQMg+f1u7lea2DuZOHRF2KSKSBDptYjKzK+M4vsndn+7GeiREJeU15GalM3P80LBLEZEk0FUfxK+A/wW6usF9NqCA6AXcndKyGmZPLlCHtIgAXQfEX9z9U10dbGYPdHM9EpLlm+vYUtfEnKnDwy5FRJJEp38quvv1hzo4nn0kNZSW12AG50xRQIhIRNxtCWY20cweMLPHzeyMRBYlPa+kvIaTRg+mYGC/sEsRkSTRaUCY2YH3Od4BfB34IvCLBNYkPay2vpm3K3Zy3jRdPYjIPl1dQfzZzG6MWm4FioBxQHsii5Ke9eLKGgDm6PZWEYnSVUDMAwaZ2TNmNhv4CnAhcAXwiZ4oTnpGSVk1o/KymTZK4yyJyD6d3sXk7u3Az83sfuBbwOeBb7r72p4qThKvua2dV1dv5YoZhZhpyG4R2aerB+VOB74KtADfAxqB75pZFXCHu+/skQolod5Yt509Le3MVf+DiBygq+cg/ge4CBgA/MbdzwTmm9mHgT8SaW6SFFdaXkN2ZhofOjY/7FJEJMl0FRBtRDqlc4lcRQDg7i8DLye2LOkJ7k5JeTVnHptPdmZ62OWISJLpqpP6OuAqYA5wYxf7SYpaU9NAxfZG5k7T3UsicrCuOqlXAV/uwVqkh5WU7729Vf0PInKwrh6Ue+pQBx9qHzObZ2YrzWyNmd0WY/tdZrYseK0ys51R224ys9XB66ZD1SKHr6SsmuOOGaS5H0Qkpq76IM4yswVdbDdgeqcbzdKBu4HzgUpgkZktcPcVe/dx9y9F7X8rMCP4eSjwr0Ax4MCS4Ngdh/5IE
o8du1tYsnEHt5w7MexSRCRJdRUQl8VxfEsX22YCa9x9HYCZPRy854pO9r+WSChA5A6p5919e3Ds80Qe3HsojpokDi+vqqXDYY76H0SkE131QRztnUqFQEXUciVweqwdzWwcMB4o7eLYwhjH3QzcDDB27NijLLdvKSmvIX9AFicW5oVdiogkqWSZGWY+8Fjw9Hbc3P0edy929+KCgoIEldb7tLZ38PLKGs6dMpy0ND09LSKxJTIgqoAxUcujg3WxzGf/5qPDOVYO05KNO6hratPT0yLSpUMGhJl91MyOJEgWAZPMbLyZZREJgYM6vc1sKjAEWBi1+lngAjMbYmZDgAuCddINSstryEpP46xJuuoSkc7Fc+K/BlhtZj8ITuZxcfc24BYiJ/Yy4BF3X25mt5vZpVG7zgcednePOnY7kfknFgWv2/d2WMvRe6GsmtMnDGVAv67uURCRvu6QZwh3v97MBhG5y+i3ZubAb4CH3L3+EMc+DTx9wLpvH7D8nU6OvQ+471D1yeFZv3U362p3c+OscWGXIiJJLq6mI3evAx4DHgZGEZkT4q3g2QVJIaXlmhxIROITTx/EpWb2BPASkAnMdPePACehoThSTml5NZOGD2DssP5hlyIiSS6eRuirgLvc/ZXole6+x8w+nZiyJBHqm1p5Y912Pn32+LBLEZEUEE9AfAd4f++CmeUAI9x9g7uXJKow6X6vrt5KW4czV81LIhKHePogHgU6opbbg3WSYkrKahjcP5NTxg4OuxQRSQHxBESGu0dPGNQCZCWuJEmE9g7nxZU1nDO5gIz0ZHmAXkSSWTxnitro5xbM7DJga+JKkkRYVrGT7btbNDifiMQtnj6IzwEPmtnPiQzxXYFmmEs5peXVpKcZH9bT0yISp3gelFsLzDKzAcFyQ8Krkm5XUlZD8bgh5PXPDLsUEUkRcY21YGYXA8cB2WaR0T/d/fYE1iXdqGpnI+Vb6vnGRXGPlCIiEteDcr8kMh7TrUSamD4OaJyGFKKnp0XkSMTTSf0hd78R2OHu/wacAUxObFnSnUrKqika1p9jC3LDLkVEUkg8AdEU/LvHzI4BWomMxyQpYE9LG39bu405U0ewt3lQRCQe8fRB/NnMBgP/CbwFOPCrRBYl3ee1NdtoaevQ5EAicti6DIhgoqASd98JPG5mTwHZ7r6rJ4qTo1daXs2AfhmcVjQ07FJEJMV02cTk7h3A3VHLzQqH1OHulJTVMHtyPlkZenpaRA5PPGeNEjO7ytSAnXKWb66jpr5Zdy+JyBGJJyD+jsjgfM1mVmdm9WZWl+C6pBuUlNVgBudM0dPTInL44nmSemBPFCLdr6S8mhljBpM/oF/YpYhICjpkQJjZ7FjrD5xASJJLTV0T71Tu4qsXTgm7FBFJUfHc5vrVqJ+zgZnAEmBOQiqSbvHiyr1PT+v2VhE5MvE0MX00etnMxgA/SVRB0j1Kymo4Ji+bqSPVQigiR+ZI7n2sBKZ1dyHSfZpa2/nrmq3MmTZcT0+LyBGLpw/iZ0SenoZIoJxM5IlqSVJvrN/OnpZ2zT0tIkclnj6IxVE/twEPuftrCapHukFpWTXZmWmcceywsEsRkRQWT0A8BjS5ezuAmaWbWX9335PY0uRIuDsvlNVw1sQCsjPTwy5HRFJYXE9SAzlRyznAC4kpR47WquoGqnY2anA+ETlq8QREdvQ0o8HP/RNXkhyNkvJqAM6dooAQkaMTT0DsNrNT9i6Y2alAY+JKkqNRWlbD8YWDGJmXHXYpIpLi4umD+CLwqJltJjLl6EgiU5BKktm+u4W3Nu3gljmTwi5FRHqBeB6UW2RmU4G9YzasdPfWxJYlR+LlVTV0OMzV09Mi0g0O2cRkZl8Act39PXd/DxhgZn+f+NLkcL1QVkPBwH6cUJgXdiki0gvE0wfx2WBGOQDcfQfw2YRVJEektb2DV1bWMmfKcNLS9PS0iBy9eAIiPXqyIDNLB7ISV5IciUUbtlPf3MYc3d4qIt0knoB4Bvijmc01s7nAQ8G6QzKzeWa20szWmNltnexztZmtMLPlZvaHqPXtZrYseC2I5/f1ZaVlNWSlp3HWxPywSxGRXiKeu5j+GbgZ+Hyw/Dzwq0MdFFxp3A2cT2SAv0VmtsDdV0TtMwn4OnCmu+8ws+g/fxvd/eS4PoVQWl7DrGOHkdsvnv+kIiKHdsgrCHfvcPdfuvvH3P1jwArgZ3G890xgjbuvc/cW4GHgsgP2+Sxwd9CvgbvXHF75ArCutoF1W3fr7iUR6VZxDfdtZjPM7AdmtgG4HSiP47BCoCJquTJYF20yMNnMXjOz181sXtS2bDNbHKy/vJO6bg72WVxbWxvPR+mVSss1OZCIdL9O2yPMbDJwbfDaCvwRMHc/t5t//yTgHGA08IqZnRDcNTXO3avMbAJQambvuvva6IPd/R7gHoDi4mKnjyopq2HyiAGMGaoRUESk+3R1BVFOZFrRS9z9LHf/GdB+GO9dBYyJWh4drItWCSxw91Z3Xw+sIhIYuHtV8O864CVgxmH87j5jV2MrizZsZ+40zf0gIt2rq4C4EngfeNHMfhXcwXQ4N9gvAiaZ2XgzywLmAwfejfQkkasHzCyfSJPTOjMbYmb9otafSaTvQw7w6upa2jpc/Q8i0u06DQh3f9Ld5wNTgReJjMk03Mx+YWYXHOqN3b0NuAV4FigDHnH35WZ2u5ldGuz2LLDNzFYEv+Or7r6NyJSmi83s7WD9ndF3P8k+pWU1DO6fyYyxQ8IuRUR6GXOPv+nezIYAHweucfe5CavqCBQXF/vixYsPvWMv0t7hFP/785wzZTh3XXNy2OWISAoysyXuXhxrW1x3Me3l7jvc/Z5kC4e+alnFDnbsadXdSyKSEIcVEJJcSspqSE8zZk8uCLsUEemFFBAprKSshtOKhpCXkxl2KSLSCykgUlTF9j2srK5n7lTd3ioiiaGASFEvrow8PT1Xo7eKSIIoIFJUSVkN4/NzmVAwIOxSRKSXUkCkoN3NbSxcu013L4lIQikgUtBra7bS0t6hp6dFJKEUECmotLyGgf0yKC4aGnYpItKLKSBSTEeHU1Jew+zJBWRl6D+fiCSOzjAp5r3Nu6itb1b/g4gknAIixZSU1WAG5yogRCTBFBApprS8hlPGDmFoblbYpYhIL6eASCHVdU28W7VLzUsi0iMUECnkxXI9PS0iPUcBkUJeKKuhcHAOU0YMDLsUEekDFBApoqm1ndfWbGXO1OGYHc7MryIiR0YBkSIWrttGY2s7c9S8JCI9RAGRIkrLasjJTOeMCcPCLkVE+ggFRApwd0rLazhrUj7ZmelhlyMifYQCIgWsrK6namejBucTkR6lgEgBJWWR21v19LSI9CQFRAooKavmhMI8RgzKDrsUEelDFBBJbltDM0srdurpaRHpcQqIJPfSylrc9fS0iPQ8BUSSKy2vYfjAfhx/TF7YpYhIH6OASGItbR28sqqWOVOHk5amp6dFpGcpIJLY4g3bqW9uU/+DiIRCAZHESspryMpI48yJ+WGXIiJ9kAIiSbk7JWXVnDFhGLn9
MsIuR0T6IAVEklq3dTcbtu3R3UsiEhoFRJIqDZ6eVv+DiIRFAZGkSsqrmTpyIKOH9A+7FBHpoxQQSWhXYyuLNuzQ1YOIhCqhAWFm88xspZmtMbPbOtnnajNbYWbLzewPUetvMrPVweumRNaZbF5ZVUt7h6v/QURClbDbY8wsHbgbOB+oBBaZ2QJ3XxG1zyTg68CZ7r7DzIYH64cC/woUAw4sCY7dkah6k0lpeQ1D+mdy8pghYZciIn1YIq8gZgJr3H2du7cADwOXHbDPZ4G795743b0mWH8h8Ly7bw+2PQ/MS2CtSaOtvYMXV9Zw7pThpOvpaREJUSIDohCoiFquDNZFmwxMNrPXzOx1M5t3GMdiZjeb2WIzW1xbW9uNpYdnacVOdu5p1dzTIhK6sDupM4BJwDnAtcCvzGxwvAe7+z3uXuzuxQUFBYmpsIeVlNWQkWbMntw7Po+IpK5EBkQVMCZqeXSwLlolsMDdW919PbCKSGDEc2yvVFpezczxQxmUnRl2KSLSxyUyIBYBk8xsvJllAfOBBQfs8ySRqwfMLJ9Ik9M64FngAjMbYmZDgAuCdb1axfY9rKpu0O2tIpIUEnYXk7u3mdktRE7s6cB97r7czG4HFrv7AvYFwQqgHfiqu28DMLM7iIQMwO3uvj1RtSaL0vJIH/3caSNCrkREBMzdw66hWxQXF/vixYvDLuOo3PDrN6ja0UjpV84JuxQR6SPMbIm7F8faFnYntQQamtt4Y912NS+JSNLQONJJ4KWVNdzx1Apa2juYd/zIsMsREQEUEKFaW9vAd/+vjNLyGoqG9efeG4spLhoadlkiIoACIhS7Glv5r5LV/O5vG8jOTOcbF03lpg8V0S8jPezSREQ+oIDoQe0dzsOLNvGj51axY08L1xSP4csXTKFgYL+wSxMROYgCoocsXLuN259aQdn7dcwsGsq3Pzqd4wvzwi5LRKRTCogE27RtD997uoxnlm+hcHAOd193ChedMBIzDcQnIslNAZEgDc1t/PeLa7j3r+tJN+PL50/ms7MnkJ2pfgYRSQ0KiG7W0eH8aWkVP3imnJr6Zq6YUcg/z5vKyLzssEsTETksCohutGTjDm7/83LertzFSWMG88sbTuWUsZr0R0RSkwKiG7y/q5E7/1LO/y7bzIhB/fjx1Sdx+cmFpGnCHxFJYQqIo9DY0s49r6zjFy+vocPhlnMn8vlzjiW3n75WEUl9OpMdAXfnqXfe5/tPl7F5VxMXnzCK2z4ylTFD+4ddmohIt1FAHKZ3K3dx+1PLWbRhB9NHDeLH15zMrAnDwi5LRKTbKSDiVFPfxA+fXcmjSyoZ2j+L7195AlcXjyFd/Qwi0kspIA6hua2d37y2gZ+XrqG5rZ3Pnj2BW+ZM1JSgItLrKSA64e48t6Ka7z1dxsZtezhv2nD+5eLpjM/PDbs0EZEeoYCIYeWWem5/ajmvrdnGpOED+P2nZjJ7ckHYZYmI9CgFRJTtu1u46/lVPPjGRgZmZ/Jvlx7HJ04fS0a6Jt4Tkb5HAQG0tndw/8KN/OSFVexuaeeGWeP44nmTGZKbFXZpIiKh6fMBUbF9D5/8zZusrd3N2ZPy+dYl05k8YmDYZYmIhK7PB8SIQdmMG5bL1z8yjbnThmsYbhGRQJ8PiKyMNO775GlhlyEiknTU+yoiIjEpIEREJCYFhIiIxKSAEBGRmBQQIiISkwJCRERiUkCIiEhMCggREYnJ3D3sGrqFmdUCG4/iLfKBrd1UTqrTd7E/fR/70/exT2/4Lsa5e8zhqntNQBwtM1vs7sVh15EM9F3sT9/H/vR97NPbvws1MYmISEwKCBERiUkBsc89YReQRPRd7E/fx/70fezTq78L9UGIiEhMuoIQEZGYFBAiIhJTnw8IM5tnZivNbI2Z3RZ2PWEyszFm9qKZrTCz5Wb2j2HXFDYzSzezpWb2VNi1hM3MBpvZY2ZWbmZlZnZG2DWFycy+FPz/5D0ze8jMssOuqbv16YAws3TgbuAjwHTgWjObHm5VoWoDvuzu04FZwBf6+PcB8I9AWdhFJImfAs+4+1TgJPrw92JmhcA/AMXufjyQDswPt6ru16cDApgJrHH3de7eAjwMXBZyTaFx9/fd/a3g53oiJ4DCcKsKj5mNBi4G7g27lrCZWR4wG/g1gLu3uPvOUIsKXwaQY2YZQH9gc8j1dLu+HhCFQEXUciV9+IQYzcyKgBnAGyGXEqafAF8DOkKuIxmMB2qB3wRNbveaWW7YRYXF3auAHwKbgPeBXe7+XLhVdb++HhASg5kNAB4HvujudWHXEwYzuwSocfclYdeSJDKAU4BfuPsMYDfQZ/vszGwIkdaG8cAxQK6ZXR9uVd2vrwdEFTAmanl0sK7PMrNMIuHwoLv/Kex6QnQmcKmZbSDS9DjHzB4It6RQVQKV7r73ivIxIoHRV50HrHf3WndvBf4EfCjkmrpdXw+IRcAkMxtvZllEOpkWhFxTaMzMiLQxl7n7j8OuJ0zu/nV3H+3uRUT+d1Hq7r3uL8R4ufsWoMLMpgSr5gIrQiwpbJuAWWbWP/j/zVx6Yad9RtgFhMnd28zsFuBZInch3Ofuy0MuK0xnAjcA75rZsmDdN9z96fBKkiRyK/Bg8MfUOuD/hVxPaNz9DTN7DHiLyN1/S+mFw25oqA0REYmprzcxiYhIJxQQIiISkwJCRERiUkCIiEhMCggREYlJASESMLOG4N8iM7uum9/7Gwcs/607318kERQQIgcrAg4rIIIB27qyX0C4e6976lZ6HwWEyMHuBM42s2XBmP/pZvafZrbIzN4xs78DMLNzzOxVM1tA8FSxmT1pZkuCeQJuDtbdSWTUz2Vm9mCwbu/VigXv/Z6ZvWtm10S990tR8y88GDyxi5ndGczZ8Y6Z/bDHvx3pM/r0k9QinbgN+Iq7XwIQnOh3uftpZtYPeM3M9o7ceQpwvLuvD5Y/5e7bzSwHWGRmj7v7bWZ2i7ufHON3XQmcTGR+hfzgmFeCbTOA44gMI/0acKaZlQFXAFPd3c1scPd+dJF9dAUhcmgXADcGw4+8AQwDJgXb3owKB4B/MLO3gdeJDAQ5ia6dBTzk7u3uXg28DJwW9d6V7t4BLCPS9LULaAJ+bWZXAnuO8rOJdEoBIXJoBtzq7icHr/FRY//v/mAns3OIjPJ5hrufRGR8nqOZhrI56ud2IMPd24hMdPUYcAnwzFG8v0iXFBAiB6sHBkYtPwt8PhgKHTOb3MlkOXnADnffY2ZTiUzbulfr3uMP8CpwTdDPUUBk1rY3OyssmKsjLxhA8UtEmqZEEkJ9ECIHewdoD5qKfktkLuYi4K2go7gWuDzGcc8Anwv6CVYSaWba6x7gHTN7y90/EbX+CeAM4G3Aga+5+5YgYGIZCPyvmWUTubL5pyP6hCJx0GiuIiISk5qYREQkJgWEiIjEpIAQEZGYFBAiIhKTAkJERGJSQIiISEwKCBERien/A0GEoRH/AAAdAAAAAElFTkSuQmCC\n",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAn/klEQVR4nO3de5xdZ13v8c937pOZyaXJ5J4maZs2DRRSGCtYxQtQq2KL4MEUEVCx4KGAqHCKx4NYjp56AeFgX0qBKsqlQFEMnkoFBURuJoVy6eyWpmmhSXaSyXXPJHOf3/ljrZnsTPbM7DTZWXvv+b5fr51Z61nrWfs3+5VZv72eZ63nUURgZmY2XUPWAZiZWXVygjAzs5KcIMzMrCQnCDMzK8kJwszMSnKCMDOzkpwgzMysJCcIq3mSBopeE5IGi9Z/+Ukc7wuSXlWJWM1qSVPWAZidq4jonFyW9Djwqoj4XHYRVZakpogYyzoOq3++grC6JalB0q2SHpV0WNLHJV2UbmuT9KG0/JikHZJWSPoj4MeAv0yvQP5yhmN/QtJ+Sccl/YekpxRta5f0DknfT7f/p6T2dNuPSvpK+p5PSHplWn7aVYukV0r6z6L1kPRaSY8Aj6Rl706PUZB0v6QfK9q/UdLvpb97f7p9naQ7JL1j2u+yXdIbz/0Tt3rjBGH17HXAC4EfB1YDR4E70m2vABYB64ClwGuAwYj4n8CXgFsiojMibpnh2P8CbAKWA98APly07c+BZwI/AlwEvBmYkLQ+rfceoBvYCjxwFr/PC4EfBrak6zvSY1wEfAT4hKS2dNtvAzcBPwssBH4NOAl8ELhJUgOApGXA89L6ZqdxE5PVs9eQnOj3AEh6G/ADSb8CjJIkhssi4tvA/Wdz4Ii4a3I5Pe5RSYuAfpKT8bMiYm+6y1fS/V4KfC4iPpqWH05f5fo/EXGkKIYPFW17h6TfB64AvgW8CnhzRDycbv/W5HtKOg48F/gssA34QkQcOIs4bJ7wFYTVs/XAP6bNOceAHDAOrAD+HrgPuFvSPkl/Kqm5nIOmzTe3p803BeDxdNOy9NUGPFqi6roZysv1xLQ4fldSLm3GOkZyRbSsjPf6IPCydPllJJ+F2RmcIKyePQH8TEQsLnq1RcTeiBiNiD+MiC0kTUEvAF6e1ptriOOXAjeSNM0sAjak5QIOAUPApTPEU6oc4ASwoGh9ZYl9puJK+xveDLwEWBIRi4HjaQxzvdeHgBslPR24EvjUDPvZPOcEYfXsr4E/Stv+kdQt6cZ0+SclXSWpESiQNDlNpPUOAJfMctwuYJikeWgB8MeTGyJiArgLeKek1enVxrMltZL0UzxP0kskNUlaKmlrWvUB4EWSFki6DPj1OX63LmAM6AOaJL2VpK9h0vuBt0vapMTTJC1NY9xD0n/x98AnI2JwjveyecoJwurZu4HtwL9K6ge+RtLJC8k39HtIkkMO+CKnmlreDfyipKOS/m+J4/4d8H1gL9CbHrfY7wLfITkJHwH+BGiIiB+QdBr/Tlr+APD0tM5fACMkyemDnN7pXcp9wGeA76WxDHF6E9Q7gY8D/5r+jh8A2ou2fxC4Cjcv2SzkCYPM5h9JzyFpalofPgnYDHwFYTbPpJ3xbwDe7+Rgs3GCMJtHJF0JHANWAe/KNBirem5iMjOzknwFYWZmJdXNk9TLli2LDRs2ZB2GmVlNuf/++w9FRHepbXWTIDZs2MDOnTuzDsPMrKZI+v5M29zEZGZmJTlBmJlZSU4QZmZWkhOEmZmV5ARhZmYlOUGYmVlJThBmZlZS3TwHYWb1ISIYGp2gf2iUwtAYhaFR+ofGkvXB5OfI2ATdXa2sWtzOqkVtrFzUxsK2siYErBsTE8GhE8McLAwzEcHT1i4+7+9R0QQh6XqSsfUbSUaOvH3a9otJxqVfnO5za0Tcm257C8mkKePA6yPivkrGambnx8jYqZN7/7ST++TJvvikf/r6GIXBUcYmzn6MuM7WJlYuamNV+lq5qH1qedWi9jSJNCFp7oNlKCIoDI1xsDDE/sIQBwrDHCgMTb32F4Y5WBiir3946nN6+rrF/NNrrz3vsVQsQaQzdd0BPB/YA+yQtD0ieot2+33g4xHxV5K2APcCG9LlbcBTgNXA5yRdHhHjlYrXzE6JCAaGxzg8MMLhE8McGhhJlgeGTz+ZT0sEhcFRhscm5jx+Z2sTC9ua6GprpqutieVdbVza3URXWrYwLe9qa2Jhe/Np+y5sa6a5sYGD/UPsPz5E/vgQ+eOD5I8n6/uOD/G9A30c7B9m+likHS2NaRJpPz2RLE4TycJ2FrZXLokMjY6nJ/ph9heGOFh00j+Qru8vDDE0euZnuKi9mRULW1mxsI1Ny5dNLa9Y2MbaJe0l3u3cVfIK4hpgV0TsBpB0N8k8vsUJIjg1TeIiYF+6fCNwd0QMA49J2pUe76sVjNesrg2PjXPkRHKiPzQwPHXyT9ZPLR8eGObQiRFGZjjRtzU3FJ3Am1nU3szaJe1TJ/HpJ/Opk3578rOztYnGhnM/Aa9dsoC1SxbMuH10fIKD/cPsT5NH/tjpyeRLjxziYP8Q0y9WFkwlkVOJZOWiNlanVyGrFrWxqL35tCQyNj7BoYGR9Bv/0Azf/oc5Pjh6RpytTQ2sXJSc6K9au5jndbWyclEbyxe2sWJyuauN9pbGc/7MzlYlE8QaTp8CcQ+npnuc9DaS6SBfB3SQTAI/Wbd4Gsc9adlpJN0M3Axw8cUXn5egzWrFxERwbHA0OaGXOMEfnkoCSULoHxoreZyWpgaWdbSwtLOVpZ0tXL6ii2WdLSztbGFpR1K2rLOVZZ2tLOloprXpwp+onozmxgbWLG5nzeKZv12PpUlkMnFMvyL58q5DHCicmUTamxtZtaiNBa2NHCgMc2jgzKuVxgaxvKuV5Qvb2LC0g2ddsnTqG3/xt/9qbvbKupP6JuBvI+Idkp4N/L2kp5ZbOSLuBO4E6Onp8cQWVhcmOx/3HRsif2yQvceSk9WhgeGpb/6HBkY4enKE8RJt9RJctODUCf4pqxeyrLOVpUVJYFnRyb+ztXpPUJXW1NjA6sXtrF7cDiwpuc/Y+AR9A8NFVyGnEsnJkTGesmoRKxalJ/2utvTbfytLO1rPy5VSliqZIPYC64rW16ZlxX4duB4gIr4qqQ1YVmZds5pUGBpl37FB8seG0pP/IPuODSVl6bfX0fHTT/xtzQ0s72pjaWcLa5csYOu6xSW/4S/tbGHJgpaaPzFVk6bGhrSpqR3mWUNFJRPEDmCTpI0kJ/dtwEun7fMD4LnA36ZTIbYBfcB24COS3knSSb0J+K8Kxmp2XgyPjbP/eHriT0/6+45PnvyTRDAwfHpTT2ODWLkwadveum4xP3PVStYsbp9q/16zuJ3FC5rn7bd8y07FEkREjEm6BbiP5BbWuyLiQUm3ATsjYjvwO8D7JL2RpMP6lekk6g9K+jhJh/YY8FrfwWRZm5gI+gaGk5N+2tQwlQjSk/+hgeEz6i3taGHV4qQd+kcuXcbqxUnn5+rFba
xe3M7yrjZ/47eqVDdzUvf09IQnDLJzNTY+wQ+OnOTRvhPsOjjAroMDPHH0JPuODXKgMHRG009HSyOr0jbs1YuSE/7kt/7Jh7jammujU9fmJ0n3R0RPqW1Zd1KbZWJwZJxH+wZ4tC9JApM/Hz90kpHxU7d3rljYyvqLOuhZvyQ5+S9uZ83kFcCiyt4zb5Y1Jwira0dOjEyd/Cdfj/YNsPfY4NRtiQ2C9Us7uLS7k5/avILLlndyaXcHly7vnHfDN5gVc4KwmjcxEew7Ppie/JOmoUcPDrCrb4AjJ0am9mtrbuCSZZ084+IlvKRnHZct7+Sy5Z2sX7qgZu7tN7uQnCCsZoyMTfD9wydOuxLY1TfAowdPMDh66h6GJQuauWx5J9dtSa8GlndyWXcnaxa30+DOYLOyOUFYVTpyYoTPP3SQXZN9BAcH+P6Rk6c9GLZmcTuXLu/kh665KLka6E6uCJZ2tmYYuVn9cIKwqvS//7mXf/jmXpoaxIZlHVy+ooufvWoVly7v4LLuLi7p7qCj1f99zSrJf2FWlb699zjPubybD7yih+ZGz2tllgX/5VnVGRodZ3ffAFvXLnJyMMuQ//qs6nzvQD8TAVeuWjj3zmZWMU4QVnVy+QLgBGGWNScIqzq5fD8dLY1cfNHMk8GYWeU5QVjV6d1X4IqVXX5mwSxjThBWVSKC3P4CW1a7ecksa04QVlX2HB2kf2jM/Q9mVcAJwqqKO6jNqocThFWV3nwBCTav7Mo6FLN5zwnCqkouX2Dj0g4WtPghf7OsOUFYVcnl+928ZFYlnCCsavQPjfKDIye5cpWbl8yqQUUThKTrJT0saZekW0ts/wtJD6Sv70k6VrRtvGjb9krGadXh4f39gDuozapFxRp6JTUCdwDPB/YAOyRtj4jeyX0i4o1F+78OuLroEIMRsbVS8Vn16fUdTGZVpZJXENcAuyJid0SMAHcDN86y/03ARysYj1W5XL7A4gXNrFrUlnUoZkZlE8Qa4Imi9T1p2RkkrQc2Av9eVNwmaaekr0l64Qz1bk732dnX13eewras9Ob7uXLlQiQPsWFWDaqlk3obcE9EjBeVrY+IHuClwLskXTq9UkTcGRE9EdHT3d19oWK1ChifCB7eX3DzklkVqWSC2AusK1pfm5aVso1pzUsRsTf9uRv4Aqf3T1idefzwCYZGJ3wHk1kVqWSC2AFskrRRUgtJEjjjbiRJm4ElwFeLypZIak2XlwHXAr3T61r96N3nDmqzalOxu5giYkzSLcB9QCNwV0Q8KOk2YGdETCaLbcDdERFF1a8E3itpgiSJ3V5895PVn1y+QFOD2LSiM+tQzCxV0fEMIuJe4N5pZW+dtv62EvW+AlxVydisuuTyBS5b3klrU2PWoZhZqlo6qW2e8xAbZtXHCcIyd+TECPsLQ+6gNqsyThCWuck5ILasWpRxJGZWzAnCMndqkiBfQZhVEycIy1xvvsDyrlaWdrZmHYqZFXGCsMy5g9qsOjlBWKZGxibYddAJwqwaOUFYpnYdHGB0PNiy2gnCrNo4QVimTt3B5A5qs2rjBGGZyuULtDY1sGFpR9ahmNk0ThCWqd58gStWdtHU6P+KZtXGf5WWmYggly9w5Ur3P5hVIycIy8yBwjBHT466g9qsSjlBWGZOPUHtBGFWjZwgLDO9aYLY7DuYzKqSE4RlpjdfYO2Sdha2NWcdipmV4ARhmcnlC2xx85JZ1XKCsEwMjozz+KET7n8wq2JOEJaJhw/0MxHuoDarZhVNEJKul/SwpF2Sbi2x/S8kPZC+vifpWNG2V0h6JH29opJx2oV3aogNJwizatVUqQNLagTuAJ4P7AF2SNoeEb2T+0TEG4v2fx1wdbp8EfAHQA8QwP1p3aOVitcurN59BTpbm1i7pD3rUMxsBpW8grgG2BURuyNiBLgbuHGW/W8CPpou/zTw2Yg4kiaFzwLXVzBWu8By+QJXruqioUFZh2JmM6hkglgDPFG0victO4Ok9cBG4N/Ppq6kmyXtlLSzr6/vvARtlTcxETy033NAmFW7aumk3gbcExHjZ1MpIu6MiJ6I6Onu7q5QaHa+7Tk6yMDwmBOEWZWrZILYC6wrWl+blpWyjVPNS2db12pMb/444DuYzKpdJRPEDmCTpI2SWkiSwPbpO0naDCwBvlpUfB9wnaQlkpYA16VlVgd68/00CK5Y4SE2zKpZxe5iiogxSbeQnNgbgbsi4kFJtwE7I2IyWWwD7o6IKKp7RNLbSZIMwG0RcaRSsdqFlcsX2Lisg/aWxqxDMbNZVCxBAETEvcC908reOm39bTPUvQu4q2LBWWZy+QJb1y3OOgwzm0O1dFLbPHF8cJQ9Rwfd/2BWA5wg7IJ6yE9Qm9UMJwi7oKaG2PAscmZVb8Y+CEkvKqP+UNrPYFaWXL6fizpaWN7VmnUoZjaH2Tqp3wf8EzDbWAjPYVontNlscvuTITYkD7FhVu1mSxD/EhG/NltlSR86z/FYHRsbn+Ch/f28/Fnrsw7FzMowYx9ERLxsrsrl7GM26bFDJxgZm/AdTGY1ouxOakmXSfqQpE9KenYlg7L61OsOarOaMlsndVtEDBUVvR14c7r8aWBrBeOyOpTL99PcKC7t7sw6FDMrw2xXEJ+W9PKi9VFgA7AeOKtRV80gucX1suVdtDT57mqzWjDbX+r1wEJJn5H0HOB3SSby+QXgly9EcFZfetNJgsysNszYxJTOzfCXkv4e+F/AbwK/HxGPXqjgrH4cGhimr3/YT1Cb1ZDZ+iB+GHgTMAL8MTAI/JGkvcDbI+LYBYnQ6kLOQ2yY1ZzZnoN4L/CzQCfwNxFxLbBN0o8DHyNpbjIry2SC8C2uZrVjtgQxRtIp3UFyFQFARHwR+GJlw7J6k8v3s3JhG0s6WrIOxczKNFuCeCnwapLk8PJZ9jObU+8+d1Cb1ZrZOqm/B/zOBYzF6tTw2DiP9g3wvC3Lsw7FzM7CjLe5SvrnuSqXs4/ZIwcGGJsI9z+Y1ZjZmph+VNL2WbYL2HKe47E65A5qs9o0W4K4sYz6I7NtlHQ98G6gEXh/RNxeYp+XAG8DAvhWRLw0LR8HvpPu9oOIuKGMeKwK9eYLtDU3sGFpR9ahmNlZmK0P4pzuVJLUCNwBPB/YA+yQtD0ieov22QS8Bbg2Io5KKm6kHoyIrecSg1WHXL7AFSsX0tjgOSDMakklB8W5BtgVEbsjYgS4mzOvSn4DuCMijgJExMEKxmMZiAhy+X4/IGdWgyqZINYATxSt70nLil0OXC7py5K+ljZJTWqTtDMtf2GpN5B0c7rPzr6+vvMavJ0f+eNDHB8cZYtvcTWrOXMmCEk/L6lSiaQJ2AT8BHAT8D5Ji9Nt6yOih+R5jHdJunR65Yi4MyJ6IqKnu7u7QiHauXAHtVntKufE/0vAI5L+VNLmszj2XmBd0fratKzYHmB7RIxGxGPA90gSBhGxN/25G/gCcPVZvLdVid59SYLY7ARhVnPmTBDptKJXA48Cfyvpq2nTzlxtBjuATZI2SmoBtgHTb5v9FMnVA5KWkTQ57Za0RFJrUfm1Q
C9Wc3L7C6xfuoDO1tlumDOzalRW01FEFIB7SDqaV5HMCfENSa+bpc4YcAtwH5ADPh4RD0q6TdLkLav3AYcl9QKfB94UEYeBK4Gdkr6Vlt9efPeT1Y5cvp8rV/rqwawWzfm1Lj2Z/ypwGfB3wDURcVDSApJv9e+ZqW5E3AvcO63srUXLAfx2+ire5yvAVeX/GlaNTo6M8fjhE7xw6/R7E8ysFpRz3f9i4C8i4j+KCyPipKRfr0xYVg8e2t9PBB6kz6xGlZMg3gbkJ1cktQMrIuLxiPi3SgVmtW+yg9p3MJnVpnL6ID4BTBStj6dlZrPK5Qt0tTWxdkl71qGY2ZNQToJoSp+EBiBd9qwvNqdcvsCVqxYieYgNs1pUToLoK7rrCEk3AocqF5LVg4mJ4KH9HmLDrJaV0wfxGuDDkv6SZIjvJ/AMczaH7x85ycmRcXdQm9WwORNERDwKPEtSZ7o+UPGorOZ5iA2z2lfW462Sfg54CskAegBExG0VjMtqXC5foLFBXL7CVxBmtaqcwfr+mmQ8pteRNDH9N2B9heOyGpfLF7hkWQdtzY1Zh2JmT1I5ndQ/EhEvB45GxB8CzyYZM8lsRrl8v5uXzGpcOQliKP15UtJqYJRkPCazko6dHGHvsUEnCLMaV04fxKfTORr+DPgGydzR76tkUFbbcvl+ALasdoIwq2WzJoh0oqB/i4hjwCcl/TPQFhHHL0RwVptO3cHkDmqzWjZrE1NETAB3FK0POznYXHL5Ass6W1je1ZZ1KGZ2Dsrpg/g3SS+Wx0uwMvWmQ2yYWW0rJ0G8mmRwvmFJBUn9kgoVjstq1Oj4BI8cGHCCMKsD5TxJ7YZkK9vuvhOMjE94DCazOlDOjHLPKVU+fQIhM/AQG2b1pJwmpjcVvf4X8GmSSYTmJOl6SQ9L2iXp1hn2eYmkXkkPSvpIUfkrJD2Svl5RzvtZ9nL5Ai2NDVzS3ZF1KGZ2jsppYvr54nVJ64B3zVVPUiPJHVDPB/YAOyRtj4jeon02AW8Bro2Io5KWp+UXAX8A9JA8d3F/Wvdoub+YZaM3X2DTik6aG8v57mFm1ezJ/BXvAa4sY79rgF0RsTudZOhu4MZp+/wGcMfkiT8iDqblPw18NiKOpNs+C1z/JGK1CyznO5jM6kY5fRDvIfkWD0lC2UryRPVc1pDMHTFpD/DD0/a5PH2PLwONwNsi4jMz1F1TIrabgZsBLr744jJCsko62D/EoYERd1Cb1YlyhtrYWbQ8Bnw0Ir58Ht9/E/ATwFrgPyRdVW7liLgTuBOgp6cn5tjdKmxyiA1fQZjVh3ISxD3AUESMQ9K3IGlBRJyco95eYF3R+tq0rNge4OsRMQo8Jul7JAljL0nSKK77hTJitQxN3sHkKwiz+lDWk9RAe9F6O/C5MurtADZJ2iipBdgGbJ+2z6dIE4GkZSRNTruB+4DrJC2RtAS4Li2zKta7r8DqRW0sWtCcdShmdh6UcwXRVjzNaEQMSFowV6WIGJN0C8mJvRG4KyIelHQbsDMitnMqEfQC48CbIuIwgKS3kyQZgNsi4shZ/WZ2weXyBY/galZHykkQJyQ9IyK+ASDpmcBgOQePiHuBe6eVvbVoOYDfTl/T694F3FXO+1j2hkbH2X3oBNc/dWXWoZjZeVJOgvgt4BOS9pFMObqSZApSsymPHBhgfCLcQW1WR8p5UG6HpM3AFWnRw2mnstmU3nwyCrwThFn9mLOTWtJrgY6I+G5EfBfolPTfKx+a1ZJcvp8FLY2sv2jO7ikzqxHl3MX0G+mMcgCkTzb/RsUisprUmy+weWUXDQ2eNsSsXpSTIBqLJwtKx1hqqVxIVmsiwkNsmNWhcjqpPwN8TNJ70/VXp2VmAOw9Nkj/0JgThFmdKSdB/A+S8Y5+M13/LPC+ikVkNad3n+eAMKtHczYxRcRERPx1RPxiRPwi0Au8p/KhWa3I5fuRYPNKTz5oVk/KuYJA0tXATcBLgMeAf6hkUFZbcvkCG5Z20NFa1n8nM6sRM/5FS7qcJCncBBwCPgYoIn7yAsVmNSK3v8BTPMSGWd2ZrYnpIeCngBdExI9GxHtIxksymzIwPMb3D5/kypVOEGb1ZrYE8SIgD3xe0vskPZdkqA2zKQ/l3UFtVq9mTBAR8amI2AZsBj5PMibTckl/Jem6CxSfVbmpOSDcxGRWd8q5i+lERHwkIn6eZOKeb5Lc+mpGb76fRe3NrFrUlnUoZnaelfMk9ZSIOBoRd0bEcysVkNWW5AnqLooetjezOnFWCcKs2PhE8NB+D7FhVq+cIOxJe/zwCYZGJ5wgzOqUE4Q9aVMd1E4QZnXJCcKetFy+QFOD2LSiM+tQzKwCKpogJF0v6WFJuyTdWmL7KyX1SXogfb2qaNt4Ufn2SsZpT04u38+l3Z20NjVmHYqZVUDFBs9J5424A3g+sAfYIWl7RPRO2/VjEXFLiUMMRsTWSsVn5653X4FnXXJR1mGYWYVU8griGmBXROyOiBHgbuDGCr6fXUBHT4ywvzDkDmqzOlbJBLEGeKJofU9aNt2LJX1b0j2S1hWVt0naKelrkl5Y6g0k3Zzus7Ovr+/8RW5z8hPUZvUv607qTwMbIuJpJBMRfbBo2/qI6AFeCrxL0qXTK6cP7fVERE93d/eFidiAZA5q8BhMZvWskgliL1B8RbA2LZsSEYcjYjhdfT/wzKJte9Ofu4EvAFdXMFY7S735At1drSzrbM06FDOrkEomiB3AJkkbJbUA24DT7kaStKpo9QYgl5YvkdSaLi8DriWZyc6qRC7f76sHszpXsbuYImJM0i3AfUAjcFdEPCjpNmBnRGwHXi/pBmAMOAK8Mq1+JfBeSRMkSez2Enc/WUZGxibYdbCfH7/czXpm9ayic0RGxL3AvdPK3lq0/BbgLSXqfQW4qpKx2ZP3aN8Ao+PBlas8B7VZPcu6k9pqkIfYMJsfnCDsrPXuK9DS1MDGZR1Zh2JmFeQEYWctt7/AFSu6aGr0fx+zeua/cDsrEUEu3+/mJbN5wAnCzsrB/mGOnBhxB7XZPOAEYWfFT1CbzR9OEHZWevclCWKzE4RZ3XOCsLOSyxdYs7idRe3NWYdiZhXmBGFnJZcveARXs3nCCcLKNjgyzmOHTrj/wWyecIKwsj18oJ+JgC2+g8lsXnCCsLLlfAeT2bziBGFly+ULdLY2sW7JgqxDMbMLwAnCypbLF9i8souGBmUdipldAE4QVpaJifAkQWbzjBOElWXP0UEGhsecIMzmEScIK8upITZ8B5PZfOEEYWXJ5Qs0CDav9BWE2XzhBGFlyeULbFjWQXtLY9ahmNkFUtEEIel6SQ9L2iXp1hLbXympT9ID6etVRdteIemR9PWKSsZpc+vNF9z/YDbPNFXqwJIagTuA5wN7gB2StkdE77RdPxYRt0yrexHwB0APEMD9ad2jlYrXZlYYGmXP0UFuuubirEMxswuoklcQ1wC7ImJ3RIwAdwM3lln3p4HPRsSRNCl8Fri+QnHaHB7K9wN4FjmzeaaSCWIN8ETR+p60bLoXS/q2pHskrTubupJulrRT0s6+
vr7zFbdN4yE2zOanrDupPw1siIinkVwlfPBsKkfEnRHRExE93d3dFQnQkgSxZEEzKxa2Zh2KmV1AlUwQe4F1Retr07IpEXE4IobT1fcDzyy3rl04kx3UkofYMJtPKpkgdgCbJG2U1AJsA7YX7yBpVdHqDUAuXb4PuE7SEklLgOvSMrvAxsYneHi/h9gwm48qdhdTRIxJuoXkxN4I3BURD0q6DdgZEduB10u6ARgDjgCvTOsekfR2kiQDcFtEHKlUrDazxw+fYHhswh3UZvNQxRIEQETcC9w7reytRctvAd4yQ927gLsqGZ/NrTe9g8lXEGbzT9ad1FblcvkCzY3isuWdWYdiZheYE4TNqndfgUu7O2lp8n8Vs/nGf/U2q1y+4P4Hs3nKCcJmdHhgmIP9w2xZ7QRhNh85QdiMcu6gNpvXnCBsRr3544AThNl85QRhM8rl+1mxsJWLOlqyDsXMMuAEYTNyB7XZ/OYEYSUNj42z6+CAm5fM5jEnCCtp18EBxibCCcJsHnOCsJJ693kOCLP5zgnCSsrl+2lrbmDjso6sQzGzjDhBWEm5fIErVi6kscFzQJjNV04QdoaIILe/wJZVXVmHYmYZcoKwM+wvDHHs5Kj7H8zmOScIO4M7qM0MnCCshFw+SRCbV7qJyWw+c4KwM+Ty/Vx80QK62pqzDsXMMlTRBCHpekkPS9ol6dZZ9nuxpJDUk65vkDQo6YH09deVjNNOl8sXuNId1GbzXsXmpJbUCNwBPB/YA+yQtD0ieqft1wW8Afj6tEM8GhFbKxWflXZyZIzHDp/ghq2rsw7FzDJWySuIa4BdEbE7IkaAu4EbS+z3duBPgKEKxmJlemh/PxHuoDazCl5BAGuAJ4rW9wA/XLyDpGcA6yLi/0l607T6GyV9EygAvx8RX6pEkEdPjPCC9/znaWXSDMtohvLi/VWyfHrBTHUiIvk59U/yIyImV4mAIIg4tV5cP4rKJvcrrju5Nlle/J6jYxMAHsXVzCqaIGYlqQF4J/DKEpvzwMURcVjSM4FPSXpKRBSmHeNm4GaAiy+++EnF0dQonnXJ0qn1U6dhmGFx6oR6Znnp/Werc/p7RJKE0nwhkuQxmT6kU2WT20/tq6ntp/bVVCJTurNOO/appFd87NWL21i7pB0zm98qmSD2AuuK1temZZO6gKcCX0hPeCuB7ZJuiIidwDBARNwv6VHgcmBn8RtExJ3AnQA9PT3Tz8ll6Wpr5h0vefqTqWpmVtcq2QexA9gkaaOkFmAbsH1yY0Qcj4hlEbEhIjYAXwNuiIidkrrTTm4kXQJsAnZXMFYzM5umYlcQETEm6RbgPqARuCsiHpR0G7AzIrbPUv05wG2SRoEJ4DURcaRSsZqZ2ZlU3DZey3p6emLnzp1z72hmZlMk3R8RPaW2+UlqMzMryQnCzMxKcoIwM7OSnCDMzKwkJwgzMyupbu5iktQHfP8cDrEMOHSewql1/ixO58/jdP48TqmHz2J9RHSX2lA3CeJcSdo5061e840/i9P58zidP49T6v2zcBOTmZmV5ARhZmYlOUGccmfWAVQRfxan8+dxOn8ep9T1Z+E+CDMzK8lXEGZmVpIThJmZlTTvE4Sk6yU9LGmXpFuzjidLktZJ+rykXkkPSnpD1jFlTVKjpG9K+uesY8mapMWS7pH0kKScpGdnHVOWJL0x/Tv5rqSPSmrLOqbzbV4niHRSojuAnwG2ADdJ2pJtVJkaA34nIrYAzwJeO88/D4A3ALmsg6gS7wY+ExGbgaczjz8XSWuA1wM9EfFUkjlvtmUb1fk3rxMEcA2wKyJ2R8QIcDdwY8YxZSYi8hHxjXS5n+QEsCbbqLIjaS3wc8D7s44la5IWkUzk9QGAiBiJiGOZBpW9JqBdUhOwANiXcTzn3XxPEGuAJ4rW9zCPT4jFJG0Arga+nnEoWXoX8GaSWQ3nu41AH/A3aZPb+yV1ZB1UViJiL/DnwA+APHA8Iv4126jOv/meIKwESZ3AJ4HfiohC1vFkQdILgIMRcX/WsVSJJuAZwF9FxNXACWDe9tlJWkLS2rARWA10SHpZtlGdf/M9QewF1hWtr03L5i1JzSTJ4cMR8Q9Zx5Oha4EbJD1O0vT4U5I+lG1ImdoD7ImIySvKe0gSxnz1POCxiOiLiFHgH4AfyTim826+J4gdwCZJGyW1kHQybc84psxIEkkbcy4i3pl1PFmKiLdExNqI2EDy/+LfI6LuviGWKyL2A09IuiItei7Qm2FIWfsB8CxJC9K/m+dSh532TVkHkKWIGJN0C3AfyV0Id0XEgxmHlaVrgV8BviPpgbTs9yLi3uxCsiryOuDD6Zep3cCvZhxPZiLi65LuAb5BcvffN6nDYTc81IaZmZU035uYzMxsBk4QZmZWkhOEmZmV5ARhZmYlOUGYmVlJThBmKUkD6c8Nkl56no/9e9PWv3I+j29WCU4QZmfaAJxVgkgHbJvNaQkiIuruqVurP04QZme6HfgxSQ+kY/43SvozSTskfVvSqwEk/YSkL0naTvpUsaRPSbo/nSfg5rTsdpJRPx+Q9OG0bPJqRemxvyvpO5J+qejYXyiaf+HD6RO7SLo9nbPj25L+/IJ/OjZvzOsnqc1mcCvwuxHxAoD0RH88In5IUivwZUmTI3c+A3hqRDyWrv9aRByR1A7skPTJiLhV0i0RsbXEe70I2Eoyv8KytM5/pNuuBp5CMoz0l4FrJeWAXwA2R0RIWnx+f3WzU3wFYTa364CXp8OPfB1YCmxKt/1XUXIAeL2kbwFfIxkIchOz+1HgoxExHhEHgC8CP1R07D0RMQE8QNL0dRwYAj4g6UXAyXP83cxm5ARhNjcBr4uIrelrY9HY/yemdpJ+gmSUz2dHxNNJxuc5l2koh4uWx4GmiBgjmejqHuAFwGfO4fhms3KCMDtTP9BVtH4f8JvpUOhIunyGyXIWAUcj4qSkzSTTtk4anaw/zZeAX0r7ObpJZm37r5kCS+fqWJQOoPhGkqYps4pwH4TZmb4NjKdNRX9LMhfzBuAbaUdxH/DCEvU+A7wm7Sd4mKSZadKdwLclfSMifrmo/B+BZwPfAgJ4c0TsTxNMKV3AP0lqI7my+e0n9RualcGjuZqZWUluYjIzs5KcIMzMrCQnCDMzK8kJwszMSnKCMDOzkpwgzMysJCcIMzMr6f8DtnEhqj6H3isAAAAASUVORK5CYII=\n",
       "text/plain": [
        "<Figure size 432x288 with 1 Axes>"
       ]
@@ -507,7 +509,7 @@
     {
      "data": {
       "text/plain": [
-       "0.7979886313948404"
+       "0.8053976582616722"
       ]
      },
      "execution_count": 15,
@@ -546,7 +548,7 @@
     {
      "data": {
       "text/plain": [
-       "IncompatibleKeys(missing_keys=[], unexpected_keys=[])"
+       "<All keys matched successfully>"
       ]
      },
      "execution_count": 17,
@@ -773,12 +775,14 @@
     "# Export to FINN-ONNX <a id=\"export_finn_onnx\" ></a>\n",
     "\n",
     "\n",
-    "[ONNX](https://onnx.ai/) is an open format built to represent machine learning models, and the FINN compiler expects an ONNX model as input. We'll now export our network into ONNX to be imported and used in FINN for the next notebooks. Note that the particular ONNX representation used for FINN differs from standard ONNX, you can read more about this [here](https://finn.readthedocs.io/en/latest/internals.html#intermediate-representation-finn-onnx)."
+    "[ONNX](https://onnx.ai/) is an open format built to represent machine learning models, and the FINN compiler expects an ONNX model as input. We'll now export our network into ONNX to be imported and used in FINN for the next notebooks. Note that the particular ONNX representation used for FINN differs from standard ONNX, you can read more about this [here](https://finn.readthedocs.io/en/latest/internals.html#intermediate-representation-finn-onnx).\n",
+    "\n",
+    "You can see below how we export a trained network in Brevitas into a FINN-compatible ONNX representation. Note how we create a `QuantTensor` instance with dummy data to tell Brevitas how our inputs look like, which will be used to set the input quantization annotation on the exported model."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 27,
    "metadata": {
     "scrolled": true
    },
@@ -787,69 +791,38 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Model saved to cybsec-mlp.onnx\n"
+      "Model saved to cybsec-mlp-ready.onnx\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:15: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
-      "  from ipykernel import kernelapp as app\n"
+      "<ipython-input-22-78c27bb59095>:15: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.\n",
+      "  x = (x + torch.tensor([1.0])) / 2.0\n"
      ]
     }
    ],
    "source": [
     "import brevitas.onnx as bo\n",
+    "from brevitas.quant_tensor import QuantTensor\n",
     "\n",
-    "export_onnx_path = \"cybsec-mlp.onnx\"\n",
+    "ready_model_filename = \"cybsec-mlp-ready.onnx\"\n",
     "input_shape = (1, 600)\n",
-    "bo.export_finn_onnx(model_for_export, input_shape, export_onnx_path)\n",
-    "\n",
-    "print(\"Model saved to %s\" % export_onnx_path)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## One final fix: input datatype\n",
-    "\n",
-    "There's one more thing we'll do: we will mark the input tensor datatype as `DataType.BIPOLAR`, which will be used by the compiler later on. To do this, we'll utilize the `ModelWrapper` component from FINN, which lets us examine and manipulate the ONNX graph in an easier way.\n",
-    "\n",
-    "*In the near future it will be possible to add this information to the model [while exporting](https://github.com/Xilinx/brevitas/issues/232), instead of having to add it manually.*"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Input tensor name: 0\n",
-      "Input tensor shape: [1, 600]\n",
-      "Input tensor datatype: DataType.BIPOLAR\n"
-     ]
-    }
-   ],
-   "source": [
-    "from finn.core.modelwrapper import ModelWrapper\n",
-    "from finn.core.datatype import DataType\n",
+    "# create a QuantTensor instance to mark input as bipolar during export\n",
+    "input_a = np.random.randint(0, 1, size=input_shape).astype(np.float32)\n",
+    "input_a = 2 * input_a - 1\n",
+    "scale = 1.0\n",
+    "input_t = torch.from_numpy(input_a * scale)\n",
+    "input_qt = QuantTensor(\n",
+    "    input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True\n",
+    ")\n",
     "\n",
-    "finn_model = ModelWrapper(export_onnx_path)\n",
+    "bo.export_finn_onnx(\n",
+    "    model_for_export, export_path=ready_model_filename, input_t=input_qt\n",
+    ")\n",
     "\n",
-    "finnonnx_in_tensor_name = finn_model.graph.input[0].name\n",
-    "finnonnx_model_in_shape = finn_model.get_tensor_shape(finnonnx_in_tensor_name)\n",
-    "finn_model.set_tensor_datatype(finnonnx_in_tensor_name, DataType.BIPOLAR)\n",
-    "print(\"Input tensor name: %s\" % finnonnx_in_tensor_name)\n",
-    "print(\"Input tensor shape: %s\" % str(finnonnx_model_in_shape))\n",
-    "print(\"Input tensor datatype: %s\" % str(finn_model.get_tensor_datatype(finnonnx_in_tensor_name)))\n",
-    "\n",
-    "ready_model_filename = \"cybsec-mlp-ready.onnx\"\n",
-    "finn_model.save(ready_model_filename)"
+    "print(\"Model saved to %s\" % ready_model_filename)"
    ]
   },
   {
@@ -870,7 +843,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -894,10 +867,10 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f77214fa630>"
+       "<IPython.lib.display.IFrame at 0x7f49738bffa0>"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -940,7 +913,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/end2end_example/cybersecurity/dataloader_quantized.py b/notebooks/end2end_example/cybersecurity/dataloader_quantized.py
index 45651faa5a9a57e9a1d0d784b15ebe8945d9ddd7..738811fa72754f059ce7e62196deae8a824f03f6 100644
--- a/notebooks/end2end_example/cybersecurity/dataloader_quantized.py
+++ b/notebooks/end2end_example/cybersecurity/dataloader_quantized.py
@@ -26,12 +26,12 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import torch
-import pandas as pd
+import math
 import numpy as np
+import pandas as pd
+import torch
 from sklearn import preprocessing
 from sklearn.preprocessing import OneHotEncoder
-import math
 
 # quantize the UNSW_NB15 dataset and convert it to binary vectors
 # reimplementation
@@ -112,7 +112,7 @@ class UNSW_NB15_quantized(torch.utils.data.Dataset):
 
     def round_like_matlab_number(self, n: np.float64) -> int:
         """Round the input "n" like matlab uint32(n) cast (which also rounds) e.g.
-        0.5->1;  1.5->2; 2.3->2;   2.45->2 """
+        0.5->1;  1.5->2; 2.3->2;   2.45->2"""
         if n - math.floor(n) < 0.5:
             return math.floor(n)
         return math.ceil(n)
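
The hunk above only touches the docstring formatting of `round_like_matlab_number`, but since the docstring describes the rounding rule, the following minimal standalone sketch (the method reproduced without `self`) contrasts it with Python's built-in `round`, which uses banker's rounding:

```python
import math

def round_like_matlab_number(n: float) -> int:
    """Round half up, like a MATLAB uint32(n) cast: 0.5->1, 1.5->2, 2.3->2, 2.45->2."""
    if n - math.floor(n) < 0.5:
        return math.floor(n)
    return math.ceil(n)

# Python's built-in round() rounds exact halves to the nearest even integer,
# so the two functions differ on values like 0.5:
assert round_like_matlab_number(0.5) == 1 and round(0.5) == 0
assert round_like_matlab_number(1.5) == 2 and round(1.5) == 2
assert round_like_matlab_number(2.45) == 2 and round(2.45) == 2
```
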
diff --git a/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py b/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py
index 622c69c8d0abdf8025b0486c63bf336e4f8675f5..0ffb525544845757c90f30799fbad472d389348f 100644
--- a/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py
+++ b/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py
@@ -27,9 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import argparse
+import numpy as np
 from driver import io_shape_dict
 from driver_base import FINNExampleOverlay
-import numpy as np
 
 
 def make_unsw_nb15_test_batches(bsize, dataset_root):
diff --git a/requirements.txt b/requirements.txt
index 6dd4b5724782d01fc2958cc56c04cbc8e70af31f..da0ec0b63092f0618bb7c9982b95fa90e8f91118 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,8 +6,10 @@ future==0.18.2
 gspread==3.6.0
 numpy==1.18.0
 onnx==1.7.0
+onnxoptimizer
 onnxruntime==1.4.0
 pre-commit==2.6.0
+pyscaffold==3.2.1
 scipy==1.5.2
 setupext-janitor>=1.1.2
 toposort==1.5
diff --git a/run-docker.sh b/run-docker.sh
index 19c9f7bd3ce18819bca7bd2f24732b1fa18841cc..5ce65ba201ca9a38d96f3bd42387102ca5a36bb7 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -41,29 +41,19 @@ recho () {
   echo -e "${RED}$1${NC}"
 }
 
-if [ -z "$VIVADO_PATH" ];then
-  recho "Please set the VIVADO_PATH that contains the path to your Vivado installation directory."
-  recho "FINN functionality depending on Vivado or Vivado HLS will not be available."
+if [ -z "$FINN_XILINX_PATH" ];then
+  recho "Please set the FINN_XILINX_PATH environment variable to the path to your Xilinx tools installation directory (e.g. /opt/Xilinx)."
+  recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
 fi
 
-if [ -z "$PYNQ_IP" ];then
-  recho "Please set the PYNQ_IP env.var. to enable PYNQ deployment tests."
+if [ -z "$FINN_XILINX_VERSION" ];then
+  recho "Please set the FINN_XILINX_VERSION to the version of the Xilinx tools to use (e.g. 2020.1)"
+  recho "FINN functionality depending on Vivado, Vitis or HLS will not be available."
 fi
 
-if [ -z "$VITIS_PATH" ];then
-  recho "Please set the VITIS_PATH that contains the path to your Vitis installation directory."
-  recho "FINN functionality depending on Vitis will not be available."
-else
-  if [ -z "$PLATFORM_REPO_PATHS" ];then
-    recho "Please set PLATFORM_REPO_PATHS pointing to Vitis platform files (DSAs)."
-    recho "This is required to be able to use Vitis."
-    exit -1
-  fi
-  if [ -z "$XILINX_XRT" ];then
-    recho "Please set XILINX_XRT pointing to your XRT installation."
-    recho "This is required to be able to use Vitis."
-    exit -1
-  fi
+if [ -z "$PLATFORM_REPO_PATHS" ];then
+  recho "Please set PLATFORM_REPO_PATHS pointing to Vitis platform files (DSAs)."
+  recho "This is required to be able to use Alveo PCIe cards."
 fi
 
 DOCKER_GID=$(id -g)
@@ -71,15 +61,8 @@ DOCKER_GNAME=$(id -gn)
 DOCKER_UNAME=$(id -un)
 DOCKER_UID=$(id -u)
 DOCKER_PASSWD="finn"
-# generate a random number per-run to allow multiple
-# containers from the same user
-DOCKER_RND=$(shuf -i0-32768 -n1)
-DOCKER_TAG="finn_dev_${DOCKER_UNAME}"
-# uncomment to run multiple instances with different names
-# DOCKER_INST_NAME="finn_${DOCKER_UNAME}_${DOCKER_RND}"
 DOCKER_INST_NAME="finn_dev_${DOCKER_UNAME}"
-# ensure Docker tag and inst. name are all lowercase
-DOCKER_TAG=$(echo "$DOCKER_TAG" | tr '[:upper:]' '[:lower:]')
+# ensure Docker inst. name is all lowercase
 DOCKER_INST_NAME=$(echo "$DOCKER_INST_NAME" | tr '[:upper:]' '[:lower:]')
 # Absolute path to this script, e.g. /home/user/bin/foo.sh
 SCRIPT=$(readlink -f "$0")
@@ -96,15 +79,21 @@ SCRIPTPATH=$(dirname "$SCRIPT")
 : ${PYNQ_PASSWORD="xilinx"}
 : ${PYNQ_BOARD="Pynq-Z1"}
 : ${PYNQ_TARGET_DIR="/home/xilinx/$DOCKER_INST_NAME"}
-: ${NUM_DEFAULT_WORKERS=1}
+: ${NUM_DEFAULT_WORKERS=4}
 : ${FINN_SSH_KEY_DIR="$SCRIPTPATH/ssh_keys"}
 : ${ALVEO_USERNAME="alveo_user"}
 : ${ALVEO_PASSWORD=""}
 : ${ALVEO_BOARD="U250"}
 : ${ALVEO_TARGET_DIR="/tmp"}
-: ${XILINX_XRT="/opt/xilinx/xrt"}
 : ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"}
+: ${XRT_DEB_VERSION="xrt_202010.2.7.766_18.04-amd64-xrt"}
 : ${FINN_HOST_BUILD_DIR="/tmp/$DOCKER_INST_NAME"}
+: ${FINN_DOCKER_TAG="xilinx/finn:$(git describe --tags --dirty).$XRT_DEB_VERSION"}
+: ${FINN_DOCKER_PREBUILT="0"}
+: ${FINN_DOCKER_RUN_AS_ROOT="0"}
+: ${FINN_DOCKER_GPU="$(docker info | grep nvidia | wc -m)"}
+: ${NVIDIA_VISIBLE_DEVICES=""}
+: ${DOCKER_BUILDKIT="1"}
 : ${FINN_DOCKER_EXTRA=""}
 
 DOCKER_INTERACTIVE=""
@@ -123,7 +112,7 @@ elif [ "$1" = "notebook" ]; then
   else
     JUPYTER_PASSWD_ARG="--NotebookApp.password='$JUPYTER_PASSWD_HASH'"
   fi
-  DOCKER_CMD="jupyter notebook --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks"
+  DOCKER_CMD="jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks"
   DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT "
   DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT "
   DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT "
@@ -148,48 +137,47 @@ else
   DOCKER_INTERACTIVE="-it"
 fi
 
+if [ "$FINN_DOCKER_GPU" != 0 ];then
+  gecho "nvidia-docker detected, enabling GPUs"
+  if [ ! -z "$NVIDIA_VISIBLE_DEVICES" ];then
+    DOCKER_EXTRA+="--runtime nvidia -e NVIDIA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES "
+  else
+    DOCKER_EXTRA+="--gpus all"
+  fi
+fi
+
 VIVADO_HLS_LOCAL=$VIVADO_PATH
 VIVADO_IP_CACHE=$FINN_HOST_BUILD_DIR/vivado_ip_cache
-INSTALL_XRT_DEPS=0
 
 # ensure build dir exists locally
 mkdir -p $FINN_HOST_BUILD_DIR
 mkdir -p $FINN_SSH_KEY_DIR
 
 gecho "Docker container is named $DOCKER_INST_NAME"
+gecho "Docker tag is named $FINN_DOCKER_TAG"
 gecho "Mounting $FINN_HOST_BUILD_DIR into $FINN_HOST_BUILD_DIR"
-gecho "Mounting $VIVADO_PATH into $VIVADO_PATH"
-if [ ! -z "$VITIS_PATH" ];then
-  gecho "Mounting $VITIS_PATH into $VITIS_PATH"
-  INSTALL_XRT_DEPS=1
-fi
+gecho "Mounting $FINN_XILINX_PATH into $FINN_XILINX_PATH"
 gecho "Port-forwarding for Jupyter $JUPYTER_PORT:$JUPYTER_PORT"
 gecho "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT"
 gecho "Vivado IP cache dir is at $VIVADO_IP_CACHE"
 gecho "Using default PYNQ board $PYNQ_BOARD"
 
 # Build the FINN Docker image
-# Need to ensure this is done within the finn/ root folder:
-OLD_PWD=$(pwd)
-cd $SCRIPTPATH
-docker build -f docker/Dockerfile.finn_dev --tag=$DOCKER_TAG \
-             --build-arg GID=$DOCKER_GID \
-             --build-arg GNAME=$DOCKER_GNAME \
-             --build-arg UNAME=$DOCKER_UNAME \
-             --build-arg UID=$DOCKER_UID \
-             --build-arg PASSWD=$DOCKER_PASSWD \
-             --build-arg INSTALL_XRT_DEPS=$INSTALL_XRT_DEPS \
-             .
-cd $OLD_PWD
+if [ "$FINN_DOCKER_PREBUILT" = "0" ]; then
+  # Need to ensure this is done within the finn/ root folder:
+  OLD_PWD=$(pwd)
+  cd $SCRIPTPATH
+  docker build -f docker/Dockerfile.finn --build-arg XRT_DEB_VERSION=$XRT_DEB_VERSION --tag=$FINN_DOCKER_TAG .
+  cd $OLD_PWD
+fi
 # Launch container with current directory mounted
 # important to pass the --init flag here for correct Vivado operation, see:
 # https://stackoverflow.com/questions/55733058/vivado-synthesis-hangs-in-docker-container-spawned-by-jenkins
-DOCKER_EXEC="docker run -t --rm $DOCKER_INTERACTIVE --init "
+DOCKER_EXEC="docker run -t --rm $DOCKER_INTERACTIVE --tty --init "
 DOCKER_EXEC+="--hostname $DOCKER_INST_NAME "
 DOCKER_EXEC+="-e SHELL=/bin/bash "
 DOCKER_EXEC+="-v $SCRIPTPATH:/workspace/finn "
 DOCKER_EXEC+="-v $FINN_HOST_BUILD_DIR:$FINN_HOST_BUILD_DIR "
-DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:/home/$DOCKER_UNAME/.ssh "
 DOCKER_EXEC+="-e FINN_BUILD_DIR=$FINN_HOST_BUILD_DIR "
 DOCKER_EXEC+="-e FINN_ROOT="/workspace/finn" "
 DOCKER_EXEC+="-e LOCALHOST_URL=$LOCALHOST_URL "
@@ -200,37 +188,42 @@ DOCKER_EXEC+="-e PYNQ_USERNAME=$PYNQ_USERNAME "
 DOCKER_EXEC+="-e PYNQ_PASSWORD=$PYNQ_PASSWORD "
 DOCKER_EXEC+="-e PYNQ_TARGET_DIR=$PYNQ_TARGET_DIR "
 DOCKER_EXEC+="-e NUM_DEFAULT_WORKERS=$NUM_DEFAULT_WORKERS "
+if [ "$FINN_DOCKER_RUN_AS_ROOT" = "0" ];then
+  DOCKER_EXEC+="-v /etc/group:/etc/group:ro "
+  DOCKER_EXEC+="-v /etc/passwd:/etc/passwd:ro "
+  DOCKER_EXEC+="-v /etc/shadow:/etc/shadow:ro "
+  DOCKER_EXEC+="-v /etc/sudoers.d:/etc/sudoers.d:ro "
+  DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:$HOME/.ssh "
+  DOCKER_EXEC+="--user $DOCKER_UID:$DOCKER_GID "
+else
+  DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:/root/.ssh "
+fi
 if [ ! -z "$IMAGENET_VAL_PATH" ];then
   DOCKER_EXEC+="-v $IMAGENET_VAL_PATH:$IMAGENET_VAL_PATH "
   DOCKER_EXEC+="-e IMAGENET_VAL_PATH=$IMAGENET_VAL_PATH "
 fi
-if [ ! -z "$VIVADO_PATH" ];then
-  DOCKER_EXEC+="-e "XILINX_VIVADO=$VIVADO_PATH" "
-  DOCKER_EXEC+="-v $VIVADO_PATH:$VIVADO_PATH "
-  DOCKER_EXEC+="-e VIVADO_PATH=$VIVADO_PATH "
-fi
-if [ ! -z "$VITIS_PATH" ];then
-  if [ -z "$PLATFORM_REPO_PATHS" ];then
-    recho "PLATFORM_REPO_PATHS must be set for Vitis/Alveo flows"
-    exit -1
+if [ ! -z "$FINN_XILINX_PATH" ];then
+  VIVADO_PATH="$FINN_XILINX_PATH/Vivado/$FINN_XILINX_VERSION"
+  VITIS_PATH="$FINN_XILINX_PATH/Vitis/$FINN_XILINX_VERSION"
+  DOCKER_EXEC+="-v $FINN_XILINX_PATH:$FINN_XILINX_PATH "
+  if [ -d "$VIVADO_PATH" ];then
+    DOCKER_EXEC+="-e "XILINX_VIVADO=$VIVADO_PATH" "
+    DOCKER_EXEC+="-e VIVADO_PATH=$VIVADO_PATH "
+  fi
+  if [ -d "$VITIS_PATH" ];then
+    DOCKER_EXEC+="-e VITIS_PATH=$VITIS_PATH "
   fi
-  if [ -z "$XILINX_XRT" ];then
-    recho "XILINX_XRT must be set for Vitis/Alveo flows"
-    exit -1
+  if [ -d "$PLATFORM_REPO_PATHS" ];then
+    DOCKER_EXEC+="-v $PLATFORM_REPO_PATHS:$PLATFORM_REPO_PATHS "
+    DOCKER_EXEC+="-e PLATFORM_REPO_PATHS=$PLATFORM_REPO_PATHS "
+    DOCKER_EXEC+="-e ALVEO_IP=$ALVEO_IP "
+    DOCKER_EXEC+="-e ALVEO_USERNAME=$ALVEO_USERNAME "
+    DOCKER_EXEC+="-e ALVEO_PASSWORD=$ALVEO_PASSWORD "
+    DOCKER_EXEC+="-e ALVEO_BOARD=$ALVEO_BOARD "
+    DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR "
   fi
-  DOCKER_EXEC+="-v $VITIS_PATH:$VITIS_PATH "
-  DOCKER_EXEC+="-v $PLATFORM_REPO_PATHS:$PLATFORM_REPO_PATHS "
-  DOCKER_EXEC+="-v $XILINX_XRT:$XILINX_XRT "
-  DOCKER_EXEC+="-e VITIS_PATH=$VITIS_PATH "
-  DOCKER_EXEC+="-e PLATFORM_REPO_PATHS=$PLATFORM_REPO_PATHS "
-  DOCKER_EXEC+="-e XILINX_XRT=$XILINX_XRT "
-  DOCKER_EXEC+="-e ALVEO_IP=$ALVEO_IP "
-  DOCKER_EXEC+="-e ALVEO_USERNAME=$ALVEO_USERNAME "
-  DOCKER_EXEC+="-e ALVEO_PASSWORD=$ALVEO_PASSWORD "
-  DOCKER_EXEC+="-e ALVEO_BOARD=$ALVEO_BOARD "
-  DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR "
 fi
 DOCKER_EXEC+="$DOCKER_EXTRA "
-DOCKER_EXEC+="$DOCKER_TAG $DOCKER_CMD"
+DOCKER_EXEC+="$FINN_DOCKER_TAG $DOCKER_CMD"
 
 $DOCKER_EXEC
diff --git a/setup.cfg b/setup.cfg
index 45fe40156acd966fed302522e9e8ca716a4d331c..9a6ca312aff459fb29f6e33a866b911e1a038229 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -58,9 +58,6 @@ package_dir =
     =src
 # DON'T CHANGE THE FOLLOWING LINE! IT WILL BE UPDATED BY PYSCAFFOLD!
 setup_requires = pyscaffold>=3.2a0,<3.3a0
-# finn-base is added specifically to be able to build on readthedocs
-install_requires =
-    finn-base @ git+https://github.com/Xilinx/finn-base#egg=finn-base
 # The usage of test_requires is discouraged, see `Dependency Management` docs
 # tests_require = pytest; pytest-cov
 # Require a specific Python version, e.g. Python 2.7 or >= 3.4
@@ -75,6 +72,9 @@ exclude =
 # Add here additional requirements for extra features, to install with:
 # `pip install FINN[PDF]` like:
 # PDF = ReportLab; RXP
+# finn-base is needed to build the full set of docs
+docs =
+    finn-base
 # Add here test requirements (semicolon/line-separated)
 testing =
     pytest
@@ -109,6 +109,7 @@ markers =
     slow: marks tests as slow (deselect with '-m "not slow"')
     vivado: mark tests that require Vivado or Vivado HLS
     vitis: mark tests that require Vitis
+    board: mark tests that require a PYNQ board
 norecursedirs =
     dist
     build
diff --git a/setup.py b/setup.py
index d7e158b56010fbc9ba2fb9f143ea2fc8d8a901d9..8fd781462c22f029071ac441f2be33e2f525bd47 100644
--- a/setup.py
+++ b/setup.py
@@ -35,10 +35,11 @@
     PyScaffold helps you to put up the scaffold of your new Python project.
     Learn more under: https://pyscaffold.org/
 """
-import sys
 from pkg_resources import VersionConflict, require
 from setuptools import setup
 
+import sys
+
 try:
     require("setuptools>=38.3")
 except VersionConflict:
diff --git a/src/finn/analysis/fpgadataflow/floorplan_params.py b/src/finn/analysis/fpgadataflow/floorplan_params.py
index 4c8cbf53de1ae7dc951911678a3f118bd3506dfe..9ba99fb546587ba3c2f385c958ecb172f8903bf7 100644
--- a/src/finn/analysis/fpgadataflow/floorplan_params.py
+++ b/src/finn/analysis/fpgadataflow/floorplan_params.py
@@ -26,8 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.custom_op.registry import getCustomOp
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def floorplan_params(model):
diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
index 39d6332aa42594528fbd5a04dd5efad2c3237e77..aff99efd807d8b04dc6490b299d66c0be8d8fc44 100644
--- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
@@ -25,8 +25,8 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import warnings
 import os
+import warnings
 import xml.etree.ElementTree as ET
 
 import finn.custom_op.registry as registry
diff --git a/src/finn/analysis/fpgadataflow/post_synth_res.py b/src/finn/analysis/fpgadataflow/post_synth_res.py
index 79204c54cdb8233fd7b65968c25af819fce91959..4b817910949fa750f34a53592413bb38c7557c08 100644
--- a/src/finn/analysis/fpgadataflow/post_synth_res.py
+++ b/src/finn/analysis/fpgadataflow/post_synth_res.py
@@ -29,9 +29,9 @@
 import os
 import xml.etree.ElementTree as ET
 
-from finn.transformation.move_reshape import _is_fpgadataflow_node
 from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
+from finn.transformation.move_reshape import _is_fpgadataflow_node
 
 
 def post_synth_res(model, override_synth_report_filename=None):
diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index c46bfa48dff289d37f2cb2a89cdbef8e2789317f..4aa1ad31e1ad73762ef46cc861b1a255ce57b926 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -26,20 +26,21 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.core.modelwrapper import ModelWrapper
-import os
-import json
-import time
 import clize
-import sys
+import json
 import logging
+import os
 import pdb  # NOQA
+import sys
+import time
 import traceback
-from finn.builder.build_dataflow_steps import build_dataflow_step_lookup
+
 from finn.builder.build_dataflow_config import (
     DataflowBuildConfig,
     default_build_dataflow_steps,
 )
+from finn.builder.build_dataflow_steps import build_dataflow_step_lookup
+from finn.core.modelwrapper import ModelWrapper
 
 
 # adapted from https://stackoverflow.com/a/39215961
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index bd938f17411ee42e94e95e02776ad8e973ea10fa..4a112699ec9bdf126f447fe2244eb01f6f4fa042 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -26,14 +26,15 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from typing import List, Optional, Any
-from finn.util.basic import pynq_part_map, alveo_part_map
-from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy
-from enum import Enum
+import numpy as np
+import os
 from dataclasses import dataclass
 from dataclasses_json import dataclass_json
-import os
-import numpy as np
+from enum import Enum
+from typing import Any, List, Optional
+
+from finn.transformation.fpgadataflow.vitis_build import VitisOptStrategy
+from finn.util.basic import alveo_part_map, pynq_part_map
 
 
 class ShellFlowType(str, Enum):
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 1c1861e5286e92abcf983056f8263daae14334e8..5bdccebb58ccb6f4906a05dda58da2494366739f 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -26,80 +26,78 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.core.modelwrapper import ModelWrapper
-import os
 import json
+import numpy as np
+import os
+from copy import deepcopy
+from shutil import copy, copytree
+
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import (
-    ApplyConfig,
-    GiveReadableTensorNames,
-    GiveUniqueNodeNames,
-    RemoveUnusedTensors,
-    RemoveStaticGraphInputs,
-)
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline import Streamline
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.streamline.reorder import (
-    MakeMaxPoolNHWC,
-    MoveScalarLinearPastInvariants,
-)
-from shutil import copy, copytree
-from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
-from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.set_fifo_depths import (
-    InsertAndSetFIFODepths,
-    RemoveShallowFIFOs,
-)
-from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
-from finn.transformation.fpgadataflow.vitis_build import VitisBuild
-from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
-from finn.transformation.fpgadataflow.set_folding import SetFolding
-from finn.transformation.fpgadataflow.create_dataflow_partition import (
-    CreateDataflowPartition,
-)
-from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
-    ReplaceVerilogRelPaths,
-)
-from finn.custom_op.registry import getCustomOp
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.analysis.fpgadataflow.res_estimation import (
-    res_estimation,
-    res_estimation_complete,
-)
+from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
 from finn.analysis.fpgadataflow.op_and_param_counts import (
     aggregate_dict_keys,
     op_and_param_counts,
 )
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
-from finn.util.config import extract_model_config_to_json
-from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
+from finn.analysis.fpgadataflow.res_estimation import (
+    res_estimation,
+    res_estimation_complete,
+)
 from finn.builder.build_dataflow_config import (
     DataflowBuildConfig,
     DataflowOutputType,
     ShellFlowType,
     VerificationStepType,
 )
-from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
+from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
-import numpy as np
-from finn.util.test import execute_parent
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.core.throughput_test import throughput_test_rtlsim
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.core.throughput_test import throughput_test_rtlsim
-from copy import deepcopy
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+    ReplaceVerilogRelPaths,
+)
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.set_fifo_depths import (
+    InsertAndSetFIFODepths,
+    RemoveShallowFIFOs,
+)
+from finn.transformation.fpgadataflow.set_folding import SetFolding
+from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
+from finn.transformation.fpgadataflow.vitis_build import VitisBuild
+from finn.transformation.general import (
+    ApplyConfig,
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+    RemoveStaticGraphInputs,
+    RemoveUnusedTensors,
+)
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
+from finn.util.config import extract_model_config_to_json
+from finn.util.test import execute_parent
 
 
 def verify_step(
@@ -158,13 +156,14 @@ def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
     topologies.
     """
 
-    model = model.transform(MoveScalarLinearPastInvariants())
+    model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
     model = model.transform(Streamline())
     need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0
     if need_lowering:
         model = model.transform(LowerConvsToMatMul())
         model = model.transform(MakeMaxPoolNHWC())
         model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+        model = model.transform(MakeMaxPoolNHWC())
     model = model.transform(ConvertBipolarMatMulToXnorPopcount())
     model = model.transform(Streamline())
     # absorb final add-mul nodes into TopK
@@ -181,7 +180,7 @@ def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig):
 def step_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig):
     """Convert eligible nodes to `HLSCustomOp` subclasses that represent HLS
     layers. Which nodes and particular configurations can be converted to HLS
-    is limited, see the source code of the `convert_to_hls` module for more. """
+    is limited, see the source code of the `convert_to_hls` module for more."""
 
     mem_mode = cfg.default_mem_mode.value
     if cfg.standalone_thresholds:
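For orientation, a minimal sketch of how the reordered builder pieces above are typically used; it assumes only that build_dataflow_step_lookup is a plain dict mapping step names to step functions taking (model, cfg):

# hedged sketch: resolve a builder step by name
from finn.builder.build_dataflow_steps import build_dataflow_step_lookup

streamline_step = build_dataflow_step_lookup["step_streamline"]  # assumed dict lookup
# streamline_step(model, cfg) would then apply step_streamline to a ModelWrapper,
# where cfg is a DataflowBuildConfig instance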
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index a68a2975501806d662e8f0e5fe6519c2fe0f3944..320b947d0dd99b564da5775dfc8624993af57de2 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -26,32 +26,35 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
+from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch
 from finn.custom_op.fpgadataflow.convolutioninputgenerator import (
     ConvolutionInputGenerator,
 )
+from finn.custom_op.fpgadataflow.convolutioninputgenerator1d import (
+    ConvolutionInputGenerator1D,
+)
 from finn.custom_op.fpgadataflow.downsampler import DownSampler
-from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch
-from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
-from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO
-from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
+from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch
+from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch
+from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch
+from finn.custom_op.fpgadataflow.iodma import IODMA
+from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
+from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch
+from finn.custom_op.fpgadataflow.streamingdataflowpartition import (
+    StreamingDataflowPartition,
+)
 from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import (
     StreamingDataWidthConverter_Batch,
 )
-from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch
-from finn.custom_op.fpgadataflow.pool_batch import Pool_Batch
-from finn.custom_op.fpgadataflow.fmpadding_batch import FMPadding_Batch
+from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch
+from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO
+from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
 from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
-from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
-from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
-from finn.custom_op.fpgadataflow.duplicatestreams_batch import DuplicateStreams_Batch
+from finn.custom_op.fpgadataflow.tlastmarker import TLastMarker
 from finn.custom_op.fpgadataflow.vector_vector_activate_batch import (
     Vector_Vector_Activate_Batch,
 )
-from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch
-from finn.custom_op.fpgadataflow.iodma import IODMA
-from finn.custom_op.fpgadataflow.streamingdataflowpartition import (
-    StreamingDataflowPartition,
-)
 
 custom_op = dict()
 
@@ -61,6 +64,7 @@ custom_op["DownSampler"] = DownSampler
 custom_op["StreamingMaxPool_Batch"] = StreamingMaxPool_Batch
 custom_op["StreamingFCLayer_Batch"] = StreamingFCLayer_Batch
 custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator
+custom_op["ConvolutionInputGenerator1D"] = ConvolutionInputGenerator1D
 custom_op["TLastMarker"] = TLastMarker
 custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch
 custom_op["StreamingFIFO"] = StreamingFIFO
diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
index fd764212b865d778993e69ec673b0a46180b301a..2558394076a24694f153c4cae19eb3368a02a869 100644
--- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
@@ -26,13 +26,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-
 import numpy as np
+import os
 import warnings
+from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import TensorProto, helper
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index 097ec336ff24cd826e6530c42b7cdb1108971fa1..b1dc02131e45b0a04acb25723e09847ee858ebdc 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -26,12 +26,12 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from math import ceil
-import os
-
 import numpy as np
-
+import os
+import warnings
+from math import ceil
 from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import (
@@ -39,9 +39,8 @@ from finn.util.data_packing import (
     numpy_to_hls_code,
     rtlsim_output_to_npy,
 )
-from . import templates
 
-import warnings
+from . import templates
 
 # ONNX i/o tensor shape assumptions for channelwise ops:
 # input 0 is the input tensor, shape (..., NumChannels)
@@ -217,7 +216,7 @@ class ChannelwiseOp_Batch(HLSCustomOp):
             return 0
 
     def lut_estimation(self):
-        """Calculates LUT cost, taking memory resource type into account """
+        """Calculates LUT cost, taking memory resource type into account"""
         # TODO add in/out FIFO contributions
         style = self.get_nodeattr("ram_style")
         P = self.get_nodeattr("PE")
@@ -490,7 +489,9 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         numReps = numInputVectors[0]
         self.code_gen_dict["$DEFINES$"] = [
             """#define NumChannels1 {}\n#define PE1 {}\n#define numReps {}""".format(
-                self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps,
+                self.get_nodeattr("NumChannels"),
+                self.get_nodeattr("PE"),
+                numReps,
             )
         ]
 
@@ -533,7 +534,9 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         self.code_gen_dict["$DOCOMPUTE$"] = [
             """Thresholding_Batch<{}, NumChannels1, PE1, {}, {}>
             (in0, out, threshs, numReps);""".format(
-                imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"],
+                imgdim,
+                tmpl_args["TSrcI"],
+                tmpl_args["TDstI"],
             )
         ]
 
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 3f400053df8de6ec1e53e39fb5a3edee15f3ab30..9ec7bc662d95b1c94ca17bc3c9a1a7b6199cc18a 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -26,15 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-
 import math
 import numpy as np
+import os
+from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.custom_op.general.im2col import compute_conv_output_dim
-from onnx import TensorProto, helper
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # ONNX i/o tensor shape assumptions for ConvolutionInputGenerator:
@@ -61,12 +60,14 @@ class ConvolutionInputGenerator(HLSCustomOp):
 
     def get_nodeattr_types(self):
         my_attrs = {
-            "ConvKernelDim": ("i", True, 0),
+            "ConvKernelDim": ("ints", True, []),  # [H, W] = [Y, X]
             "IFMChannels": ("i", True, 0),
-            "IFMDim": ("i", True, 0),
-            "OFMDim": ("i", True, 0),
+            "IFMDim": ("ints", True, []),  # [H, W] = [Y, X]
+            "OFMDim": ("ints", True, []),  # [H, W] = [Y, X]
             "SIMD": ("i", True, 0),
-            "Stride": ("i", True, 0),
+            "Stride": ("ints", True, [1, 1]),  # [H, W] = [Y, X]
+            # note: only dilation=1 supported for now
+            "Dilation": ("ints", True, [1, 1]),  # [H, W] = [Y, X]
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
@@ -86,44 +87,59 @@ class ConvolutionInputGenerator(HLSCustomOp):
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
 
-    def get_normal_input_shape(self):
+    def get_nodeattr(self, name):
+        # overriding get_nodeattr to check for the square kernel/image requirement,
+        # since this can't be done with the attribute restriction in nodeattr_types
+        # TODO non-square can be enabled in theory but needs testing
+        ret = super().get_nodeattr(name)
+        props_to_check = ["ConvKernelDim", "IFMDim", "OFMDim", "Stride", "Dilation"]
+        if name in props_to_check:
+            is_square = ret[0] == ret[1]
+            assert is_square, "Only square %s supported" % name
+        if name == "Dilation":
+            assert ret[0] == ret[1] == 1, "Only dilation=1 supported"
+        return ret
 
-        ifm_dim = self.get_nodeattr("IFMDim")
+    def get_normal_input_shape(self):
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-
-        ishape = (1, ifm_dim, ifm_dim, ifm_ch)
+        ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch)
         return ishape
 
     def get_folded_input_shape(self):
-        ifm_dim = self.get_nodeattr("IFMDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
         simd = self.get_nodeattr("SIMD")
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
         wf = int(ifm_ch / simd)
-        folded_ishape = (1, ifm_dim, ifm_dim, wf, simd)
+        folded_ishape = (1, ifm_dim_h, ifm_dim_w, wf, simd)
         return folded_ishape
 
     def get_normal_output_shape(self):
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        stride = self.get_nodeattr("Stride")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         pad = 0
-        ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
-        oshape = (1, ofm_dim, ofm_dim, k * k * ifm_ch)
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
+        oshape = (1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch)
         return oshape
 
     def get_folded_output_shape(self):
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        stride = self.get_nodeattr("Stride")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
         simd = self.get_nodeattr("SIMD")
         pad = 0
-        ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad)
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
         assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
-        wf = int((k * k * ifm_ch) // simd)
-        folded_oshape = (1, ofm_dim, ofm_dim, wf, simd)
+        wf = int((k_h * k_w * ifm_ch) // simd)
+        folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
         return folded_oshape
 
     def make_shape_compatible_op(self, model):
@@ -186,26 +202,31 @@ class ConvolutionInputGenerator(HLSCustomOp):
     def get_exp_cycles(self):
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        k = self.get_nodeattr("ConvKernelDim")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        ofm_dim = self.get_nodeattr("OFMDim")
-        stride = self.get_nodeattr("Stride")
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
+        ofm_dim_h, ofm_dim_w = self.get_nodeattr("OFMDim")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
+
         # since mmv != 1 is not supported yet, we set mmv for now to 1
         mmv = 1
         # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
-        cycles_write_block = (ofm_dim * k * k * (ifm_ch / simd)) / mmv
-        cycles_read_block = stride * ifm_dim * (ifm_ch / simd)
+        cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
+        cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
         max_cycles = max(cycles_write_block, cycles_read_block)
-        exp_cycles = ifm_dim * k * (ifm_ch / simd) + ofm_dim * max_cycles
+        exp_cycles = (
+            ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
+        )
 
         return int(exp_cycles)
 
     def bram_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "block" or ram_style == "auto":
             ram_depth = ifm_dim * ifm_ch / simd
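To make the updated cycle estimate above concrete, a small worked example with assumed (illustrative) shapes:

# illustrative: 32x32 input, 3x3 kernel, stride 1, dilation 1, 4 channels, SIMD=2
simd, ifm_ch = 2, 4
k_h, k_w = 3, 3
ifm_dim_w, ofm_dim_h, ofm_dim_w = 32, 30, 30  # 32 - 3 + 1 = 30
stride_w, dilation_h, mmv = 1, 1, 1
cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv  # 540.0
cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)            # 64.0
max_cycles = max(cycles_write_block, cycles_read_block)               # 540.0
exp_cycles = ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
# = 192.0 + 16200.0 = 16392.0 expected cycles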
@@ -232,11 +253,12 @@ class ConvolutionInputGenerator(HLSCustomOp):
             return 0
 
     def lut_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "distributed":
             ram_luts = int(
@@ -252,11 +274,12 @@ class ConvolutionInputGenerator(HLSCustomOp):
         return 300 + ram_luts
 
     def uram_estimation(self):
+        # NOTE: only tested with a square convolution
         simd = self.get_nodeattr("SIMD")
         ifm_ch = self.get_nodeattr("IFMChannels")
-        ifm_dim = self.get_nodeattr("IFMDim")
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
         ram_style = self.get_nodeattr("ram_style")
         if ram_style == "ultra":
             return int(
@@ -295,7 +318,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
         assert (
             inp.shape == exp_ishape
         ), """Input shape doesn't
-        match expected shape (1, ifm_dim, ifm_dim, ifm_ch)."""
+        match expected shape (1, ifm_dim_h, ifm_dim_w, ifm_ch)."""
         if self.get_input_datatype() == DataType.BIPOLAR:
             # store bipolar activations as binary
             inp = (inp + 1) / 2
@@ -354,26 +377,27 @@ class ConvolutionInputGenerator(HLSCustomOp):
         assert (
             context[node.output[0]].shape == exp_oshape
         ), """Output
-        shape doesn't match expected shape (1, ofm_dim, ofm_dim, k*k*ifm_ch)."""
+        shape doesn't match expected shape (1, ofm_dim_h, ofm_dim_w, k_h*k_w*ifm_ch)."""
 
     def global_includes(self):
         self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"']
 
     def defines(self, var):
         numReps = 1
+        ifm_dim = self.get_nodeattr("IFMDim")[0]
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ofm_dim = self.get_nodeattr("OFMDim")[0]
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
+        simd = self.get_nodeattr("SIMD")
+        ifm_precision = self.get_input_datatype().bitwidth()
+
         self.code_gen_dict["$DEFINES$"] = [
             """#define ConvKernelDim1 {}\n #define IFMChannels1 {}\n
             #define Input_precision1 {}\n #define IFMDim1 {}\n
             #define OFMDim1 {}\n #define SIMD1 {}\n
             #define Stride1 {}\n #define numReps {}""".format(
-                self.get_nodeattr("ConvKernelDim"),
-                self.get_nodeattr("IFMChannels"),
-                self.get_input_datatype().bitwidth(),
-                self.get_nodeattr("IFMDim"),
-                self.get_nodeattr("OFMDim"),
-                self.get_nodeattr("SIMD"),
-                self.get_nodeattr("Stride"),
-                numReps,
+                k, ifm_ch, ifm_precision, ifm_dim, ofm_dim, simd, stride, numReps
             )
         ]
 
@@ -415,9 +439,11 @@ class ConvolutionInputGenerator(HLSCustomOp):
         }
         hls_ram_style = map_to_hls_ram_style[ram_style]
         hls_call = node.op_type
-        # check if non optimized ConvolutionInputGenerator is needed
-        k = self.get_nodeattr("ConvKernelDim")
-        stride = self.get_nodeattr("Stride")
+
+        # check which ConvolutionInputGenerator is needed
+        k = self.get_nodeattr("ConvKernelDim")[0]
+        stride = self.get_nodeattr("Stride")[0]
+
         if k % stride != 0:
             hls_call += "_kernel_stride"
 
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
new file mode 100644
index 0000000000000000000000000000000000000000..b428210acfd70186f68e7f1b35cfcd945d0a77d9
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py
@@ -0,0 +1,615 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import math
+import numpy as np
+import os
+from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
+from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
+
+# This operation should only be used for 1D convolutions: either IFMDim_H or
+# IFMDim_W must be 1, with that dimension acting as the so-called
+# dummy dimension.
+
+# ONNX i/o tensor shape assumptions for ConvolutionInputGenerator1D:
+# input 0 is the input tensor, shape NHWC = (1, IFMDim_H, IFMDim_W, IFMChannels)
+# output 0 is the output tensor, shape NHWC:
+#     = (1, OFMDim_H, OFMDim_W, (ConvKernelDim_H*ConvKernelDim_W)*IFMChannels)
+
+# note: the actual data layout produced by the hlslib kernels is different
+# for depthwise and non-depthwise ops.
+# * non-depthwise SWG: (1, OFMDim_H, OFMDim_W, K_H, K_W, IFMChannels/SIMD, SIMD)
+# * depthwise SWG: (1, OFMDim_H, OFMDim_W, IFMChannels/SIMD, K_H, K_W, SIMD)
+# see test_fpgadataflow_slidingwindow.py for an example of how to transform
+# between the two layouts
+
+
+class ConvolutionInputGenerator1D(HLSCustomOp):
+    """Class that corresponds to one of the 1D finn-hlslib ConvolutionInputGenerator
+    (sliding window) function variants. Depending on the combination of
+    attributes (e.g. depthwise or not, whether dilation is more than 1) a different
+    variant will be picked for the actual HLS implementation."""
+
+    def __init__(self, onnx_node):
+        super().__init__(onnx_node)
+
+    def get_nodeattr_types(self):
+        my_attrs = {
+            "ConvKernelDim": ("ints", True, []),  # [H, W] = [Y, X]
+            "IFMChannels": ("i", True, 0),
+            "IFMDim": ("ints", True, []),  # [H, W] = [Y, X]
+            "OFMDim": ("ints", True, []),  # [H, W] = [Y, X]
+            "SIMD": ("i", True, 0),
+            "Stride": ("ints", True, []),  # [H, W] = [Y, X]
+            "Dilation": ("ints", True, []),  # [H, W] = [Y, X]
+            # FINN DataTypes for inputs, weights, outputs
+            "inputDataType": ("s", True, ""),
+            "outputDataType": ("s", True, ""),
+            "depthwise": ("i", False, 0, {0, 1}),
+            # FPGA resource type for ConvolutionInputGenerator input buffer
+            # auto -- let Vivado HLS decide
+            # block -- use BRAM
+            # distributed -- use LUTRAM
+            # ultra -- use URAM
+            "ram_style": (
+                "s",
+                False,
+                "distributed",
+                {"auto", "block", "distributed", "ultra"},
+            ),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
+
+    def get_normal_input_shape(self):
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch)
+        return ishape
+
+    def get_folded_input_shape(self):
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        simd = self.get_nodeattr("SIMD")
+        assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
+        wf = int(ifm_ch / simd)
+        folded_ishape = (1, ifm_dim_h, ifm_dim_w, wf, simd)
+        return folded_ishape
+
+    def get_normal_output_shape(self):
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
+        pad = 0
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
+        oshape = (1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch)
+        return oshape
+
+    def get_folded_output_shape(self):
+        k_h, k_w = self.get_nodeattr("ConvKernelDim")
+        ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        stride_h, stride_w = self.get_nodeattr("Stride")
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
+        simd = self.get_nodeattr("SIMD")
+        pad = 0
+        ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad, dilation_h)
+        ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad, dilation_w)
+        assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
+        wf = int((k_h * k_w * ifm_ch) // simd)
+        folded_oshape = (1, ofm_dim_h, ofm_dim_w, wf, simd)
+        return folded_oshape
+
+    def make_shape_compatible_op(self, model):
+        exp_ishape = self.get_normal_input_shape()
+        oshape = self.get_normal_output_shape()
+        ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0]))
+        assert ishape == exp_ishape, "Unexpected input shape for ConvInpGen."
+        # implement tensor with correct shape
+        values = np.random.randn(*oshape).astype(np.float32)
+        return helper.make_node(
+            "Constant",
+            inputs=[],
+            outputs=[self.onnx_node.output[0]],
+            value=helper.make_tensor(
+                name="const_tensor",
+                data_type=TensorProto.FLOAT,
+                dims=values.shape,
+                vals=values.flatten().astype(float),
+            ),
+        )
+
+    def infer_node_datatype(self, model):
+        node = self.onnx_node
+        # data type stays the same
+        dtype = model.get_tensor_datatype(node.input[0])
+        model.set_tensor_datatype(node.output[0], dtype)
+
+    def verify_node(self):
+        pass
+
+    def get_input_datatype(self):
+        """Returns FINN DataType of input."""
+        return DataType[self.get_nodeattr("inputDataType")]
+
+    def get_output_datatype(self):
+        """Returns FINN DataType of output."""
+        return DataType[self.get_nodeattr("outputDataType")]
+
+    def get_instream_width(self):
+        """Returns stream width, input and output stream width are equal for
+        the sliding window function"""
+        ibits = self.get_input_datatype().bitwidth()
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        assert ifm_ch % simd == 0, "SIMD must divide IFMChannels"
+        in_width = simd * ibits
+        return in_width
+
+    def get_outstream_width(self):
+        """Returns stream width, input and output stream width are equal for
+        the sliding window function, so the function to determine the input
+        stream width can be reused."""
+        return self.get_instream_width()
+
+    def get_number_output_values(self):
+        folded_oshape = self.get_folded_output_shape()
+        num_output_elems = np.prod(folded_oshape[:-1])
+        return num_output_elems
+
+    def get_1d_conv_attrs_normalized(self):
+        # support both (1, D) and (D, 1) cases transparently:
+        # For the kernel, presenting the input data of size D as
+        # [H, W] = [Y, X] = [1, D] or [D, 1]
+        # effectively gives the same result. Because the
+        # ConvolutionInputGenerator_NonSquare_Dilated(_dws) kernel currently only
+        # supports dilation>1 along the X-axis and the
+        # ConvolutionInputGenerator_NonSquare only works for stride>1 along the
+        # X-axis, we are working with the following assumption:
+        # the dummy ('1') dimension is the Y-dimension, i.e.
+        # images and kernels (and their attributes) of dimension
+        # [H, W] = [Y, X] = [D, 1] or [1, D] are always mapped to [1, D]
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        k = self.get_nodeattr("ConvKernelDim")
+        ifm_dim = self.get_nodeattr("IFMDim")
+        ofm_dim = self.get_nodeattr("OFMDim")
+        stride = self.get_nodeattr("Stride")
+        dilation = self.get_nodeattr("Dilation")
+
+        # see defines() for an explanation
+        if ifm_dim[1] == 1:
+            ifm_dim = ifm_dim[::-1]
+            ofm_dim = ofm_dim[::-1]
+            k = k[::-1]
+            stride = stride[::-1]
+            dilation = dilation[::-1]
+
+        return (ifm_ch, ifm_dim, ofm_dim, k, stride, dilation)
+
+    def get_exp_cycles(self):
+        simd = self.get_nodeattr("SIMD")
+        (
+            ifm_ch,
+            ifm_dim,
+            ofm_dim,
+            k,
+            stride,
+            dilation,
+        ) = self.get_1d_conv_attrs_normalized()
+        ifm_dim_h, ifm_dim_w = ifm_dim
+        ofm_dim_h, ofm_dim_w = ofm_dim
+        k_h, k_w = k
+        stride_h, stride_w = stride
+        dilation_h, dilation_w = dilation
+
+        # since mmv != 1 is not supported yet, we set mmv for now to 1
+        mmv = 1
+        # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
+        cycles_write_block = (ofm_dim_w * k_w * k_h * (ifm_ch / simd)) / mmv
+        cycles_read_block = stride_w * ifm_dim_w * (ifm_ch / simd)
+        max_cycles = max(cycles_write_block, cycles_read_block)
+        exp_cycles = (
+            ifm_dim_w * k_h * dilation_h * (ifm_ch / simd) + ofm_dim_h * max_cycles
+        )
+
+        return int(exp_cycles)
+
+    def bram_estimation(self):
+        # NOTE: not tested for correctness
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ifm_dim = np.prod(self.get_nodeattr("IFMDim"))
+        k = np.prod(self.get_nodeattr("ConvKernelDim"))
+        stride = np.prod(self.get_nodeattr("Stride"))
+        ram_style = self.get_nodeattr("ram_style")
+        if ram_style == "block" or ram_style == "auto":
+            ram_depth = ifm_dim * ifm_ch / simd
+            if ram_depth <= 512:
+                ram_width = 36
+            elif ram_depth <= 1024:
+                ram_width = 18
+            elif ram_depth <= 2048:
+                ram_width = 9
+            elif ram_depth <= 4096:
+                ram_width = 4
+            elif ram_depth <= 8192:
+                ram_width = 2
+            else:
+                ram_width = 1
+            return int(
+                (k + stride)
+                * (
+                    math.ceil(simd * self.get_input_datatype().bitwidth() / ram_width)
+                    * math.ceil(ifm_dim * ifm_ch / simd / ram_depth)
+                )
+            )
+        else:
+            return 0
+
+    def lut_estimation(self):
+        # NOTE: not tested for correctness
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ifm_dim = np.prod(self.get_nodeattr("IFMDim"))
+        k = np.prod(self.get_nodeattr("ConvKernelDim"))
+        stride = np.prod(self.get_nodeattr("Stride"))
+        ram_style = self.get_nodeattr("ram_style")
+        if ram_style == "distributed":
+            ram_luts = int(
+                (k + stride)
+                * (
+                    simd
+                    * self.get_input_datatype().bitwidth()
+                    * math.ceil(ifm_dim * ifm_ch / simd / 64)
+                )
+            )
+        else:
+            ram_luts = 0
+        return 300 + ram_luts
+
+    def uram_estimation(self):
+        # NOTE: not tested for correctness
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ifm_dim = np.prod(self.get_nodeattr("IFMDim"))
+        k = np.prod(self.get_nodeattr("ConvKernelDim"))
+        stride = np.prod(self.get_nodeattr("Stride"))
+        ram_style = self.get_nodeattr("ram_style")
+        if ram_style == "ultra":
+            return int(
+                (k + stride)
+                * (
+                    math.ceil(simd * self.get_input_datatype().bitwidth() / 64)
+                    * math.ceil(ifm_dim * ifm_ch / simd / 4096)
+                )
+            )
+        else:
+            return 0
+
+    def execute_node(self, context, graph):
+        mode = self.get_nodeattr("exec_mode")
+        node = self.onnx_node
+        exp_ishape = self.get_normal_input_shape()
+        exp_oshape = self.get_normal_output_shape()
+        folded_ishape = self.get_folded_input_shape()
+        folded_oshape = self.get_folded_output_shape()
+
+        # TODO ensure codegen dir exists
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        elif mode == "rtlsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+            has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+        inp = context[node.input[0]]
+        assert str(inp.dtype) == "float32", "Input datatype is not float32"
+        assert (
+            inp.shape == exp_ishape
+        ), """Input shape doesn't
+        match expected shape (1, ifm_dim_h, ifm_dim_w, ifm_ch)."""
+        if self.get_input_datatype() == DataType.BIPOLAR:
+            # store bipolar activations as binary
+            inp = (inp + 1) / 2
+            export_idt = DataType.BINARY
+        else:
+            export_idt = self.get_input_datatype()
+        # reshape input into folded form
+        inp = inp.reshape(folded_ishape)
+        # make copy before saving array
+        reshaped_input = inp.copy()
+        np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
+
+        if mode == "cppsim":
+            # execute the precompiled model
+            super().exec_precompiled_singlenode_model()
+            # load output npy file
+            super().npy_to_dynamic_output(context)
+            assert (
+                context[node.output[0]].shape == folded_oshape
+            ), "cppsim \
+            did not produce expected folded output shape"
+            context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
+        elif mode == "rtlsim":
+            sim = self.get_rtlsim()
+            nbits = self.get_instream_width()
+            rtlsim_inp = npy_to_rtlsim_input(
+                "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
+            )
+            super().reset_rtlsim(sim)
+            super().toggle_clk(sim)
+            rtlsim_output = self.rtlsim(sim, rtlsim_inp)
+            odt = export_idt
+            target_bits = odt.bitwidth()
+            packed_bits = self.get_outstream_width()
+            out_npy_path = "{}/output.npy".format(code_gen_dir)
+            out_shape = self.get_folded_output_shape()
+            rtlsim_output_to_npy(
+                rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits
+            )
+            # load and reshape output
+            output = np.load(out_npy_path)
+            output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape)
+            context[node.output[0]] = output
+        else:
+            raise Exception(
+                """Invalid value for attribute exec_mode! Is currently set to: {}
+            has to be set to one of the following values ("cppsim", "rtlsim")""".format(
+                    mode
+                )
+            )
+        # binary -> bipolar if needed
+        if self.get_output_datatype() == DataType.BIPOLAR:
+            out = context[node.output[0]]
+            out = 2 * out - 1
+            context[node.output[0]] = out
+        assert (
+            context[node.output[0]].shape == exp_oshape
+        ), """Output
+        shape doesn't match expected shape (1, ofm_dim_h, ofm_dim_w, k_h*k_w*ifm_ch)."""
+
+    def global_includes(self):
+        self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"']
+
+    def defines(self, var):
+        numReps = 1
+        (
+            ifm_ch,
+            ifm_dim,
+            ofm_dim,
+            k,
+            stride,
+            dilation,
+        ) = self.get_1d_conv_attrs_normalized()
+        simd = self.get_nodeattr("SIMD")
+        ifm_precision = self.get_input_datatype().bitwidth()
+        ifm_dim_y, ifm_dim_x = ifm_dim
+        ofm_dim_y, ofm_dim_x = ofm_dim
+        k_y, k_x = k
+        dilation_y, dilation_x = dilation
+        # For a 1d convolution with stride=[S,1] or [1,S], the finn-hlslib function
+        # of ConvInpGen must be created with [stride_y, stride_x] = [S, S].
+        # TODO: changes in finn-hlslib (slidingwindow.h)
+        stride_y = np.prod(stride)
+        stride_x = np.prod(stride)
+
+        if dilation_x > 1:
+            assert (
+                dilation_y == 1
+            ), "Dilation value greater than 1 along y-axis is not yet supported"
+            self.code_gen_dict["$DEFINES$"] = [
+                """
+            #define ConvKernelDim1_x {}\n
+            #define ConvKernelDim1_y {}\n
+            #define IFMChannels1 {}\n
+            #define Input_precision1 {}\n
+            #define IFMDim1_x {}\n
+            #define IFMDim1_y {}\n
+            #define OFMDim1_x {}\n
+            #define OFMDim1_y {}\n
+            #define SIMD1 {}\n
+            #define Stride1_x {}\n
+            #define Stride1_y {}\n
+            #define Dilation1_x {}\n
+            #define Dilation1_y {}\n
+            #define numReps {}
+            """.format(
+                    k_x,
+                    k_y,
+                    ifm_ch,
+                    ifm_precision,
+                    ifm_dim_x,
+                    ifm_dim_y,
+                    ofm_dim_x,
+                    ofm_dim_y,
+                    simd,
+                    stride_x,
+                    stride_y,
+                    dilation_x,
+                    dilation_y,
+                    numReps,
+                )
+            ]
+        else:
+            ofm_dim = self.get_nodeattr("OFMDim")
+            self.code_gen_dict["$DEFINES$"] = [
+                """
+            #define ConvKernelDim1_x {}\n
+            #define ConvKernelDim1_y {}\n
+            #define IFMChannels1 {}\n
+            #define Input_precision1 {}\n
+            #define IFMDim1_x {}\n
+            #define IFMDim1_y {}\n
+            #define OFMDim1_x {}\n
+            #define OFMDim1_y {}\n
+            #define SIMD1 {}\n
+            #define Stride1_x {}\n
+            #define Stride1_y {}\n
+            #define numReps {}
+            """.format(
+                    k_x,
+                    k_y,
+                    ifm_ch,
+                    ifm_precision,
+                    ifm_dim_x,
+                    ifm_dim_y,
+                    ofm_dim_x,
+                    ofm_dim_y,
+                    simd,
+                    stride_x,
+                    stride_y,
+                    numReps,
+                )
+            ]
+
+    def read_npy_data(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_input_datatype()
+        if dtype == DataType.BIPOLAR:
+            # use binary for bipolar storage
+            dtype = DataType.BINARY
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_instream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_in = "%s/input_0.npy" % code_gen_dir
+        self.code_gen_dict["$READNPYDATA$"] = []
+        self.code_gen_dict["$READNPYDATA$"].append(
+            'npy2apintstream<%s, %s, %d, %s>("%s", in0);'
+            % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+        )
+
+    def strm_decl(self):
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
+        )
+
+    def docompute(self):
+        ram_style = self.get_nodeattr("ram_style")
+        map_to_hls_ram_style = {
+            "auto": "ap_resource_dflt()",
+            "block": "ap_resource_bram()",
+            "distributed": "ap_resource_lutram()",
+            "ultra": "ap_resource_uram()",
+        }
+        hls_ram_style = map_to_hls_ram_style[ram_style]
+        hls_call = "ConvolutionInputGenerator"
+        # check which ConvolutionInputGenerator is needed
+        dilation_h, dilation_w = self.get_nodeattr("Dilation")
+
+        hls_call += "_NonSquare"
+        if dilation_h > 1 or dilation_w > 1:
+            hls_call += "_Dilated"
+            if self.get_nodeattr("depthwise") == 1:
+                hls_call += "_dws"
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
+                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y,
+                Dilation1_x, Dilation1_y> (in0, out, numReps, {});""".format(
+                    hls_call, hls_ram_style
+                )
+            ]
+        elif self.get_nodeattr("depthwise") == 1:
+            hls_call += "_dws"
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
+                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y>
+                (in0, out, numReps, {});""".format(
+                    hls_call, hls_ram_style
+                )
+            ]
+        else:
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, Input_precision1,
+                IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, SIMD1, Stride1_x, Stride1_y>
+                (in0, out, numReps, {});""".format(
+                    hls_call, hls_ram_style
+                )
+            ]
+
+    def dataoutstrm(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_output_datatype()
+        if dtype == DataType.BIPOLAR:
+            # use binary for bipolar storage
+            dtype = DataType.BINARY
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_outstream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_out = "%s/output.npy" % code_gen_dir
+        oshape = self.get_folded_output_shape()
+        oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+
+        self.code_gen_dict["$DATAOUTSTREAM$"] = [
+            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+            % (
+                packed_hls_type,
+                elem_hls_type,
+                elem_bits,
+                npy_type,
+                oshape_cpp_str,
+                npy_out,
+            )
+        ]
+
+    def save_as_npy(self):
+        self.code_gen_dict["$SAVEASCNPY$"] = []
+
+    def blackboxfunction(self):
+        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+            """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
+                hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format(
+                self.onnx_node.name
+            )
+        ]
+
+    def pragmas(self):
+        self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
+        self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
+        self.code_gen_dict["$PRAGMAS$"].append(
+            "#pragma HLS INTERFACE ap_ctrl_none port=return"
+        )
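To illustrate the depthwise vs. non-depthwise output layouts documented at the top of this new file, a minimal numpy sketch with assumed (illustrative) dimensions; the referenced test file remains the authoritative example:

import numpy as np

# illustrative 1D case: OFMDim = [1, 10], kernel = [1, 4], 8 channels, SIMD = 2
n, ofm_h, ofm_w, k_h, k_w, ch, simd = 1, 1, 10, 1, 4, 8, 2
# non-depthwise SWG layout: (1, OFMDim_H, OFMDim_W, K_H, K_W, IFMChannels/SIMD, SIMD)
swg_out = np.arange(n * ofm_h * ofm_w * k_h * k_w * ch).reshape(
    n, ofm_h, ofm_w, k_h, k_w, ch // simd, simd
)
# depthwise SWG layout: (1, OFMDim_H, OFMDim_W, IFMChannels/SIMD, K_H, K_W, SIMD)
swg_out_dw = swg_out.transpose(0, 1, 2, 5, 3, 4, 6)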
diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py
index 002f71aa30b4cf94c63d572e536999327eb2a527..2313ab28b41668b93a55298aa2b589dac999070e 100644
--- a/src/finn/custom_op/fpgadataflow/downsampler.py
+++ b/src/finn/custom_op/fpgadataflow/downsampler.py
@@ -1,10 +1,11 @@
-import os
 import numpy as np
+import os
+import warnings
 from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
-import warnings
 
 
 class DownSampler(HLSCustomOp):
diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
index 764dee98e278a1d5088419fdb3fedd64a55e17ee..88be0cab26176b3b16524063928c7e0e48b136a2 100644
--- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
@@ -26,13 +26,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-
 import numpy as np
+import os
 import warnings
+from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import helper, TensorProto
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
index 27dfab54ec6d483d948dd383e54a44117d7c1a65..ca0b2f12ab6e84bab0b87e5a34917619c2ba289d 100644
--- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
@@ -1,10 +1,11 @@
-import os
 import numpy as np
+import os
+import warnings
 from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
-import warnings
 
 
 class FMPadding_Batch(HLSCustomOp):
@@ -17,9 +18,17 @@ class FMPadding_Batch(HLSCustomOp):
     def get_nodeattr_types(self):
         my_attrs = {
             # spatial size of input images
-            "ImgDim": ("i", True, 0),
+            "ImgDim": ("ints", True, []),  # [H, W] = [Y, X]
             # total padding (per dimension) to apply
-            "Padding": ("i", True, 2),
+            # NOTE: The applied padding scheme pads the same number of zeros
+            # in front of and behind the image along each dimension, so only
+            # the per-dimension totals matter. For example, a padding of
+            # [1, x, 3, x] is equivalent to [2, x, 2, x].
+            "Padding": (
+                "ints",
+                True,
+                [1, 1, 1, 1],
+            ),  # [H_begin, W_begin, H_end, W_end] = [Y_begin, X_begin, Y_end, X_end]
             # number of channels in input image
             "NumChannels": ("i", True, 0),
             # SIMD Input parallelism
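A short worked example of the [H_begin, W_begin, H_end, W_end] padding convention above, with assumed (illustrative) sizes:

idim_h, idim_w = 28, 28
padding = [1, 1, 3, 1]             # [H_begin, W_begin, H_end, W_end]
pad_h = padding[0] + padding[2]    # 4 zeros total along H, applied as 2 + 2
pad_w = padding[1] + padding[3]    # 2 zeros total along W
odim_h, odim_w = idim_h + pad_h, idim_w + pad_w   # padded output: 32 x 30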
@@ -38,31 +47,33 @@ class FMPadding_Batch(HLSCustomOp):
 
     def get_padded_odim(self):
         "Return the padded spatial size of the output."
-
-        idim = self.get_nodeattr("ImgDim")
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
         pad = self.get_nodeattr("Padding")
-        return idim + pad
+        pad_h = pad[0] + pad[2]
+        pad_w = pad[1] + pad[3]
+        odim_h = idim_h + pad_h
+        odim_w = idim_w + pad_w
+        return [odim_h, odim_w]
 
     def get_exp_cycles(self):
-        odim = self.get_padded_odim()
+        odim_h, odim_w = self.get_padded_odim()
         channels = self.get_nodeattr("NumChannels")
         simd = self.get_nodeattr("SIMD")
         batch_size = self.get_nodeattr("numInputVectors")
-        exp_cycles = (channels / simd) * batch_size * odim * odim
+        exp_cycles = (channels / simd) * batch_size * odim_h * odim_w
         return int(exp_cycles)
 
     def get_normal_input_shape(self):
-        idim = self.get_nodeattr("ImgDim")
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
         num_ch = self.get_nodeattr("NumChannels")
-
-        ishape = (1, idim, idim, num_ch)
+        ishape = (1, idim_h, idim_w, num_ch)
         return ishape
 
     def get_normal_output_shape(self):
-        odim = self.get_padded_odim()
+        odim_h, odim_w = self.get_padded_odim()
         num_ch = self.get_nodeattr("NumChannels")
 
-        oshape = (1, odim, odim, num_ch)
+        oshape = (1, odim_h, odim_w, num_ch)
         return oshape
 
     def get_folded_input_shape(self):
@@ -148,20 +159,53 @@ class FMPadding_Batch(HLSCustomOp):
         self.code_gen_dict["$GLOBALS$"] = ['#include "streamtools.h"']
 
     def defines(self, var):
-        self.code_gen_dict["$DEFINES$"] = [
-            """#define ImgDim1 {}\n#define OutputDim1 {}\n
-            #define Padding1 {}\n#define NumChannels1 {}\n
-            #define PaddingStyle1 {}\n#define numReps {}
-            #define SIMD1 {}\n""".format(
-                self.get_nodeattr("ImgDim"),
-                self.get_padded_odim(),
-                self.get_nodeattr("Padding"),
-                self.get_nodeattr("NumChannels"),
-                self.get_nodeattr("PaddingStyle"),
-                self.get_nodeattr("numInputVectors"),
-                self.get_nodeattr("SIMD"),
-            )
-        ]
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
+        odim_h, odim_w = self.get_padded_odim()
+        pad = self.get_nodeattr("Padding")
+        pad_h = pad[0] + pad[2]
+        pad_w = pad[1] + pad[3]
+        is_square = idim_h == idim_w
+
+        if is_square:
+            assert (
+                pad_h == pad_w
+            ), "Only equal padding along the dimensions for square images is supported"
+            self.code_gen_dict["$DEFINES$"] = [
+                """#define ImgDim1 {}\n#define OutputDim1 {}\n
+                #define Padding1 {}\n#define NumChannels1 {}\n
+                #define SIMD1 {}\n#define PaddingStyle1 {}\n
+                #define numReps {}\n""".format(
+                    idim_h,
+                    odim_h,
+                    pad_h,
+                    self.get_nodeattr("NumChannels"),
+                    self.get_nodeattr("SIMD"),
+                    self.get_nodeattr("PaddingStyle"),
+                    self.get_nodeattr("numInputVectors"),
+                )
+            ]
+        else:
+            self.code_gen_dict["$DEFINES$"] = [
+                """
+                #define OutputDim1_x {}\n
+                #define OutputDim1_y {}\n
+                #define Padding1_x {}\n
+                #define Padding1_y {}\n
+                #define NumChannels1 {}\n
+                #define SIMD1 {}\n
+                #define PaddingStyle1 {}\n
+                #define numReps {}\n
+                """.format(
+                    odim_w,
+                    odim_h,
+                    pad_w,
+                    pad_h,
+                    self.get_nodeattr("NumChannels"),
+                    self.get_nodeattr("SIMD"),
+                    self.get_nodeattr("PaddingStyle"),
+                    self.get_nodeattr("numInputVectors"),
+                )
+            ]
 
     def read_npy_data(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -193,12 +237,26 @@ class FMPadding_Batch(HLSCustomOp):
     def docompute(self):
         in_t = self.get_input_datatype().get_hls_datatype_str()
         node = self.onnx_node
-        self.code_gen_dict["$DOCOMPUTE$"] = [
-            """{}<ImgDim1, OutputDim1, Padding1, NumChannels1,SIMD1,
-            {}, PaddingStyle1> (in0, out, numReps);""".format(
-                node.op_type, in_t
-            )
-        ]
+
+        idim_h, idim_w = self.get_nodeattr("ImgDim")
+        is_square = idim_h == idim_w
+
+        if is_square:
+            hls_call = node.op_type
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                """{}<ImgDim1, OutputDim1, Padding1, NumChannels1,SIMD1,
+                {}, PaddingStyle1> (in0, out, numReps);""".format(
+                    hls_call, in_t
+                )
+            ]
+        else:
+            hls_call = "FMPadding_nonsquare_Batch"
+            self.code_gen_dict["$DOCOMPUTE$"] = [
+                """{}<OutputDim1_x, OutputDim1_y, Padding1_x, Padding1_y, NumChannels1,
+                SIMD1, {}, PaddingStyle1> (in0, out, numReps);""".format(
+                    hls_call, in_t
+                )
+            ]
 
     def dataoutstrm(self):
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
@@ -270,7 +328,7 @@ class FMPadding_Batch(HLSCustomOp):
         assert (
             inp.shape == exp_ishape
         ), """Input shape doesn't
-        match expected shape (1, ImgDim, ImgDim, NumChannels)."""
+        match expected shape (1, ImgDim_h, ImgDim_w, NumChannels)."""
         export_idt = self.get_input_datatype()
 
         reshaped_input = inp.reshape(folded_ishape)
@@ -316,4 +374,4 @@ class FMPadding_Batch(HLSCustomOp):
         assert (
             context[node.output[0]].shape == exp_oshape
         ), """Output shape doesn't match expected shape
-            (1, OutputDim, OutputDim, NumChannels)."""
+            (1, OutputDim_H, OutputDim_W, NumChannels)."""
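As a rough sketch of the square/non-square branching introduced in `defines()` and `docompute()` above (hypothetical image and padding values; the template argument lists are abbreviated), the HLS call is selected purely by whether the two spatial dimensions match:

```python
# Hypothetical configuration; mirrors the is_square check used above.
idim_h, idim_w = 32, 16
pad = [1, 1, 1, 1]
odim_h, odim_w = idim_h + pad[0] + pad[2], idim_w + pad[1] + pad[3]

if idim_h == idim_w:
    # square branch: single ImgDim1/OutputDim1/Padding1 defines feed FMPadding_Batch
    hls_call = "FMPadding_Batch<ImgDim1, OutputDim1, Padding1, ...>"
else:
    # non-square branch: separate _x/_y defines feed FMPadding_nonsquare_Batch
    hls_call = "FMPadding_nonsquare_Batch<OutputDim1_x, OutputDim1_y, Padding1_x, Padding1_y, ...>"
print(hls_call, (odim_h, odim_w))
```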
diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
index 8cc71ce9eb57c2dcf1f743a7b96e501ab833f6cd..eabdcf599d23d35ed13069cb81afa3ec4999e8e7 100644
--- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py
@@ -26,13 +26,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-
 import numpy as np
+import os
 import warnings
+from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import TensorProto, helper
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py
index 2af08b920804881211dae81c61f88a913c315e77..c94e430097ec381a0029d9bb1800909e8cc47f6c 100644
--- a/src/finn/custom_op/fpgadataflow/hlscustomop.py
+++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py
@@ -27,22 +27,24 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # namespace package, extend path
 
-from abc import abstractmethod
 import numpy as np
 import os
 import subprocess
+from abc import abstractmethod
+
 from finn.custom_op.base import CustomOp
 from finn.util.basic import (
     CppBuilder,
+    get_rtlsim_trace_depth,
     make_build_dir,
     roundup_to_integer_multiple,
-    get_rtlsim_trace_depth,
 )
+from finn.util.hls import CallHLS
 from finn.util.pyverilator import (
     pyverilate_get_liveness_threshold_cycles,
     rtlsim_multi_io,
 )
-from finn.util.hls import CallHLS
+
 from . import templates
 
 try:
diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py
index d812d018fbeb062da8414cceb9f3dc13ddf8f52d..cc1f6722ddb717ad518ca16e71a39f0b9747db62 100644
--- a/src/finn/custom_op/fpgadataflow/iodma.py
+++ b/src/finn/custom_op/fpgadataflow/iodma.py
@@ -26,12 +26,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import numpy as np
 import math
+import numpy as np
+import warnings
 from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-import warnings
 
 # the IODMA interfaces a memory-mapped AXI interface and an AXI stream
 # direction "in": pulls data from AXI-MM to AXI stream
diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
index 1640e2f27c4672449775fa1c6f2d9b9745e305c4..d70d0f6a9b0cacb491ce748b84c8c7c474605170 100644
--- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py
+++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py
@@ -26,15 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-
 import numpy as np
+import os
+from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from onnx import TensorProto, helper
-from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 from finn.util.basic import roundup_to_integer_multiple
+from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
 class LabelSelect_Batch(HLSCustomOp):
diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py
index edba084b5258de37198520257e438f90f8cc65e3..cef964acd5192ad254e1086dacead590b51e7ec1 100644
--- a/src/finn/custom_op/fpgadataflow/pool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/pool_batch.py
@@ -26,12 +26,12 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
 import numpy as np
+import os
+from onnx import TensorProto, helper
 
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.core.datatype import DataType
-from onnx import TensorProto, helper
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index b276d33876b488073f94a9e58030e4a6ce0a12e3..3caececce8ad3452a959a8516d1d9704fc0241fa 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -26,13 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-import numpy as np
 import math
+import numpy as np
+import os
 import warnings
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
-from finn.core.datatype import DataType
 from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 # does not do anything at the ONNX node-by-node level, and input-output
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index bdcbf995280c3fb641e91d303c714982a98094f5..cf72585392e0a73a5460fb146fee514d37e087e0 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -26,27 +26,28 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import warnings
 import math
-import os
 import numpy as np
-
+import os
+import textwrap
+import warnings
 from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.basic import (
+    calculate_matvec_accumulator_range,
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
-    calculate_matvec_accumulator_range,
 )
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
-    rtlsim_output_to_npy,
     pack_innermost_dim_as_hex_string,
+    rtlsim_output_to_npy,
 )
+
 from . import templates
-import textwrap
 
 # ONNX i/o tensor shape assumptions for StreamingFCLayer:
 # input 0 is the input tensor, shape (.., i_size) = (..., MW)
@@ -238,8 +239,10 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         mem_width = Q * W * P
         mmode = self.get_nodeattr("mem_mode")
         mstyle = self.get_nodeattr("ram_style")
-        if (mmode == "decoupled" and mstyle != "ultra") or (
-            mmode == "const" and self.calc_wmem() <= 128
+        if (
+            (mmode == "decoupled" and mstyle != "ultra")
+            or (mmode == "const" and self.calc_wmem() <= 128)
+            or (mmode == "external")
         ):
             return 0
         width_multiplier = math.ceil(mem_width / 72)
@@ -265,8 +268,10 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         mem_width = Q * W * P
         mmode = self.get_nodeattr("mem_mode")
         mstyle = self.get_nodeattr("ram_style")
-        if (mmode == "decoupled" and mstyle in ["distributed", "ultra"]) or (
-            mmode == "const" and self.calc_wmem() <= 128
+        if (
+            (mmode == "decoupled" and mstyle in ["distributed", "ultra"])
+            or (mmode == "const" and self.calc_wmem() <= 128)
+            or (mmode == "external")
         ):
             return 0
         # assuming SDP mode RAMB18s (see UG573 Table 1-10)
@@ -602,9 +607,11 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                     tdt = DataType.get_smallest_possible(0 - tdt_max)
             else:
                 tdt = DataType.get_smallest_possible(tdt_max)
-            assert np.vectorize(tdt.allowed)(threshold_tensor).all(), (
-                "Thresholds in %s can't be expressed with type %s"
-                % (self.onnx_node.name, str(tdt))
+            assert np.vectorize(tdt.allowed)(
+                threshold_tensor
+            ).all(), "Thresholds in %s can't be expressed with type %s" % (
+                self.onnx_node.name,
+                str(tdt),
             )
             self.set_nodeattr("accDataType", tdt.name)
         else:
@@ -841,9 +848,11 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 # get computed threshold datatype from attribute
                 tdt = DataType[self.get_nodeattr("accDataType")]
 
-                assert np.vectorize(tdt.allowed)(threshold_tensor).all(), (
-                    "Thresholds in %s can't be expressed with type %s"
-                    % (self.onnx_node.name, str(tdt))
+                assert np.vectorize(tdt.allowed)(
+                    threshold_tensor
+                ).all(), "Thresholds in %s can't be expressed with type %s" % (
+                    self.onnx_node.name,
+                    str(tdt),
                 )
                 thresholds_hls_code = numpy_to_hls_code(
                     threshold_tensor, tdt, "thresholds", False, True
@@ -1003,6 +1012,17 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             self.code_gen_dict["$GLOBALS$"] += ['#include "thresh.h"']
 
     def defines(self, var):
+        # Only ipgen mode: Make sure that SIMD parameter satisfies minimum requirements.
+        if var == "ipgen":
+            SIMD = self.get_nodeattr("SIMD")
+            MW = self.get_nodeattr("MW")
+            condition = SIMD > (MW / 1024)
+            msg = (
+                f"HLS synthesis of StreamingFCLayer_Batch requires: "
+                f"SIMD > MW / 1024. This is not fulfilled with: SIMD={SIMD} "
+                f"and MW={MW} for node: {self.onnx_node.name}."
+            )
+            assert condition, msg
         mem_mode = self.get_nodeattr("mem_mode")
         numInputVectors = list(self.get_nodeattr("numInputVectors"))
         numReps = np.prod(numInputVectors)
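The new `ipgen` check above enforces a minimum SIMD unfolding relative to the matrix width MW. A small arithmetic sketch (hypothetical MW/SIMD values; the helper name `simd_ok` is illustrative only) of which configurations pass the `SIMD > MW / 1024` condition:

```python
# Hypothetical layer sizes; mirrors the ipgen assertion above.
def simd_ok(simd, mw):
    """True if the StreamingFCLayer_Batch ipgen check would accept this SIMD."""
    return simd > (mw / 1024)

print(simd_ok(1, 600))    # True:  1 > 600/1024
print(simd_ok(1, 2048))   # False: 2048/1024 = 2, so SIMD must be at least 3
print(simd_ok(4, 2048))   # True
```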
diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py
index 3a2aa5016519b4334ca7d233edb90c8a71826458..71be9e7b8dda0793f967736b5f4df3bd678b50cf 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfifo.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py
@@ -25,16 +25,16 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import os
+import math
 import numpy as np
-from shutil import copy
+import os
 import subprocess
-import math
 import warnings
+from onnx import TensorProto, helper
+from shutil import copy
 
-from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.core.datatype import DataType
-from onnx import TensorProto, helper
+from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 from . import templates
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index 07e1197af54fe5267995cf15424a02df8a5e1500..edbc07300c02c87b47a67297501163766c4cb0dc 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -26,13 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
 import numpy as np
+import os
 import warnings
+from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.core.datatype import DataType
-from onnx import TensorProto, helper
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index eb09d4c529e81908a389b5ec4bc3dabb3dcb95ef..3c82ea4439427be55441c50496bc4b4c7b62cfbc 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -26,13 +26,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from math import ceil, log2
-import textwrap
+import numpy as np
 import os
+import textwrap
 import warnings
-import numpy as np
-
+from math import ceil, log2
 from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.basic import (
@@ -42,9 +42,10 @@ from finn.util.basic import (
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
-    rtlsim_output_to_npy,
     pack_innermost_dim_as_hex_string,
+    rtlsim_output_to_npy,
 )
+
 from . import templates
 
 # ONNX i/o tensor shape assumptions for Thresholding:
@@ -180,7 +181,7 @@ class Thresholding_Batch(HLSCustomOp):
             return 0
 
     def lut_estimation(self):
-        """Calculates LUT cost, taking memory resource type into account """
+        """Calculates LUT cost, taking memory resource type into account"""
         # TODO add in/out FIFO contributions
         style = self.get_nodeattr("ram_style")
         P = self.get_nodeattr("PE")
@@ -604,7 +605,9 @@ class Thresholding_Batch(HLSCustomOp):
         numReps = numInputVectors[0]
         self.code_gen_dict["$DEFINES$"] = [
             """#define NumChannels1 {}\n #define PE1 {}\n #define numReps {}""".format(
-                self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps
+                self.get_nodeattr("NumChannels"),
+                self.get_nodeattr("PE"),
+                numReps,
             )
         ]
         if self.get_nodeattr("mem_mode") == "decoupled":
@@ -686,7 +689,10 @@ class Thresholding_Batch(HLSCustomOp):
             self.code_gen_dict["$DOCOMPUTE$"] = [
                 """{}<{}, NumChannels1, PE1, {}, {}>
                 (in0, out, threshs, numReps);""".format(
-                    node.op_type, imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"]
+                    node.op_type,
+                    imgdim,
+                    tmpl_args["TSrcI"],
+                    tmpl_args["TDstI"],
                 )
             ]
         elif mem_mode == "decoupled":
diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py
index ea356944ecedb8b9278c94313887e7f556ff1b17..930cca72fd6e5f821d65ea810bbd9f9e5b31b29c 100644
--- a/src/finn/custom_op/fpgadataflow/tlastmarker.py
+++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py
@@ -243,13 +243,14 @@ class TLastMarker(HLSCustomOp):
 
     def get_verilog_top_module_intf_names(self):
         intf_names = super().get_verilog_top_module_intf_names()
+        stream_width = self.get_nodeattr("StreamWidth")
         sname = self.hls_sname()
         if self.get_nodeattr("Direction") == "in":
-            intf_names["s_axis"] = ["in0"]
-            intf_names["m_axis"] = ["out_" + sname]
+            intf_names["s_axis"] = [("in0", stream_width)]
+            intf_names["m_axis"] = [("out_" + sname, stream_width)]
         else:
-            intf_names["s_axis"] = ["in0_" + sname]
-            intf_names["m_axis"] = ["out_r"]
+            intf_names["s_axis"] = [("in0_" + sname, stream_width)]
+            intf_names["m_axis"] = [("out_r", stream_width)]
         if self.get_nodeattr("DynIters") == 1:
             intf_names["axilite"] = ["s_axi_control"]
         return intf_names
diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
index 9a897d9fa16064017dfc02f500d2360ae8431b4a..921be6fdfa8839239bb6e746112ed30477b8f529 100644
--- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
+++ b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
@@ -1,20 +1,21 @@
-import os
-import numpy as np
 import math
+import numpy as np
+import os
+import warnings
 from onnx import TensorProto, helper
+
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp
 from finn.util.basic import (
+    calculate_matvec_accumulator_range,
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
-    calculate_matvec_accumulator_range,
 )
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
     rtlsim_output_to_npy,
 )
-import warnings
 
 
 class Vector_Vector_Activate_Batch(HLSCustomOp):
@@ -26,9 +27,9 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
     def get_nodeattr_types(self):
         my_attrs = {
             "PE": ("i", True, 0),
-            "Dim": ("i", True, 0),
+            "Dim": ("ints", True, []),  # [H, W]
             "Channels": ("i", True, 0),
-            "Kernel": ("i", True, 0),
+            "Kernel": ("ints", True, []),  # [H, W]
             "resType": ("s", False, "auto", {"auto", "lut", "dsp"}),
             "ActVal": ("i", False, 0),
             # FINN DataTypes for inputs, weights, outputs
@@ -45,10 +46,10 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
 
     def minimize_accumulator_width(self, model):
         weights = model.get_initializer(self.onnx_node.input[1])
-        k = self.get_nodeattr("Kernel")
+        k_h, k_w = self.get_nodeattr("Kernel")
         fm = self.get_nodeattr("Channels")
         # put weights into the shape expected by calculate_matvec_accumulator_range
-        weights = weights.reshape(fm, k * k).transpose()
+        weights = weights.reshape(fm, k_h * k_w).transpose()
         if len(self.onnx_node.input) > 2:
             thresholds = model.get_initializer(self.onnx_node.input[2])
         else:
@@ -85,9 +86,11 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
                     tdt = DataType.get_smallest_possible(0 - tdt_max)
             else:
                 tdt = DataType.get_smallest_possible(tdt_max)
-            assert np.vectorize(tdt.allowed)(threshold_tensor).all(), (
-                "Thresholds in %s can't be expressed with type %s"
-                % (self.onnx_node.name, str(tdt))
+            assert np.vectorize(tdt.allowed)(
+                threshold_tensor
+            ).all(), "Thresholds in %s can't be expressed with type %s" % (
+                self.onnx_node.name,
+                str(tdt),
             )
             self.set_nodeattr("accDataType", tdt.name)
         else:
@@ -110,9 +113,9 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
     def calc_wmem(self):
         """Calculates and returns WMEM."""
         ch = self.get_nodeattr("Channels")
-        k = self.get_nodeattr("Kernel")
+        k_h, k_w = self.get_nodeattr("Kernel")
         pe = self.get_nodeattr("PE")
-        wmem = k * k * ch // pe
+        wmem = k_h * k_w * ch // pe
         return wmem
 
     def calc_tmem(self):
@@ -181,34 +184,34 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         return out_width
 
     def get_folded_input_shape(self):
-        k = self.get_nodeattr("Kernel")
-        sf = k * k
-        dim = self.get_nodeattr("Dim")
+        k_h, k_w = self.get_nodeattr("Kernel")
+        sf = k_h * k_w
+        dim_h, dim_w = self.get_nodeattr("Dim")
         ch = self.get_nodeattr("Channels")
         pe = self.get_nodeattr("PE")
         nf = ch // pe
-        folded_input_shape = tuple([1, dim, dim, sf * nf, pe])
+        folded_input_shape = tuple([1, dim_h, dim_w, sf * nf, pe])
         return folded_input_shape
 
     def get_folded_output_shape(self):
         ch = self.get_nodeattr("Channels")
         pe = self.get_nodeattr("PE")
         nf = ch // pe
-        dim = self.get_nodeattr("Dim")
-        folded_output_shape = tuple([1, dim, dim, nf, pe])
+        dim_h, dim_w = self.get_nodeattr("Dim")
+        folded_output_shape = tuple([1, dim_h, dim_w, nf, pe])
         return folded_output_shape
 
     def get_normal_input_shape(self):
-        dim = self.get_nodeattr("Dim")
+        dim_h, dim_w = self.get_nodeattr("Dim")
         ch = self.get_nodeattr("Channels")
-        k = self.get_nodeattr("Kernel")
-        normal_input_shape = tuple([1, dim, dim, k * k * ch])
+        k_h, k_w = self.get_nodeattr("Kernel")
+        normal_input_shape = tuple([1, dim_h, dim_w, k_h * k_w * ch])
         return normal_input_shape
 
     def get_normal_output_shape(self):
         ch = self.get_nodeattr("Channels")
-        dim = self.get_nodeattr("Dim")
-        normal_output_shape = tuple([1, dim, dim, ch])
+        dim_h, dim_w = self.get_nodeattr("Dim")
+        normal_output_shape = tuple([1, dim_h, dim_w, ch])
         return normal_output_shape
 
     def get_number_output_values(self):
@@ -218,13 +221,13 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
     def get_exp_cycles(self):
         pe = self.get_nodeattr("PE")
         ch = self.get_nodeattr("Channels")
-        dim = self.get_nodeattr("Dim")
-        k = self.get_nodeattr("Kernel")
+        dim_h, dim_w = self.get_nodeattr("Dim")
+        k_h, k_w = self.get_nodeattr("Kernel")
         # currently FINN supports for vvau a batch size of 1
         batch_size = 1
         # since mmv != 1 is not supported yet, we set mmv for now to 1
         mmv = 1
-        exp_cycles = ((ch * k * k) / pe) * batch_size * (dim * dim) / mmv
+        exp_cycles = ((ch * k_h * k_w) / pe) * batch_size * (dim_h * dim_w) / mmv
         return int(exp_cycles)
 
     def get_template_param_values(self):
@@ -251,17 +254,17 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
     def get_hls_compatible_weight_tensor(self, orig_weight_matrix):
         pe = self.get_nodeattr("PE")
         ch = self.get_nodeattr("Channels")
-        k = self.get_nodeattr("Kernel")
+        k_h, k_w = self.get_nodeattr("Kernel")
         wmem = self.calc_wmem()
         assert orig_weight_matrix.shape == (
             ch,
             1,
-            k,
-            k,
+            k_h,
+            k_w,
         ), """Weights matrix doesn't
         have expected shape (channels, 1, kernel_size, kernel_size)"""
         ret = orig_weight_matrix
-        ret = ret.reshape(ch, k * k)
+        ret = ret.reshape(ch, k_h * k_w)
         # distribute rows between PEs
         ret = interleave_matrix_outer_dim_from_partitions(ret, pe)
         ret = ret.reshape(1, pe, wmem, 1)
@@ -338,9 +341,11 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
                 threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
                 # get computed threshold datatype from attribute
                 tdt = DataType[self.get_nodeattr("accDataType")]
-                assert np.vectorize(tdt.allowed)(threshold_tensor).all(), (
-                    "Thresholds in %s can't be expressed with type %s"
-                    % (self.onnx_node.name, str(tdt))
+                assert np.vectorize(tdt.allowed)(
+                    threshold_tensor
+                ).all(), "Thresholds in %s can't be expressed with type %s" % (
+                    self.onnx_node.name,
+                    str(tdt),
                 )
                 thresholds_hls_code = numpy_to_hls_code(
                     threshold_tensor, tdt, "thresholds", False, True
@@ -455,10 +460,10 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
             self.code_gen_dict["$GLOBALS$"] += ['#include "thresh.h"']
 
     def defines(self, var):
-        dim = self.get_nodeattr("Dim")
-        numReps = 1 * dim * dim
-        kernel = self.get_nodeattr("Kernel")
-        innerProdDim = kernel * kernel
+        dim_h, dim_w = self.get_nodeattr("Dim")
+        numReps = 1 * dim_h * dim_w
+        k_h, k_w = self.get_nodeattr("Kernel")
+        innerProdDim = k_h * k_w
         self.code_gen_dict["$DEFINES$"] = [
             """#define Channels1 {}\n #define InnerProdDim {}\n
             #define SIMD1 1\n #define PE1 {}\n #define numReps {}""".format(
@@ -664,8 +669,8 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         else:
             mult_luts = (2 * math.ceil((W + A) / 6) - 1) * (W + A)
         # accumulator
-        k = self.get_nodeattr("Kernel")
-        acc_bits = W + A + math.ceil(math.log(k * k, 2))
+        k_h, k_w = self.get_nodeattr("Kernel")
+        acc_bits = W + A + math.ceil(math.log(k_h * k_w, 2))
         acc_luts = acc_bits
         # thresholds and threshold comparators
         thr_luts = 0
@@ -694,20 +699,20 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         return int(mult_dsp)
 
     def get_op_and_param_counts(self):
-        k = self.get_nodeattr("Kernel")
+        k_h, k_w = self.get_nodeattr("Kernel")
         fm = self.get_nodeattr("Channels")
-        dim = self.get_nodeattr("Dim")
+        dim_h, dim_w = self.get_nodeattr("Dim")
         weight_bits = self.get_weight_datatype().bitwidth()
         inp_bits = self.get_input_datatype().bitwidth()
-        num_repetitions = int(dim * dim)
-        mac_count = k * k * fm * num_repetitions
+        num_repetitions = int(dim_h * dim_w)
+        mac_count = k_h * k_w * fm * num_repetitions
         # canonicalize op type: highest bitwidth operand first s.t.
         # e.g. mac_8bx4b and mac_4bx8b don't appear as two different op types
         bw1 = min(inp_bits, weight_bits)
         bw2 = max(inp_bits, weight_bits)
         mac_op_type = "op_mac_%dbx%db" % (bw1, bw2)
         weight_param_type = "param_weight_%db" % (weight_bits)
-        weight_count = k * k * fm
+        weight_count = k_h * k_w * fm
         ret_dict = {mac_op_type: mac_count, weight_param_type: weight_count}
         if self.get_nodeattr("noActivation") == 0:
             tdt = DataType[self.get_nodeattr("accDataType")]
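With `Dim` and `Kernel` now [H, W] lists, the cycle estimate in `get_exp_cycles()` above generalizes to rectangular feature maps and kernels. A short worked example with hypothetical attribute values:

```python
# Hypothetical Vector_Vector_Activate_Batch configuration.
ch, pe = 64, 8
dim_h, dim_w = 10, 1      # "Dim" = [H, W]
k_h, k_w = 3, 1           # "Kernel" = [H, W]
batch_size, mmv = 1, 1    # current FINN restrictions, as noted in get_exp_cycles

exp_cycles = ((ch * k_h * k_w) / pe) * batch_size * (dim_h * dim_w) / mmv
print(int(exp_cycles))    # (64 * 3 / 8) * 10 = 240
```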
diff --git a/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py b/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py
index 2fabc716a66a3cc24697e49aa26ec3bbbb231b43..be09abad9c10b8b5e9a32e21233107421fdef95e 100644
--- a/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py
+++ b/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py
@@ -27,9 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import argparse
+import numpy as np
 from driver import io_shape_dict
 from driver_base import FINNExampleOverlay
-import numpy as np
 
 
 def make_unsw_nb15_test_batches(bsize, dataset_root, limit_batches):
diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py
index df3c9881372659a4d8f6fceb8a385e6055c161e1..4dd5a080e10e4a0ab5bd14381186e19144f6edb3 100644
--- a/src/finn/qnn-data/templates/driver/driver_base.py
+++ b/src/finn/qnn-data/templates/driver/driver_base.py
@@ -27,19 +27,18 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
-import time
 import os
+import time
 from pynq import Overlay, allocate
 from pynq.ps import Clocks
 
+from finn.core.datatype import DataType
+from finn.util.basic import gen_finn_dt_tensor
 from finn.util.data_packing import (
     finnpy_to_packed_bytearray,
     packed_bytearray_to_finnpy,
 )
 
-from finn.util.basic import gen_finn_dt_tensor
-from finn.core.datatype import DataType
-
 # Driver base class for FINN-generated dataflow accelerators.
 # The particulars of the generated accelerator are specified via the
 # io_shape_dict (generated by the MakePYNQDriver transformation).
diff --git a/src/finn/qnn-data/templates/driver/validate.py b/src/finn/qnn-data/templates/driver/validate.py
index 4aa7d67aa162e91b878d387bee1457e4b477e635..001744cba2b59f6d1a0a67fca3e2ad9668a519c0 100644
--- a/src/finn/qnn-data/templates/driver/validate.py
+++ b/src/finn/qnn-data/templates/driver/validate.py
@@ -27,9 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import argparse
+import numpy as np
 from driver import io_shape_dict
 from driver_base import FINNExampleOverlay
-import numpy as np
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
diff --git a/src/finn/transformation/fpgadataflow/annotate_cycles.py b/src/finn/transformation/fpgadataflow/annotate_cycles.py
index 2c547203df94308b929a7989f1a9102bd3002fed..5ab491dd1031cfec64308aee678edc9c94aa6da2 100644
--- a/src/finn/transformation/fpgadataflow/annotate_cycles.py
+++ b/src/finn/transformation/fpgadataflow/annotate_cycles.py
@@ -27,10 +27,10 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
-from finn.transformation.move_reshape import _is_fpgadataflow_node
 from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.move_reshape import _is_fpgadataflow_node
 
 
 class AnnotateCycles(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py
index 4e501510110ef724c6a67f6214654b5454b30a77..d9089cbeba6e0791f6d8375e28b2c2d99b506eda 100644
--- a/src/finn/transformation/fpgadataflow/annotate_resources.py
+++ b/src/finn/transformation/fpgadataflow/annotate_resources.py
@@ -27,13 +27,13 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-from finn.transformation.base import Transformation
-from finn.transformation.move_reshape import _is_fpgadataflow_node
-from finn.analysis.fpgadataflow.res_estimation import res_estimation
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
 from finn.analysis.fpgadataflow.post_synth_res import post_synth_res
+from finn.analysis.fpgadataflow.res_estimation import res_estimation
 from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.transformation.move_reshape import _is_fpgadataflow_node
 
 
 class AnnotateResources(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/cleanup.py b/src/finn/transformation/fpgadataflow/cleanup.py
index 5dbe5f0517d07bef07e5ecff6e4c7afff0293d86..f59f4bdeab72a5af9615ecf308306e4fb4b69fb5 100644
--- a/src/finn/transformation/fpgadataflow/cleanup.py
+++ b/src/finn/transformation/fpgadataflow/cleanup.py
@@ -30,8 +30,8 @@ import os
 import shutil
 
 import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation.base import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 class CleanUp(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/compile_cppsim.py b/src/finn/transformation/fpgadataflow/compile_cppsim.py
index 6321b3335907948fb49de966c80eb21637e0a6ec..5f7c534b4561ffc0fac0c8c2b6160279f4e34fbc 100644
--- a/src/finn/transformation/fpgadataflow/compile_cppsim.py
+++ b/src/finn/transformation/fpgadataflow/compile_cppsim.py
@@ -27,8 +27,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation.base import NodeLocalTransformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 class CompileCppSim(NodeLocalTransformation):
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index a221b510ab8d22f4daca1c32e717a9b482246712..03d7b73a567ef8e87890d4ecfdc697ab3c6120fd 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -27,22 +27,22 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-from onnx import helper, TensorProto
 import numpy as np
 import warnings
+from onnx import TensorProto, helper
 
+import finn.core.data_layout as DataLayout
 from finn.core.datatype import DataType
-from finn.transformation.base import Transformation
 from finn.custom_op.registry import getCustomOp
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import SortGraph
-import finn.core.data_layout as DataLayout
-from finn.util.onnx import nchw_to_nhwc
-from finn.util.basic import get_by_name
+from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.minimize_accumulator_width import (
     MinimizeAccumulatorWidth,
 )
+from finn.transformation.general import SortGraph
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
+from finn.util.onnx import nchw_to_nhwc
 
 
 class InferConvInpGen(Transformation):
@@ -64,30 +64,28 @@ class InferConvInpGen(Transformation):
                     warnings.warn("Input is not int. Can't infer ConvInpGen")
                     continue
                 i2c_inst = getCustomOp(n)
-                stride = i2c_inst.get_nodeattr("stride")
-                k_attr = i2c_inst.get_nodeattr("kernel_size")
-                k_h = k_attr[0]
-                k_w = k_attr[1]
+                stride_h, stride_w = i2c_inst.get_nodeattr("stride")
+                k_h, k_w = i2c_inst.get_nodeattr("kernel_size")
                 pad_attr = i2c_inst.get_nodeattr("pad_amount")
                 pad_h = pad_attr[0] + pad_attr[2]
                 pad_w = pad_attr[1] + pad_attr[3]
+                dilation_h, dilation_w = i2c_inst.get_nodeattr("dilations")
                 # temporary checks until non-square conv support is finalized
-                assert pad_h == pad_w, "Non-square images not yet supported."
-                assert k_h == k_w, "Non-square kernels not yet supported."
-                k = k_h
-                pad = pad_attr[0]
                 pad_val = i2c_inst.get_nodeattr("pad_value")
                 depthwise = i2c_inst.get_nodeattr("depthwise")
                 ifm_ch = i2c_in_shape[-1]
-                ifm_dim = i2c_in_shape[1]
-                ofm_dim = i2c_out_shape[1]
+                ifm_dim_h = i2c_in_shape[1]
+                ifm_dim_w = i2c_in_shape[2]
+                ofm_dim_h = i2c_out_shape[1]
+                ofm_dim_w = i2c_out_shape[2]
 
                 # default params for ConvolutionInputGenerator
                 ConvInpGen_node_idx = node_ind
                 ConvInpGen_input = i2c_input
-                ConvInpGen_idim = ifm_dim
+                ConvInpGen_idim_h = ifm_dim_h
+                ConvInpGen_idim_w = ifm_dim_w
 
-                if pad > 0:
+                if pad_h > 0 or pad_w > 0:
                     # if padding enabled, ensure pad_val supported by DataType
                     # assert dt.allowed(pad_val),"""FMPadding_Batch DataType
                     # must support pad_val"""
@@ -95,12 +93,13 @@ class InferConvInpGen(Transformation):
                         pad_val == 0
                     ), "FMPadding_Batch doesn't currently support pad_val!= 0"
 
-                    odim_padding = ifm_dim + 2 * pad
+                    odim_padding_h = ifm_dim_h + pad_h
+                    odim_padding_w = ifm_dim_w + pad_w
 
                     padding_out = helper.make_tensor_value_info(
                         model.make_new_valueinfo_name(),
                         TensorProto.FLOAT,
-                        (1, odim_padding, odim_padding, ifm_ch),
+                        (1, odim_padding_h, odim_padding_w, ifm_ch),
                     )
                     graph.value_info.append(padding_out)
                     padding_out = padding_out.name
@@ -108,7 +107,8 @@ class InferConvInpGen(Transformation):
 
                     ConvInpGen_node_idx += 1
                     ConvInpGen_input = padding_out
-                    ConvInpGen_idim = odim_padding
+                    ConvInpGen_idim_h = odim_padding_h
+                    ConvInpGen_idim_w = odim_padding_w
 
                     padding_node = helper.make_node(
                         "FMPadding_Batch",
@@ -116,15 +116,31 @@ class InferConvInpGen(Transformation):
                         [padding_out],
                         domain="finn.custom_op.fpgadataflow",
                         backend="fpgadataflow",
-                        ImgDim=ifm_dim,
-                        Padding=2 * pad,
+                        ImgDim=[ifm_dim_h, ifm_dim_w],
+                        Padding=pad_attr,
                         NumChannels=ifm_ch,
                         inputDataType=dt.name,
                         SIMD=ifm_ch,
                     )
                     graph.node.insert(node_ind, padding_node)
 
-                if stride > 1 and k == 1:
+                # Ensure that only supported HLS nodes are inserted
+                is_square_image = ConvInpGen_idim_h == ConvInpGen_idim_w
+                is_square_kernel = k_h == k_w
+                is_kernel_pointwise = k_h == 1 and k_w == 1
+                is_equal_stride = stride_h == stride_w
+                is_1d_convolution = (k_h == 1 and k_w > 1 and ifm_dim_h == 1) or (
+                    k_h > 1 and k_w == 1 and ifm_dim_w == 1
+                )
+
+                if (stride_h > 1 or stride_w > 1) and is_kernel_pointwise:
+                    assert (
+                        is_square_image
+                    ), "DownSampler currently only supports square input images."
+                    assert is_equal_stride, """DownSampler currently only supports equal stride values
+                        along both axes."""
+                    ConvInpGen_idim = ConvInpGen_idim_h
+                    stride = stride_h
                     # create DownSampler node
                     ConvInpGen_node = helper.make_node(
                         "DownSampler",
@@ -141,22 +157,58 @@ class InferConvInpGen(Transformation):
                     graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
                 else:
                     # create equivalent ConvolutionInputGenerator node
-                    ConvInpGen_node = helper.make_node(
-                        "ConvolutionInputGenerator",
-                        [ConvInpGen_input],
-                        [i2c_output],
-                        domain="finn.custom_op.fpgadataflow",
-                        backend="fpgadataflow",
-                        ConvKernelDim=k,
-                        IFMChannels=ifm_ch,
-                        IFMDim=ConvInpGen_idim,
-                        OFMDim=ofm_dim,
-                        SIMD=ifm_ch,
-                        Stride=stride,
-                        inputDataType=dt.name,
-                        outputDataType=dt.name,
-                        depthwise=depthwise,
-                    )
+                    if (
+                        is_square_image and is_square_kernel
+                    ):  # square images and square kernels
+                        assert is_equal_stride, """Non-equal strides along different axes are not supported
+                            for (non-)square convolutions"""
+                        assert (
+                            dilation_h == 1 and dilation_w == 1
+                        ), """Dilation value != 1 is not supported
+                            for square convolutions"""
+                        ConvInpGen_node = helper.make_node(
+                            "ConvolutionInputGenerator",
+                            [ConvInpGen_input],
+                            [i2c_output],
+                            domain="finn.custom_op.fpgadataflow",
+                            backend="fpgadataflow",
+                            ConvKernelDim=[k_h, k_w],
+                            IFMChannels=ifm_ch,
+                            IFMDim=[ConvInpGen_idim_h, ConvInpGen_idim_w],
+                            OFMDim=[ofm_dim_h, ofm_dim_w],
+                            SIMD=ifm_ch,
+                            Stride=[stride_h, stride_w],
+                            Dilation=[dilation_h, dilation_w],
+                            inputDataType=dt.name,
+                            outputDataType=dt.name,
+                            depthwise=depthwise,
+                        )
+                    else:  # non-square images and/or kernels
+                        assert (
+                            is_1d_convolution
+                        ), "ConvultionInputGenerator1D works only for 1D convolutions"
+                        if dilation_h > 1 or dilation_w > 1:
+                            assert (
+                                stride_h == 1 and stride_w == 1
+                            ), """Stride value of greater than 1 is not supported for convolutions
+                                with dilation value greater than 1"""
+                        ConvInpGen_node = helper.make_node(
+                            "ConvolutionInputGenerator1D",
+                            [ConvInpGen_input],
+                            [i2c_output],
+                            domain="finn.custom_op.fpgadataflow",
+                            backend="fpgadataflow",
+                            ConvKernelDim=[k_h, k_w],
+                            IFMChannels=ifm_ch,
+                            IFMDim=[ConvInpGen_idim_h, ConvInpGen_idim_w],
+                            OFMDim=[ofm_dim_h, ofm_dim_w],
+                            SIMD=ifm_ch,
+                            Stride=[stride_h, stride_w],
+                            Dilation=[dilation_h, dilation_w],
+                            inputDataType=dt.name,
+                            outputDataType=dt.name,
+                            depthwise=depthwise,
+                        )
                     graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
                 # remove old nodes
                 graph.node.remove(n)
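The flags defined above select between three HLS nodes. The selection can be summarized as a standalone sketch (the helper name `choose_convinpgen` is illustrative only, and the padding insertion and assertions shown above are omitted):

```python
def choose_convinpgen(k_h, k_w, stride_h, stride_w, ifm_dim_h, ifm_dim_w):
    """Simplified mirror of the node-selection logic in InferConvInpGen."""
    is_square_image = ifm_dim_h == ifm_dim_w
    is_square_kernel = k_h == k_w
    is_kernel_pointwise = k_h == 1 and k_w == 1
    if (stride_h > 1 or stride_w > 1) and is_kernel_pointwise:
        return "DownSampler"                  # square image / equal stride asserted above
    if is_square_image and is_square_kernel:
        return "ConvolutionInputGenerator"
    return "ConvolutionInputGenerator1D"      # only valid for genuine 1D convolutions

print(choose_convinpgen(3, 3, 1, 1, 32, 32))   # ConvolutionInputGenerator
print(choose_convinpgen(1, 3, 1, 1, 1, 128))   # ConvolutionInputGenerator1D
print(choose_convinpgen(1, 1, 2, 2, 32, 32))   # DownSampler
```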
@@ -338,7 +390,7 @@ class InferPool_Batch(Transformation):
                     [im2col_in],
                     [im2col_out],
                     domain="finn.custom_op.general",
-                    stride=stride,
+                    stride=[stride, stride],
                     kernel_size=[k, k],
                     pad_amount=[pad, pad, pad, pad],
                     pad_value=pad_value,
@@ -684,7 +736,7 @@ class InferVVAU(Transformation):
             ):
                 sparsity = model.get_tensor_sparsity(n.input[1])
                 try:
-                    k = sparsity["dw"]["kernel_shape"]
+                    k_h, k_w = sparsity["dw"]["kernel_shape"]
                 except KeyError:
                     raise Exception(
                         """Sparsity doesn't indicate that MatMul
@@ -702,25 +754,25 @@ class InferVVAU(Transformation):
                     mm_output = n.output[0]
                     W = model.get_initializer(mm_weight)
                     # infer dense weight tensor from sparse weight matrix
-                    # kernel size k which was extracted above and the value of
+                    # kernel size (k_h, k_w) which was extracted above and the value of
                     # the channels is used.
-                    # the weight matrix has a shape of (k * k * Channels, Channels)
+                    # the weight matrix has a shape of (k_h * k_w * Channels, Channels)
                     # we need to reverse the creation of the sparse weight matrix
-                    # to achieve a weight tensor of shape (Channels, 1, k, k)
+                    # to achieve a weight tensor of shape (Channels, 1, k_h, k_w)
                     channels = int(W.shape[1])
-                    # transpose to achieve a shape of (k * k * Channels, Channels)
+                    # transpose to achieve a shape of (k_h * k_w * Channels, Channels)
                     W = W.T
-                    # reshape to (Channels, k, k, Channels) to transpose afterwards
-                    # to (Channels, Channels, k, k)
-                    W = W.reshape(channels, k, k, channels)
+                    # reshape to (Channels, k_h, k_w, Channels) to transpose afterwards
+                    # to (Channels, Channels, k_h, k_w)
+                    W = W.reshape(channels, k_h, k_w, channels)
                     W = W.transpose(0, 3, 1, 2)
                     # now we can extract the values using a for loop over the channels
                     # and fill a zero numpy array in the correct shape
-                    w_tensor = np.zeros((channels, 1, k, k))
+                    w_tensor = np.zeros((channels, 1, k_h, k_w))
                     for ch in range(channels):
                         w_tensor[ch][0] = W[ch][ch]
                     model.set_initializer(mm_weight, w_tensor)
-                    model.set_tensor_shape(mm_weight, (channels, 1, k, k))
+                    model.set_tensor_shape(mm_weight, (channels, 1, k_h, k_w))
                     # create node with pe=channels as default
                     pe = channels
                     assert (
@@ -762,9 +814,9 @@ class InferVVAU(Transformation):
                             backend="fpgadataflow",
                             resType="lut",
                             PE=pe,
-                            Dim=mm_in_shape[1],
+                            Dim=[mm_in_shape[1], mm_in_shape[2]],
                             Channels=channels,
-                            Kernel=k,
+                            Kernel=[k_h, k_w],
                             inputDataType=idt.name,
                             weightDataType=wdt.name,
                             outputDataType=odt.name,
@@ -790,9 +842,9 @@ class InferVVAU(Transformation):
                             backend="fpgadataflow",
                             resType="lut",
                             PE=pe,
-                            Dim=mm_in_shape[1],
+                            Dim=[mm_in_shape[1], mm_in_shape[2]],
                             Channels=channels,
-                            Kernel=k,
+                            Kernel=[k_h, k_w],
                             inputDataType=idt.name,
                             weightDataType=wdt.name,
                             outputDataType=odt.name,
@@ -987,7 +1039,7 @@ class InferAddStreamsLayer(Transformation):
 
 
 class InferDuplicateStreamsLayer(Transformation):
-    """Insert a DuplicateStreams HLS layer for any tensor with fanout == 2 """
+    """Insert a DuplicateStreams HLS layer for any tensor with fanout == 2"""
 
     def apply(self, model):
         graph = model.graph
@@ -1345,7 +1397,11 @@ class InferGlobalAccPoolLayer(Transformation):
                 )
                 model.graph.value_info.append(mul_value)
                 model.set_initializer(mul_value.name, np.array(1 / (vecs[1] * vecs[2])))
-                new_mul = helper.make_node("Mul", [pool_out, mul_value.name], [result],)
+                new_mul = helper.make_node(
+                    "Mul",
+                    [pool_out, mul_value.name],
+                    [result],
+                )
                 graph.node.insert(insert_point, new_pool)
                 graph.node.insert(insert_point + 1, new_mul)
                 node_ind += 1
diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
index 419a6d8c494651862f55e63e6829a61fe8040599..0aba60f9b6f08210c40f305694495b77f517f323 100644
--- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
+++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
@@ -27,8 +27,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import copy
-
 from onnx import helper
+
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.util.basic import get_by_name, make_build_dir
diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
index 92f5f22201396eb0df15e2411b88642568f491c7..ea00f2cc6a215a22dcaa8736258c01aad7ce7e52 100644
--- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -26,19 +26,19 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import json
+import multiprocessing as mp
 import os
-import warnings
 import subprocess
-import json
+import warnings
 
-from finn.transformation.base import Transformation
-from finn.util.basic import make_build_dir, get_num_default_workers
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.custom_op.registry import getCustomOp
-import multiprocessing as mp
+from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
     ReplaceVerilogRelPaths,
 )
+from finn.util.basic import get_num_default_workers, make_build_dir
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def is_external_input(model, node, i):
diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py
index c6bedd466e31efb622640cbd203d344ff9b3d88f..2bda7883130d0863b7f67943d19caa00b7290de5 100644
--- a/src/finn/transformation/fpgadataflow/floorplan.py
+++ b/src/finn/transformation/fpgadataflow/floorplan.py
@@ -26,14 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import json
+import warnings
+
+from finn.analysis.fpgadataflow.floorplan_params import floorplan_params
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
-from finn.util.basic import get_by_name
-from finn.analysis.fpgadataflow.floorplan_params import floorplan_params
-from finn.util.basic import make_build_dir
 from finn.transformation.general import ApplyConfig
-import warnings
-import json
+from finn.util.basic import get_by_name, make_build_dir
 
 
 class Floorplan(Transformation):
@@ -58,16 +58,21 @@ class Floorplan(Transformation):
 
         # read in a user-specified floorplan or generate a default one
         if self.user_floorplan is None:
-            floorplan = model.analysis(floorplan_params)
+            self.user_floorplan = model.analysis(floorplan_params)
             json_dir = make_build_dir(prefix="vitis_floorplan_")
             json_file = json_dir + "/floorplan.json"
             model.set_metadata_prop("floorplan_json", json_file)
             with open(json_file, "w") as f:
-                json.dump(floorplan, f, indent=4)
+                json.dump(self.user_floorplan, f, indent=4)
         else:
             model.set_metadata_prop("floorplan_json", self.user_floorplan)
             model = model.transform(ApplyConfig(self.user_floorplan))
 
+        try:
+            default_slr = self.user_floorplan["Defaults"]["slr"][0]
+        except Exception:
+            default_slr = -1
+
         # perform DWC and FIFO specific adjustments
         unassigned_nodes = 0
         for node in model.graph.node:
@@ -75,6 +80,7 @@ class Floorplan(Transformation):
             node_slr = node_inst.get_nodeattr("slr")
             if node_slr == -1:
                 unassigned_nodes += 1
+                node_inst.set_nodeattr("slr", default_slr)
             if node.op_type == "StreamingDataWidthConverter_Batch":
                 # if we have SLR assignment already. use that
                 if node_slr != -1:
@@ -100,8 +106,9 @@ class Floorplan(Transformation):
         if unassigned_nodes > 0:
             warnings.warn(
                 str(unassigned_nodes)
-                + " nodes have no entry in the provided floorplan "
-                + "and no default value was set"
+                + " nodes have no entry in the provided floorplan,"
+                + " SLR was set to "
+                + str(default_slr)
             )
 
         # partition id generation
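The fallback above reads `user_floorplan["Defaults"]["slr"][0]`, so a user-supplied floorplan can steer unassigned nodes to a chosen SLR via a `Defaults` section. A minimal hypothetical example (only the keys read by the fallback are shown; real floorplans additionally carry per-node entries):

```python
import json

# Minimal hypothetical floorplan dictionary, as it would be dumped to floorplan.json.
user_floorplan = {"Defaults": {"slr": [0]}}

try:
    default_slr = user_floorplan["Defaults"]["slr"][0]
except Exception:
    default_slr = -1   # same fallback as in the transformation above

print(default_slr)                 # 0
print(json.dumps(user_floorplan))  # {"Defaults": {"slr": [0]}}
```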
diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ip.py b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
index bbd012a715e49b61c19daad65f8de889112f92a7..2a7d9e9066836ea0d4af004f01d88953e4adaeb7 100644
--- a/src/finn/transformation/fpgadataflow/hlssynth_ip.py
+++ b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
@@ -27,10 +27,11 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import warnings
+
 import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation.base import NodeLocalTransformation
-import warnings
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 class HLSSynthIP(NodeLocalTransformation):
diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py
index c8df80659d30e1855fc658bad83c3fe9bccb9bf9..58efe65eb5f9d96d74cdf40672703fabe76afb0d 100644
--- a/src/finn/transformation/fpgadataflow/insert_dwc.py
+++ b/src/finn/transformation/fpgadataflow/insert_dwc.py
@@ -1,10 +1,10 @@
+import warnings
 from onnx import TensorProto
 from onnx import helper as oh
 
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.util.fpgadataflow import is_fpgadataflow_node
-import warnings
 
 
 def _is_dwc_node(node):
diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py
index c0ac1319dd520794afd66f187b35e529739e5cd7..ef56db6376703ce1eb0134c173de61a562bca6e6 100644
--- a/src/finn/transformation/fpgadataflow/insert_fifo.py
+++ b/src/finn/transformation/fpgadataflow/insert_fifo.py
@@ -1,11 +1,11 @@
+import numpy as np
+import warnings
 from onnx import TensorProto
 from onnx import helper as oh
 
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.util.fpgadataflow import is_fpgadataflow_node
-import warnings
-import numpy as np
 
 
 def _is_fifo_node(node):
@@ -29,11 +29,9 @@ def _suitable_node(node):
 
 
 def _suitable_folded_shapes(ishape, oshape):
-    i_dummy = np.random.rand(*ishape)
-    o_dummy = np.random.rand(*oshape)
-    ishape_canonical = np.squeeze(i_dummy).shape
-    oshape_canonical = np.squeeze(o_dummy).shape
-    return ishape_canonical == oshape_canonical
+    matching_stream_width = ishape[-1] == oshape[-1]
+    matching_size = np.prod(ishape) == np.prod(oshape)
+    return matching_stream_width and matching_size
 
 
 class InsertFIFO(Transformation):
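
The new check above compares the innermost (stream-width) dimension and the total element count directly, instead of squeezing dummy arrays. A small self-contained illustration, restating the helper with made-up shapes:

import numpy as np

def _suitable_folded_shapes(ishape, oshape):
    # same per-cycle stream width (innermost dim) and same total element count
    matching_stream_width = ishape[-1] == oshape[-1]
    matching_size = np.prod(ishape) == np.prod(oshape)
    return matching_stream_width and matching_size

assert _suitable_folded_shapes((1, 2, 2, 4), (1, 4, 4))      # widths and sizes match
assert not _suitable_folded_shapes((1, 2, 2, 4), (1, 2, 8))  # sizes match, widths differ
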
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py
index 27055a4fd29dba3849c0e4a889f27802f8c36081..d4b2a1032aeb305c85ffb535ac821692ce747c18 100644
--- a/src/finn/transformation/fpgadataflow/insert_iodma.py
+++ b/src/finn/transformation/fpgadataflow/insert_iodma.py
@@ -26,15 +26,15 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import math
+import numpy as np
 from onnx import TensorProto
 from onnx import helper as oh
 
-from finn.util.basic import get_by_name
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.transformation.general import SortGraph
-import math
-import numpy as np
+from finn.util.basic import get_by_name
 
 
 class InsertIODMA(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
index 3ce9824b14a54f502c90650e7b3b75e9cdaab77f..34cb61346dcd5bcd6f41a4272748764cf385a524 100644
--- a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
+++ b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py
@@ -26,6 +26,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import numpy as np
 from onnx import TensorProto
 from onnx import helper as oh
 
@@ -33,8 +34,6 @@ from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.util.basic import get_by_name
 
-import numpy as np
-
 
 class InsertTLastMarker(Transformation):
     """Ensure that the graph is started/terminated with a TLastMarker node, inserting
diff --git a/src/finn/transformation/fpgadataflow/make_deployment.py b/src/finn/transformation/fpgadataflow/make_deployment.py
index 6d37f567c9a20cf692df126c1c3560324b61d06d..d43d81716ac7a8b097fc7ec9e38bf5bcb954c7fb 100644
--- a/src/finn/transformation/fpgadataflow/make_deployment.py
+++ b/src/finn/transformation/fpgadataflow/make_deployment.py
@@ -26,15 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import warnings
 import os
 import subprocess
 from distutils.dir_util import copy_tree
 from shutil import copy
 
+import finn.transformation.fpgadataflow.templates as templates
 from finn.transformation.base import Transformation
 from finn.util.basic import make_build_dir
-import finn.transformation.fpgadataflow.templates as templates
 
 
 class DeployToPYNQ(Transformation):
@@ -98,22 +97,17 @@ class DeployToPYNQ(Transformation):
         copy_tree(pynq_driver_dir, deployment_dir)
         model.set_metadata_prop("pynq_deploy_dir", deployment_dir)
         model.set_metadata_prop("exec_mode", "remote_pynq")
-        if self.password == "":
-            prefix = ""  # assume we are using an ssh key
-            warnings.warn("Empty password, make sure you've set up an ssh key")
-        else:
-            prefix = "sshpass -p %s " % self.password
 
         # create target directory on PYNQ board
-        cmd = prefix + 'ssh {}@{} -p {} "mkdir -p {}"'.format(
+        cmd = 'ssh {}@{} -p {} "mkdir -p {}"'.format(
             self.username, self.ip, self.port, self.target_dir
         )
         bash_command = ["/bin/bash", "-c", cmd]
         process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
         process_compile.communicate()
-        # copy directory to PYNQ board using scp and sshpass
-        cmd = prefix + "scp -P{} -r {} {}@{}:{}".format(
-            self.port, deployment_dir, self.username, self.ip, self.target_dir,
+        # copy directory to PYNQ board using scp
+        cmd = "scp -P{} -r {} {}@{}:{}".format(
+            self.port, deployment_dir, self.username, self.ip, self.target_dir
         )
         bash_command = ["/bin/bash", "-c", cmd]
         process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index 6ab12548abbcbe00496101bd146b2c9b873204c8..be2176a34763fdb5521a0acdfc3137fb4b4a766e 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -27,24 +27,29 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-import shutil
-from finn.transformation.base import Transformation
-from finn.util.basic import gen_finn_dt_tensor, make_build_dir
-import finn.util.data_packing as dpk
-import finn.core.datatype as dtp
-from finn.custom_op.registry import getCustomOp
-import os
-import warnings
 import pkg_resources as pk
-from . import template_driver
-from finn.core.modelwrapper import ModelWrapper
+
 import numpy as np
+import os
+import shutil
+import warnings
 
+import finn.core.datatype as dtp
+import finn.util.data_packing as dpk
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
+from finn.util.basic import (
+    gen_finn_dt_tensor,
+    make_build_dir,
+    roundup_to_integer_multiple,
+)
 from finn.util.data_packing import (
-    pack_innermost_dim_as_hex_string,
     hexstring2npbytearray,
+    pack_innermost_dim_as_hex_string,
 )
-from finn.util.basic import roundup_to_integer_multiple
+
+from . import template_driver
 
 
 def to_external_tensor(init, w_dtype):
diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
index 59df07e8e578ad0903b9742a5a9e5ad6ef288f91..dbcca1a23051fc3f62f9b402e774c7de9dd0112b 100644
--- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -28,27 +28,25 @@
 
 import os
 import subprocess
+from shutil import copy
 
+from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
-from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import get_by_name, make_build_dir
-from finn.util.basic import pynq_part_map
-
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.floorplan import Floorplan
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_data_layouts import InferDataLayouts
-from shutil import copy
-from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.util.basic import get_by_name, make_build_dir, pynq_part_map
 
 from . import templates
 
@@ -265,7 +263,10 @@ class MakeZYNQProject(Transformation):
             vivado_pynq_proj_dir + "/finn_zynq_link.runs/impl_1/top_wrapper.bit"
         )
         if not os.path.isfile(bitfile_name):
-            raise Exception("Synthesis failed, no bitfile found")
+            raise Exception(
+                "Synthesis failed, no bitfile found. Check logs under %s"
+                % vivado_pynq_proj_dir
+            )
         deploy_bitfile_name = vivado_pynq_proj_dir + "/resizer.bit"
         copy(bitfile_name, deploy_bitfile_name)
         # set bitfile attribute
@@ -280,8 +281,11 @@ class MakeZYNQProject(Transformation):
         for hwh_name_cand in hwh_name_alts:
             if os.path.isfile(hwh_name_cand):
                 hwh_name = hwh_name_cand
-        if hwh_name is None:
-            raise Exception("Synthesis failed, no hardware handoff file found")
+        if not os.path.isfile(hwh_name):
+            raise Exception(
+                "Synthesis failed, no bitfile found. Check logs under %s"
+                % vivado_pynq_proj_dir
+            )
         deploy_hwh_name = vivado_pynq_proj_dir + "/resizer.hwh"
         copy(hwh_name, deploy_hwh_name)
         model.set_metadata_prop("hw_handoff", deploy_hwh_name)
diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
index 653ec02ff306bf35d5fd3f7265404e61641077ac..8b332972cac6bf001490c0c2396174be175d6d33 100644
--- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
@@ -26,15 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import copy
+import multiprocessing as mp
 import os
 
 import finn.custom_op.registry as registry
-from finn.util.basic import make_build_dir
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation.base import Transformation
-from finn.util.basic import get_num_default_workers
-import multiprocessing as mp
-import copy
+from finn.util.basic import get_num_default_workers, make_build_dir
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def _codegen_single_node(node, model):
diff --git a/src/finn/transformation/fpgadataflow/prepare_ip.py b/src/finn/transformation/fpgadataflow/prepare_ip.py
index 4ed5e80aa7baa585f83314ec42233d5885dff32d..4fdcf3939fe6d879abe36907a1bf84a417cb9903 100644
--- a/src/finn/transformation/fpgadataflow/prepare_ip.py
+++ b/src/finn/transformation/fpgadataflow/prepare_ip.py
@@ -27,11 +27,12 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import warnings
+
 import finn.custom_op.registry as registry
 from finn.transformation.base import Transformation
 from finn.util.basic import make_build_dir
 from finn.util.fpgadataflow import is_fpgadataflow_node
-import warnings
 
 
 def _codegen_single_node(node, model, fpgapart, clk):
diff --git a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
index eaa85b9102b55bf8ecdf3a9f284f87468581e113..66799ff4297ad0e2f8afa9261b0f3f983b27452d 100644
--- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
@@ -27,11 +27,11 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
+from finn.transformation.base import NodeLocalTransformation
 from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
     ReplaceVerilogRelPaths,
 )
-from finn.transformation.base import NodeLocalTransformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 try:
     from pyverilator import PyVerilator
diff --git a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
index cc7c305b3ec94482e64235a1b1cf4eee543c46e1..7850d37423a9add0880e054c7b035b9e735c7f25 100644
--- a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
+++ b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
@@ -29,8 +29,8 @@
 import os
 
 import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation.base import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 class ReplaceVerilogRelPaths(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/set_exec_mode.py b/src/finn/transformation/fpgadataflow/set_exec_mode.py
index 4677e59f7b35fec38aeaae65485ed16ba1e18f06..caf891bc4444a65976103746685b2e79abdd708f 100644
--- a/src/finn/transformation/fpgadataflow/set_exec_mode.py
+++ b/src/finn/transformation/fpgadataflow/set_exec_mode.py
@@ -27,8 +27,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation.base import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 class SetExecMode(Transformation):
diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
index 3199d8e7fa7024f2506b4abfa7d6ce0630f508dc..c06c34574aa22a23d1307232b0fd8e65224f1983 100644
--- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py
+++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
@@ -29,18 +29,19 @@
 import math
 import numpy as np
 import warnings
+
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.util.fpgadataflow import is_fpgadataflow_node
-from finn.util.pyverilator import reset_rtlsim, toggle_clk, pyverilate_stitched_ip
+from finn.util.pyverilator import pyverilate_stitched_ip, reset_rtlsim, toggle_clk
 
 
 def reset_implementation(node):
diff --git a/src/finn/transformation/fpgadataflow/set_folding.py b/src/finn/transformation/fpgadataflow/set_folding.py
index bb4e0e1db51d331400e7a294890eb998c2aa4e1d..914dda9554395fc89cac8692e13339ae3ce9baf7 100644
--- a/src/finn/transformation/fpgadataflow/set_folding.py
+++ b/src/finn/transformation/fpgadataflow/set_folding.py
@@ -26,13 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import warnings
+
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
-from finn.util.fpgadataflow import is_fpgadataflow_node
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
 from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
 from finn.transformation.general import GiveUniqueNodeNames
-import warnings
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def divisors(num):
diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py
index acc20e4ad0d0f4a17b20fd77625e9954f09e69aa..49cd6c82bca9ff7578314180c1ba433d63a32087 100644
--- a/src/finn/transformation/fpgadataflow/synth_ooc.py
+++ b/src/finn/transformation/fpgadataflow/synth_ooc.py
@@ -30,8 +30,8 @@ import os
 from shutil import copy2
 
 from finn.transformation.base import Transformation
-from finn.util.vivado import out_of_context_synth
 from finn.util.basic import make_build_dir
+from finn.util.vivado import out_of_context_synth
 
 
 class SynthOutOfContext(Transformation):
@@ -52,10 +52,11 @@ class SynthOutOfContext(Transformation):
         top_module_name = model.get_metadata_prop("wrapper_filename")
         top_module_name = file_to_basename(top_module_name).strip(".v")
         build_dir = make_build_dir("synth_out_of_context_")
+        verilog_extensions = [".v", ".vh"]
         with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
             all_verilog_srcs = f.read().split()
         for file in all_verilog_srcs:
-            if file.endswith(".v"):
+            if any([file.endswith(x) for x in verilog_extensions]):
                 copy2(file, build_dir)
         ret = out_of_context_synth(
             build_dir, top_module_name, self.part, self.clk_name, self.clk_period_ns
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 9c0169a98f515d0b32e10bdfc834eca5fb681ffd..ae13f6e4e464fea8884f89e7f071e53e28a5c623 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -103,8 +103,8 @@ create_project finn_zynq_link ./ -part $FPGA_PART
 # set board part repo paths to find PYNQ-Z1/Z2
 set paths_prop [get_property BOARD_PART_REPO_PATHS [current_project]]
 set paths_param [get_param board.repoPaths]
-lappend paths_prop /workspace/finn/board_files
-lappend paths_param /workspace/finn/board_files
+lappend paths_prop /workspace/board_files
+lappend paths_param /workspace/board_files
 set_property BOARD_PART_REPO_PATHS $paths_prop [current_project]
 set_param board.repoPaths $paths_param
 
@@ -115,7 +115,7 @@ if {$BOARD == "ZCU104"} {
     set_property board_part xilinx.com:zcu102:part0:3.3 [current_project]
     set ZYNQ_TYPE "zynq_us+"
 } elseif {$BOARD == "Ultra96"} {
-    set_property board_part em.avnet.com:ultra96v1:part0:1.2 [current_project]
+    set_property board_part avnet.com:ultra96v1:part0:1.2 [current_project]
     set ZYNQ_TYPE "zynq_us+"
 } elseif {$BOARD == "Pynq-Z2"} {
     set ZYNQ_TYPE "zynq_7000"
diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py
index 0fe4276096852c08d0798be8e1ee715cc5769286..502b6f2bffd0d64980ae911d28b845ad90633a44 100644
--- a/src/finn/transformation/fpgadataflow/vitis_build.py
+++ b/src/finn/transformation/fpgadataflow/vitis_build.py
@@ -26,34 +26,34 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import json
 import os
 import subprocess
-import json
+from enum import Enum
 
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.base import Transformation
 from finn.custom_op.registry import getCustomOp
-
+from finn.transformation.base import Transformation
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.floorplan import Floorplan
 from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     RemoveUnusedTensors,
 )
-from finn.util.basic import make_build_dir
 from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.util.basic import make_build_dir
+
 from . import templates
-from enum import Enum
 
 
 def _check_vitis_envvars():
@@ -116,7 +116,7 @@ class CreateVitisXO(Transformation):
                 )
                 arg_id += 1
                 args_string.append(
-                    "{numReps:0:%s:%s:0x4:0x1C:uint:0}" 
+                    "{numReps:0:%s:%s:0x4:0x1C:uint:0}"
                     % (str(arg_id), axilite_intf_name)
                 )
                 arg_id += 1
@@ -207,8 +207,6 @@ class VitisLink(Transformation):
             # has axis, aximm and axilite
             # everything else is axis-only
             # assume only one connection from each ip to the next
-            # all aximm allocated to DDR[0]
-            # all kernels allocated to SLR0
             producer = model.find_producer(node.input[0])
             consumer = model.find_consumers(node.output[0])
             # define kernel instances
@@ -225,12 +223,36 @@ class VitisLink(Transformation):
             else:
                 instance_names[node.name] = node.name
                 config.append("nk=%s:1:%s" % (node.name, instance_names[node.name]))
-            # assign SLRs
-            config.append("slr=%s:SLR0" % instance_names[node.name])
+            # explicitly assign SLRs if the slr attribute is not -1
+            node_slr = sdp_node.get_nodeattr("slr")
+            if node_slr != -1:
+                config.append("slr=%s:SLR%d" % (instance_names[node.name], node_slr))
             # assign memory banks
             if producer is None or consumer is None:
+                node_mem_port = sdp_node.get_nodeattr("mem_port")
+                if node_mem_port == "":
+                    # configure good defaults based on board
+                    if "u50" in self.platform or "u280" in self.platform:
+                        # Use HBM where available (also U50 does not have DDR)
+                        mem_type = "HBM"
+                        mem_idx = 0
+                    elif "u200" in self.platform:
+                        # Use DDR controller in static region of U200
+                        mem_type = "DDR"
+                        mem_idx = 1
+                    elif "u250" in self.platform:
+                        # Use DDR controller on the node's SLR if set, otherwise 0
+                        mem_type = "DDR"
+                        if node_slr == -1:
+                            mem_idx = 0
+                        else:
+                            mem_idx = node_slr
+                    else:
+                        mem_type = "DDR"
+                        mem_idx = 1
+                    node_mem_port = "%s[%d]" % (mem_type, mem_idx)
                 config.append(
-                    "sp=%s.m_axi_gmem0:DDR[%d]" % (instance_names[node.name], 0)
+                    "sp=%s.m_axi_gmem0:%s" % (instance_names[node.name], node_mem_port)
                 )
             # connect streams
             if producer is not None:
@@ -340,7 +362,8 @@ class VitisBuild(Transformation):
     floorplan_file: path to a JSON containing a dictionary with SLR assignments
                     for each node in the ONNX graph. Must be parse-able by
                     the ApplyConfig transform.
-
+    enable_link: enable linking kernels (.xo files), otherwise just synthesize
+                    them independently.
     """
 
     def __init__(
@@ -351,6 +374,7 @@ class VitisBuild(Transformation):
         strategy=VitisOptStrategy.PERFORMANCE,
         enable_debug=False,
         floorplan_file=None,
+        enable_link=True,
     ):
         super().__init__()
         self.fpga_part = fpga_part
@@ -359,16 +383,14 @@ class VitisBuild(Transformation):
         self.strategy = strategy
         self.enable_debug = enable_debug
         self.floorplan_file = floorplan_file
+        self.enable_link = enable_link
 
     def apply(self, model):
         _check_vitis_envvars()
         # first infer layouts
         model = model.transform(InferDataLayouts())
         # prepare at global level, then break up into kernels
-        prep_transforms = [
-            InsertIODMA(512),
-            InsertDWC(),
-        ]
+        prep_transforms = [InsertIODMA(512), InsertDWC()]
         for trn in prep_transforms:
             model = model.transform(trn)
             model = model.transform(GiveUniqueNodeNames())
@@ -405,17 +427,18 @@ class VitisBuild(Transformation):
             kernel_model.set_metadata_prop("platform", "alveo")
             kernel_model.save(dataflow_model_filename)
         # Assemble design from kernels
-        model = model.transform(
-            VitisLink(
-                self.platform,
-                round(1000 / self.period_ns),
-                strategy=self.strategy,
-                enable_debug=self.enable_debug,
+        if self.enable_link:
+            model = model.transform(
+                VitisLink(
+                    self.platform,
+                    round(1000 / self.period_ns),
+                    strategy=self.strategy,
+                    enable_debug=self.enable_debug,
+                )
             )
-        )
         # set platform attribute for correct remote execution
         model.set_metadata_prop("platform", "alveo")
 
-        #create driver
+        # create driver
         model = model.transform(MakePYNQDriver(platform="alveo"))
         return (model, False)
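
A hedged usage sketch for the new enable_link flag; the part and platform strings are placeholders, and the keyword names follow the attribute assignments in the class:

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy

model = ModelWrapper("dataflow_model.onnx")  # placeholder model file
model = model.transform(
    VitisBuild(
        fpga_part="xcu250-figd2104-2L-e",      # placeholder Alveo U250 part
        period_ns=5.0,                         # 200 MHz target clock
        platform="xilinx_u250_xdma_201830_2",  # placeholder shell name
        strategy=VitisOptStrategy.PERFORMANCE,
        enable_link=False,  # synthesize the kernels only, skip the VitisLink step
    )
)
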
diff --git a/src/finn/transformation/move_reshape.py b/src/finn/transformation/move_reshape.py
index cb8deaeec4b79d3c47d7705ff8f9bf72a085dfc0..6c9a2973376be2c4744bc23db2cc975be8e7d52a 100644
--- a/src/finn/transformation/move_reshape.py
+++ b/src/finn/transformation/move_reshape.py
@@ -1,3 +1,6 @@
+import warnings
+
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.util.basic import get_by_name, is_finn_op
 
@@ -18,33 +21,66 @@ def _is_fpgadataflow_node(node):
 
 
 class RemoveCNVtoFCFlatten(Transformation):
-    """Removes a node that implements a (1, -1) reshape if it is
-    between two fpgadataflow nodes"""
+    """Removes a flatten node if it is between two fpgadataflow nodes.
+    For an NHWC-Conv to FC transition, the preceding transpose is absorbed.
+    The flatten operation can also be implemented by a reshape node."""
 
     def apply(self, model):
-
         graph = model.graph
         graph_modified = False
         for n in graph.node:
-            if n.op_type == "Reshape":
-                shape = model.get_initializer(n.input[1])
-                if (shape == [1, -1]).all():
+            # also support implicit flatten via reshape, e.g. reshape(1,-1)
+            if n.op_type == "Flatten" or n.op_type == "Reshape":
+                ishape = model.get_tensor_shape(n.input[0])
+                oshape = model.get_tensor_shape(n.output[0])
+                if len(oshape) == 2 and ishape[0] == oshape[0]:
                     producer = model.find_producer(n.input[0])
                     if _is_fpgadataflow_node(producer) is True:
+                        # standalone flatten, remove
                         consumer = model.find_consumer(n.output[0])
                         if _is_fpgadataflow_node(consumer) is True:
                             graph_modified = True
                             consumer.input[0] = n.input[0]
                             graph.node.remove(n)
                     elif producer.op_type == "Transpose":
+                        # transpose + flatten, absorb into following node
                         transp_node = producer
-                        producer = model.find_producer(transp_node.input[0])
-                        if _is_fpgadataflow_node(producer) is True:
-                            consumer = model.find_consumer(n.output[0])
-                            if _is_fpgadataflow_node(consumer) is True:
-                                graph_modified = True
-                                consumer.input[0] = transp_node.input[0]
-                                graph.node.remove(n)
-                                graph.node.remove(transp_node)
+                        # check if transpose converts NHWC to NCHW
+                        perms = list(get_by_name(transp_node.attribute, "perm").ints)
+                        if perms == [0, 3, 1, 2]:
+                            producer = model.find_producer(transp_node.input[0])
+                            if _is_fpgadataflow_node(producer) is True:
+                                consumer = model.find_consumer(n.output[0])
+                                if consumer.op_type == "StreamingFCLayer_Batch":
+                                    fc_inst = getCustomOp(consumer)
+                                    mw = fc_inst.get_nodeattr("MW")
+                                    mh = fc_inst.get_nodeattr("MH")
+                                    (b, h, w, c) = model.get_tensor_shape(
+                                        transp_node.input[0]
+                                    )
+                                    # absorb transpose into weight matrix,
+                                    # allowing FC layer to operate on the NHWC input
+                                    W = model.get_initializer(consumer.input[1])
+                                    assert (
+                                        W is not None
+                                    ), "Initializer for matmul weights is not set."
+                                    W_new = W.reshape(c, h, w, mh)
+                                    W_new = W_new.transpose((1, 2, 0, 3))
+                                    W_new = W_new.reshape(mw, mh)
+                                    model.set_initializer(consumer.input[1], W_new)
+                                    # remove transpose & flatten nodes
+                                    consumer.input[0] = transp_node.input[0]
+                                    graph.node.remove(n)
+                                    graph.node.remove(transp_node)
+                                    graph_modified = True
+                                else:
+                                    warnings.warn(
+                                        "Could not absorb transpose->flatten \
+                                        into subsequent node"
+                                    )
+                        else:
+                            warnings.warn(
+                                "Unsupported transpose node before flatten layer"
+                            )
 
         return (model, graph_modified)
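
The weight re-ordering in the branch above can be sanity-checked with plain numpy: multiplying the NHWC-flattened activation by the permuted weights matches the original NCHW-flattened path. All shapes below are made up for illustration:

import numpy as np

b, h, w, c, mh = 1, 2, 3, 4, 5
mw = h * w * c
x_nchw = np.random.rand(b, c, h, w)
x_nhwc = x_nchw.transpose(0, 2, 3, 1)
W = np.random.rand(mw, mh)  # rows indexed in (c, h, w) order, as in the NCHW graph
W_new = W.reshape(c, h, w, mh).transpose(1, 2, 0, 3).reshape(mw, mh)
assert np.allclose(x_nchw.reshape(b, mw) @ W, x_nhwc.reshape(b, mw) @ W_new)
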
diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py
index 876f8892dbc9c42189ee8dc06ff5eb407f7a0946..ea547571677a9d90a226b55de8582145b8c298f4 100644
--- a/src/finn/transformation/streamline/__init__.py
+++ b/src/finn/transformation/streamline/__init__.py
@@ -31,41 +31,38 @@ from pkgutil import extend_path
 __path__ = extend_path(__path__, __name__)
 
 from finn.transformation.base import Transformation
-from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.batchnorm_to_affine import BatchNormToAffine
 from finn.transformation.general import (
-    ConvertSubToAdd,
     ConvertDivToMul,
+    ConvertSubToAdd,
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
 )
-
+from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.streamline.absorb import (
+    Absorb1BitMulIntoConv,
+    Absorb1BitMulIntoMatMul,
     AbsorbAddIntoMultiThreshold,
     AbsorbMulIntoMultiThreshold,
-    FactorOutMulSignMagnitude,
-    Absorb1BitMulIntoMatMul,
-    Absorb1BitMulIntoConv,
     AbsorbSignBiasIntoMultiThreshold,
+    FactorOutMulSignMagnitude,
 )
-
 from finn.transformation.streamline.collapse_repeated import (
     CollapseRepeatedAdd,
     CollapseRepeatedMul,
 )
-
+from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.transformation.streamline.reorder import (
+    MoveAddPastConv,
     MoveAddPastMul,
-    MoveScalarMulPastMatMul,
+    MoveMulPastMaxPool,
     MoveScalarAddPastMatMul,
-    MoveAddPastConv,
+    MoveScalarLinearPastInvariants,
     MoveScalarMulPastConv,
-    MoveMulPastMaxPool,
+    MoveScalarMulPastMatMul,
 )
-
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.transformation.streamline.sign_to_thres import ConvertSignToThres
-from finn.transformation.batchnorm_to_affine import BatchNormToAffine
-from finn.transformation.streamline.remove import RemoveIdentityOps
 
 
 class Streamline(Transformation):
@@ -78,6 +75,7 @@ class Streamline(Transformation):
             BatchNormToAffine(),
             ConvertSignToThres(),
             MoveMulPastMaxPool(),
+            MoveScalarLinearPastInvariants(),
             AbsorbSignBiasIntoMultiThreshold(),
             MoveAddPastMul(),
             MoveScalarAddPastMatMul(),
diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py
index fa2d7a714ad894ebb19099c7ed73e42e12ffdf44..1e2830356fe0133038caaa1dbc43f97ca98378d1 100644
--- a/src/finn/transformation/streamline/absorb.py
+++ b/src/finn/transformation/streamline/absorb.py
@@ -27,16 +27,16 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
-from onnx import helper as oh
 import warnings
+from onnx import helper as oh
 
-from finn.core.datatype import DataType
 import finn.core.data_layout as DataLayout
-from finn.transformation.base import Transformation
-from finn.util.basic import get_by_name
+from finn.core.datatype import DataType
 from finn.custom_op.registry import getCustomOp
-from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.base import Transformation
 from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
 
 
 class AbsorbSignBiasIntoMultiThreshold(Transformation):
@@ -308,8 +308,8 @@ class Absorb1BitMulIntoConv(Transformation):
 
 
 class AbsorbTransposeIntoMultiThreshold(Transformation):
-    """Change (NHWCTranpose -> MultiThreshold -> NCHWTranspose) to (MultiThreshold)
-    with NHWC mode."""
+    """Change (NCHWTranspose -> MultiThreshold -> NHWCTranspose) to (MultiThreshold)
+    with NHWC mode. For (NCHWTranspose -> MultiThreshold) move Transpose past MT."""
 
     def apply(self, model):
         graph = model.graph
@@ -338,24 +338,26 @@ class AbsorbTransposeIntoMultiThreshold(Transformation):
                                 graph.node.remove(n)
                                 graph.node.remove(final_t_cand)
                                 graph_modified = True
-                        elif final_t_cand.op_type == "Reshape":
-                            oshape = model.get_tensor_shape(final_t_cand.output[0])
-                            if len(oshape) == 2:
-                                # transition to FC part, can still use NHWC
-                                mt = getCustomOp(mt_cand)
-                                mt.set_nodeattr("data_layout", "NHWC")
-                                # get rid of first tranpose node
-                                mt_cand.input[0] = n.input[0]
-                                # fix output shape for MultiThreshold
-                                mt_ishape = model.get_tensor_shape(mt_cand.input[0])
-                                (b, h, w, c) = mt_ishape
-                                assert (
-                                    h == 1 and w == 1
-                                ), """Untested spatial dim
-                                in conv->fc transition, proceed with caution!"""
-                                model.set_tensor_shape(mt_cand.output[0], mt_ishape)
-                                graph.node.remove(n)
-                                graph_modified = True
+                        else:
+                            mt = getCustomOp(mt_cand)
+                            mt.set_nodeattr("data_layout", "NHWC")
+                            # get rid of first transpose node
+                            mt_cand.input[0] = n.input[0]
+                            graph.node.remove(n)
+                            # fix output shape for MultiThreshold
+                            mt_ishape = model.get_tensor_shape(mt_cand.input[0])
+                            model.set_tensor_shape(mt_cand.output[0], mt_ishape)
+                            # re-insert Transpose behind MultiThreshold
+                            transpose_output = model.make_new_valueinfo_name()
+                            new_transpose = oh.make_node(
+                                "Transpose",
+                                [mt_cand.output[0]],
+                                [transpose_output],
+                                perm=[0, 3, 1, 2],
+                            )
+                            graph.node.insert(node_ind + 1, new_transpose)
+                            final_t_cand.input[0] = transpose_output
+                            graph_modified = True
         if graph_modified:
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
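
With the new else branch, a leading NCHW-to-NHWC Transpose in front of a MultiThreshold is absorbed even when no matching Transpose follows; a fresh NCHW Transpose is re-inserted behind the MultiThreshold instead. A short usage sketch, with a placeholder model file name:

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.streamline.absorb import AbsorbTransposeIntoMultiThreshold

model = ModelWrapper("streamlined_model.onnx")  # placeholder model file
model = model.transform(AbsorbTransposeIntoMultiThreshold())
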
diff --git a/src/finn/transformation/streamline/collapse_repeated.py b/src/finn/transformation/streamline/collapse_repeated.py
index 19f1ec3e836f3ec38aa4b716f5d6a25ca0782197..50265046d94db1e7233a45b934fd68f08431a95d 100644
--- a/src/finn/transformation/streamline/collapse_repeated.py
+++ b/src/finn/transformation/streamline/collapse_repeated.py
@@ -28,15 +28,15 @@
 
 from onnx import helper as oh
 
+from finn.core.datatype import DataType
 from finn.transformation.base import Transformation
 from finn.transformation.infer_shapes import InferShapes
-from finn.core.datatype import DataType
 
 
 class CollapseRepeatedOp(Transformation):
     """Collapse repeated consecutive operations with constant parameters into
     a single operation. make_collapsed_param_fxn must take two tensors and
-    return a tensor which gives the equivalent result using a single op. """
+    return a tensor which gives the equivalent result using a single op."""
 
     def __init__(self, op_name, make_collapsed_param_fxn):
         super().__init__()
diff --git a/src/finn/transformation/streamline/remove.py b/src/finn/transformation/streamline/remove.py
index 12c6984c6e66e1917d2a1e0a74c8620ccb6afabc..27e420a7936c2d9203150d2d682bf45e1aff0638 100644
--- a/src/finn/transformation/streamline/remove.py
+++ b/src/finn/transformation/streamline/remove.py
@@ -27,13 +27,37 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
+import numpy as np
+
 from finn.transformation.base import Transformation
 from finn.transformation.infer_shapes import InferShapes
-import numpy as np
+
+
+def _remove_node_and_rewire(model, node):
+    producer = model.find_producer(node.input[0])
+    if producer is not None:
+        # wire output tensor to
+        # output of producer node
+        producer.output[0] = node.output[0]
+    else:
+        # node is first in graph
+        consumer = model.find_consumer(node.output[0])
+        assert consumer is not None, "Whole graph is identity"
+        assert consumer.input[0] == node.output[0]
+        # rewire consumer's input directly to graph input
+        consumer.input[0] = node.input[0]
+    # remove node
+    model.graph.node.remove(node)
 
 
 class RemoveIdentityOps(Transformation):
-    """Remove identity ops like Add/Sub with zero or Mul/Div with one"""
+    """Remove identity ops like Add/Sub with zero or Mul/Div with one. A tolerance
+    value (defaults to 1e-05) can be specified during init for the comparison
+    to zero/one."""
+
+    def __init__(self, atol=1e-05):
+        super().__init__()
+        self.atol = atol
 
     def apply(self, model):
         graph = model.graph
@@ -47,12 +71,11 @@ class RemoveIdentityOps(Transformation):
                 and not model.is_join_node(n)
             ):
                 A = model.get_initializer(n.input[1])
-                if A is not None and (A == np.zeros_like(A)).all():
-                    producer = model.find_producer(n.input[0])
-                    # remove node and wire output tensor to
-                    # output of producer node
-                    producer.output[0] = n.output[0]
-                    graph.node.remove(n)
+                if (
+                    A is not None
+                    and np.isclose(A, np.zeros_like(A), atol=self.atol).all()
+                ):
+                    _remove_node_and_rewire(model, n)
 
             elif (
                 n.op_type in ["Mul", "Div"]
@@ -60,11 +83,10 @@ class RemoveIdentityOps(Transformation):
                 and not model.is_join_node(n)
             ):
                 A = model.get_initializer(n.input[1])
-                if A is not None and (A == np.ones_like(A)).all():
-                    producer = model.find_producer(n.input[0])
-                    # remove node and wire output tensor to
-                    # output of producer node
-                    producer.output[0] = n.output[0]
-                    graph.node.remove(n)
+                if (
+                    A is not None
+                    and np.isclose(A, np.ones_like(A), atol=self.atol).all()
+                ):
+                    _remove_node_and_rewire(model, n)
         model = model.transform(InferShapes())
         return (model, graph_modified)
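
A brief usage sketch of the new tolerance parameter; the model file name is a placeholder:

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.streamline.remove import RemoveIdentityOps

model = ModelWrapper("model.onnx")  # placeholder model file
# Add/Sub initializers within atol of zero and Mul/Div initializers roughly
# within atol of one are treated as identities and removed
model = model.transform(RemoveIdentityOps(atol=1e-4))
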
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index 7163a95c4dbbe5c8bcee4ebeea87c5e9611c179e..1b22f474abe3f59ac91551efa3661b2612442776 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -28,19 +28,19 @@
 
 import numpy as np
 import warnings
-from onnx import helper as oh
 from onnx import TensorProto
+from onnx import helper as oh
 
-from finn.transformation.base import Transformation
 import finn.core.data_layout as DataLayout
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.core.datatype import DataType
 from finn.core.onnx_exec import execute_node
-from finn.util.basic import get_by_name
 from finn.custom_op.registry import getCustomOp
+from finn.transformation.base import Transformation
 from finn.transformation.general import SortGraph
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import get_by_name
 
 
 class MoveAddPastMul(Transformation):
@@ -645,7 +645,8 @@ class MoveScalarLinearPastInvariants(Transformation):
 
 
 class MakeMaxPoolNHWC(Transformation):
-    """Convert (MaxPool, NHWCTranpose) into (MaxPoolNHWC)."""
+    """Convert (MaxPool, NHWCTranspose) into (NHWCTranspose, MaxPoolNHWC)
+    and (NCHWTranspose, MaxPool) into (MaxPoolNHWC, NCHWTranspose)."""
 
     def apply(self, model):
         graph = model.graph
@@ -655,6 +656,7 @@ class MakeMaxPoolNHWC(Transformation):
             node_ind += 1
             if n.op_type == "MaxPool":
                 consumer = model.find_consumer(n.output[0])
+                producer = model.find_producer(n.input[0])
                 if consumer is not None and consumer.op_type == "Transpose":
                     perms = list(get_by_name(consumer.attribute, "perm").ints)
                     if perms == [0, 2, 3, 1]:
@@ -674,6 +676,25 @@ class MakeMaxPoolNHWC(Transformation):
                         graph.node.remove(consumer)
                         graph.node.insert(node_ind - 1, consumer)
                         graph_modified = True
+                elif producer is not None and producer.op_type == "Transpose":
+                    perms = list(get_by_name(producer.attribute, "perm").ints)
+                    if perms == [0, 3, 1, 2]:
+                        n.op_type = "MaxPoolNHWC"
+                        n.domain = "finn.custom_op.general"
+                        start_name = producer.input[0]
+                        mid_name = n.input[0]
+                        end_name = n.output[0]
+                        (b, hi, wi, c) = model.get_tensor_shape(start_name)
+                        (b, c, ho, wo) = model.get_tensor_shape(end_name)
+                        producer.input[0] = mid_name
+                        producer.output[0] = end_name
+                        n.input[0] = start_name
+                        n.output[0] = mid_name
+                        model.set_tensor_shape(mid_name, (b, ho, wo, c))
+                        model.set_tensor_shape(end_name, (b, c, ho, wo))
+                        graph.node.remove(producer)
+                        graph.node.insert(node_ind, producer)
+                        graph_modified = True
         return (model, graph_modified)
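
Usage of the transform is unchanged; it simply matches the additional Transpose-before-MaxPool pattern now. Sketch with a placeholder model file name:

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.streamline.reorder import MakeMaxPoolNHWC

model = ModelWrapper("model_with_maxpool.onnx")  # placeholder model file
model = model.transform(MakeMaxPoolNHWC())
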
 
 
diff --git a/src/finn/util/imagenet.py b/src/finn/util/imagenet.py
index 71ed9d9d260e2b38b5d9ec47f728ad401e526ca8..abd412e8d963cbcc80370298fb833de86a218c41 100644
--- a/src/finn/util/imagenet.py
+++ b/src/finn/util/imagenet.py
@@ -26,11 +26,12 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
 import numpy as np
+import os
 from PIL import Image
+
 from finn.core.data_layout import NCHW, NHWC
-from finn.util.test import resize_smaller_side, crop_center
+from finn.util.test import crop_center, resize_smaller_side
 
 
 def get_val_images(n_images=100, interleave_classes=False):
diff --git a/src/finn/util/pytorch.py b/src/finn/util/pytorch.py
index f174c24601578cf827cb0da770f29889344e62b8..18010083f7beb8c71c3a6ae5abae075d51e57cf9 100644
--- a/src/finn/util/pytorch.py
+++ b/src/finn/util/pytorch.py
@@ -26,7 +26,6 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import torch
-
 from torch.nn import Module, Sequential
 
 
diff --git a/src/finn/util/test.py b/src/finn/util/test.py
index 0a34751786170a03361d6a17a24c7250c5ce49fd..9c5462ae7f3ca3122fe672f8f01e939e398963a8 100644
--- a/src/finn/util/test.py
+++ b/src/finn/util/test.py
@@ -26,22 +26,25 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import onnx
-import onnx.numpy_helper as nph
 import pkg_resources as pk
-from pkgutil import get_data
-from brevitas_examples import bnn_pynq, imagenet_classification
-import numpy as np
+
 import pytest
+
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
+import os
+import torchvision.transforms.functional as torchvision_util
 import warnings
+from brevitas_examples import bnn_pynq, imagenet_classification
+from pkgutil import get_data
+
 from finn.core.modelwrapper import ModelWrapper
-import os
-from finn.util.basic import pynq_part_map, alveo_part_map, alveo_default_platform
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy
-from finn.custom_op.registry import getCustomOp
-from finn.core.onnx_exec import execute_onnx
-import torchvision.transforms.functional as torchvision_util
+from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map
 
 # map of (wbits,abits) -> model
 example_map = {
diff --git a/src/finn/util/vcd.py b/src/finn/util/vcd.py
index a4400f7bd7e75549189f081ce255fd67c49b3746..6a5a68f09930783f5a4e094ea88d6eeb9e07b99a 100644
--- a/src/finn/util/vcd.py
+++ b/src/finn/util/vcd.py
@@ -26,9 +26,10 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import multiprocessing as mp
 from vcdvcd import VCDVCD
+
 from finn.util.basic import get_num_default_workers
-import multiprocessing as mp
 
 # string patterns to search for to find particular interfaces
 # streaming interfaces
@@ -162,7 +163,9 @@ def _get_stats(x):
     return (x[0], get_stream_if_stats(x[1], x[0]))
 
 
-def get_all_stream_if_stats(vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}", num_workers=None):
+def get_all_stream_if_stats(
+    vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}", num_workers=None
+):
     """Return a list of streaming interface stats, sorted by the percentage
     for the given sort_by key. If stream_ifs is None, all streaming interface
     stats will be returned, otherwise treated as a list of interface names to
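
A minimal call sketch for the rewrapped signature above; the trace file name is a placeholder and the keyword arguments shown are optional:

from finn.util.vcd import get_all_stream_if_stats

# "trace.vcd" is a placeholder rtlsim trace; num_workers=None presumably falls
# back to the default worker count (get_num_default_workers)
stats = get_all_stream_if_stats("trace.vcd", num_workers=4)
print(stats)
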
diff --git a/src/finn/util/visualization.py b/src/finn/util/visualization.py
index d8547a32e06aa3b688601aa550abb2c50bcf77d6..397bebb64c21e9c5a1cb09d2a01fe3e10502b558 100644
--- a/src/finn/util/visualization.py
+++ b/src/finn/util/visualization.py
@@ -27,8 +27,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import inspect
-import os
 import netron
+import os
 from IPython.display import IFrame
 
 
@@ -36,7 +36,27 @@ def showSrc(what):
     print("".join(inspect.getsourcelines(what)[0]))
 
 
-def showInNetron(model_filename):
-    netron.start(model_filename, address=("0.0.0.0", 8081))
-    localhost_url = os.getenv("LOCALHOST_URL", default="localhost")
-    return IFrame(src="http://%s:8081/" % localhost_url, width="100%", height=400)
+def showInNetron(model_filename: str, localhost_url: str = None, port: int = None):
+    """Shows a ONNX model file in the Jupyter Notebook using Netron.
+
+    :param model_filename: The path to the ONNX model file.
+    :type model_filename: str
+
+    :param localhost_url: The IP address used by the Jupyter IFrame to show the model.
+     Defaults to localhost.
+    :type localhost_url: str, optional
+
+    :param port: The port number used by Netron and the Jupyter IFrame to show
+     the ONNX model. Defaults to 8081.
+    :type port: int, optional
+
+    :return: The IFrame displaying the ONNX model.
+    :rtype: IPython.lib.display.IFrame
+    """
+    try:
+        port = port or int(os.getenv("NETRON_PORT", default="8081"))
+    except ValueError:
+        port = 8081
+    localhost_url = localhost_url or os.getenv("LOCALHOST_URL", default="localhost")
+    netron.start(model_filename, address=("0.0.0.0", port), browse=False)
+    return IFrame(src=f"http://{localhost_url}:{port}/", width="100%", height=400)
diff --git a/tests/brevitas/test_brevitas_QConv2d.py b/tests/brevitas/test_brevitas_QConv2d.py
index 198f1e7961a9e160589989b8b34b45b5fda53817..c1f790946bfa5f53194b96b1fea9c1722797a4a0 100644
--- a/tests/brevitas/test_brevitas_QConv2d.py
+++ b/tests/brevitas/test_brevitas_QConv2d.py
@@ -1,17 +1,46 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import pytest
-import os
+
+import brevitas.onnx as bo
 import numpy as np
+import os
 import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantConv2d
-from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.quant import QuantType
+from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
 from brevitas.core.stats import StatsOp
+from brevitas.nn import QuantConv2d
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
 import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
 from finn.util.basic import gen_finn_dt_tensor
 
@@ -19,8 +48,9 @@ export_onnx_path = "test_brevitas_conv.onnx"
 
 
 @pytest.mark.parametrize("dw", [False, True])
+@pytest.mark.parametrize("bias", [True, False])
 @pytest.mark.parametrize("in_channels", [32])
-def test_brevitas_QConv2d(dw, in_channels):
+def test_brevitas_QConv2d(dw, bias, in_channels):
     ishape = (1, 32, 111, 111)
     if dw is True:
         groups = in_channels
@@ -45,10 +75,8 @@ def test_brevitas_QConv2d(dw, in_channels):
         kernel_size=kernel_size,
         padding=padding,
         stride=stride,
-        bias=False,
+        bias=bias,
         bias_quant_type=QuantType.FP,
-        compute_output_bit_width=False,
-        compute_output_scale=False,
         weight_bit_width=4,
         weight_quant_type=QuantType.INT,
         weight_scaling_impl_type=ScalingImplType.STATS,
@@ -60,7 +88,7 @@ def test_brevitas_QConv2d(dw, in_channels):
     )
     weight_tensor = gen_finn_dt_tensor(DataType.INT4, w_shape)
     b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
-
+    b_conv.eval()
     bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
@@ -69,7 +97,6 @@ def test_brevitas_QConv2d(dw, in_channels):
     odict = oxe.execute_onnx(model, idict, True)
     produced = odict[model.graph.output[0].name]
     inp_tensor = torch.from_numpy(inp_tensor).float()
-    b_conv.eval()
     expected = b_conv.forward(inp_tensor).detach().numpy()
 
     assert np.isclose(produced, expected, atol=1e-3).all()
diff --git a/tests/brevitas/test_brevitas_avg_pool_export.py b/tests/brevitas/test_brevitas_avg_pool_export.py
index f3d6c5dde7179bec8fe97e2a6c791afb5733514c..68e563da6351dad6e61d5a2d1ffcbfed9859d0f5 100644
--- a/tests/brevitas/test_brevitas_avg_pool_export.py
+++ b/tests/brevitas/test_brevitas_avg_pool_export.py
@@ -1,27 +1,52 @@
-import os
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import pytest
 
-import onnx  # noqa
-import torch
 import numpy as np
-import brevitas.onnx as bo
+import os
+import torch
+from brevitas.export import FINNManager
 from brevitas.nn import QuantAvgPool2d
-from brevitas.quant_tensor import pack_quant_tensor
-from brevitas.core.quant import QuantType
-from finn.core.modelwrapper import ModelWrapper
+from brevitas.quant_tensor import QuantTensor
+
+import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
+from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
-
-import pytest
 
 export_onnx_path = "test_brevitas_avg_pool_export.onnx"
 
 
 @pytest.mark.parametrize("kernel_size", [2, 3])
 @pytest.mark.parametrize("stride", [1, 2])
-@pytest.mark.parametrize("signed", [False, True])
+@pytest.mark.parametrize("signed", [True, False])
 @pytest.mark.parametrize("bit_width", [2, 4])
 @pytest.mark.parametrize("input_bit_width", [4, 8, 16])
 @pytest.mark.parametrize("channels", [2, 4])
@@ -29,73 +54,46 @@ export_onnx_path = "test_brevitas_avg_pool_export.onnx"
 def test_brevitas_avg_pool_export(
     kernel_size, stride, signed, bit_width, input_bit_width, channels, idim
 ):
-    ishape = (1, channels, idim, idim)
-    ibw_tensor = torch.Tensor([input_bit_width])
 
-    b_avgpool = QuantAvgPool2d(
-        kernel_size=kernel_size,
-        stride=stride,
-        bit_width=bit_width,
-        quant_type=QuantType.INT,
+    quant_avgpool = QuantAvgPool2d(
+        kernel_size=kernel_size, stride=stride, bit_width=bit_width
     )
-    # call forward pass manually once to cache scale factor and bitwidth
-    input_tensor = torch.from_numpy(np.zeros(ishape)).float()
-    scale = np.ones((1, channels, 1, 1))
-    output_scale = torch.from_numpy(scale).float()
-    input_quant_tensor = pack_quant_tensor(
-        tensor=input_tensor, scale=output_scale, bit_width=ibw_tensor, signed=signed
-    )
-    bo.export_finn_onnx(b_avgpool, ishape, export_onnx_path, input_t=input_quant_tensor)
-    model = ModelWrapper(export_onnx_path)
+    quant_avgpool.eval()
 
-    # determine input FINN datatype
-    if signed is True:
-        prefix = "INT"
-    else:
-        prefix = "UINT"
+    # determine input FINN datatype and generate test input data
+    prefix = "INT" if signed else "UINT"
     dt_name = prefix + str(input_bit_width)
     dtype = DataType[dt_name]
-    model = model.transform(InferShapes())
-    model = model.transform(InferDataTypes())
-
-    # execution with input tensor using integers and scale = 1
-    # calculate golden output
-    inp = gen_finn_dt_tensor(dtype, ishape)
-    input_tensor = torch.from_numpy(inp).float()
-    input_quant_tensor = pack_quant_tensor(
-        tensor=input_tensor, scale=output_scale, bit_width=ibw_tensor, signed=signed
-    )
-    b_avgpool.eval()
-    expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy()
-
-    # finn execution
-    idict = {model.graph.input[0].name: inp}
-    odict = oxe.execute_onnx(model, idict, True)
-    produced = odict[model.graph.output[0].name]
-    assert (expected == produced).all()
-
-    # execution with input tensor using float and scale != 1
-    scale = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype(
+    input_shape = (1, channels, idim, idim)
+    input_array = gen_finn_dt_tensor(dtype, input_shape)
+    # Brevitas QuantAvgPool layers need QuantTensors to export correctly
+    # which requires setting up a QuantTensor instance with the scale
+    # factor, zero point, bitwidth and signedness
+    scale_array = np.random.uniform(low=0, high=1, size=(1, channels, 1, 1)).astype(
         np.float32
     )
-    inp_tensor = inp * scale
-    input_tensor = torch.from_numpy(inp_tensor).float()
-    input_scale = torch.from_numpy(scale).float()
-    input_quant_tensor = pack_quant_tensor(
-        tensor=input_tensor, scale=input_scale, bit_width=ibw_tensor, signed=signed
+    input_tensor = torch.from_numpy(input_array * scale_array).float()
+    scale_tensor = torch.from_numpy(scale_array).float()
+    zp = torch.tensor(0.0)
+    input_quant_tensor = QuantTensor(
+        input_tensor, scale_tensor, zp, input_bit_width, signed
+    )
+
+    # export
+    FINNManager.export(
+        quant_avgpool, export_path=export_onnx_path, input_t=input_quant_tensor
     )
-    # export again to set the scale values correctly
-    bo.export_finn_onnx(b_avgpool, ishape, export_onnx_path, input_t=input_quant_tensor)
     model = ModelWrapper(export_onnx_path)
     model = model.transform(InferShapes())
     model = model.transform(InferDataTypes())
-    b_avgpool.eval()
-    expected = b_avgpool.forward(input_quant_tensor).tensor.detach().numpy()
-    # finn execution
-    idict = {model.graph.input[0].name: inp_tensor}
-    odict = oxe.execute_onnx(model, idict, True)
-    produced = odict[model.graph.output[0].name]
-
-    assert np.isclose(expected, produced).all()
 
+    # reference brevitas output
+    ref_output_array = quant_avgpool(input_quant_tensor).tensor.detach().numpy()
+    # finn output
+    idict = {model.graph.input[0].name: input_array}
+    odict = oxe.execute_onnx(model, idict, True)
+    finn_output = odict[model.graph.output[0].name]
+    # compare outputs
+    assert np.isclose(ref_output_array, finn_output).all()
+    # cleanup
     os.remove(export_onnx_path)
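The key change in the test above is the new export pattern: a Brevitas QuantAvgPool2d is exported through FINNManager with its input wrapped in a QuantTensor, so that scale, zero point, bit width and signedness are available to the exporter. A minimal sketch of that pattern, assuming the same Brevitas/FINN APIs as the test uses (shapes and the output filename below are illustrative only):

    import torch
    from brevitas.export import FINNManager
    from brevitas.nn import QuantAvgPool2d
    from brevitas.quant_tensor import QuantTensor

    pool = QuantAvgPool2d(kernel_size=2, stride=2, bit_width=4)
    pool.eval()  # export traces eval-mode behaviour, as in the test

    x = torch.randn(1, 2, 4, 4)          # illustrative input
    scale = torch.ones(1, 2, 1, 1)       # per-channel scale factors
    zero_point = torch.tensor(0.0)
    # QuantTensor(value, scale, zero_point, bit_width, signed), as used above
    x_qt = QuantTensor(x, scale, zero_point, 4, True)

    FINNManager.export(pool, export_path="quant_avgpool_sketch.onnx", input_t=x_qt)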
diff --git a/tests/brevitas/test_brevitas_cnv.py b/tests/brevitas/test_brevitas_cnv.py
index 4b072535bdfe102a6c59ebd4c730de9ae827c00e..8a1783ae9468244ad7e0999b59c3c7b696682dae 100644
--- a/tests/brevitas/test_brevitas_cnv.py
+++ b/tests/brevitas/test_brevitas_cnv.py
@@ -26,19 +26,20 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
 import pkg_resources as pk
+
 import pytest
 
 import brevitas.onnx as bo
 import numpy as np
+import os
 import torch
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.general import GiveUniqueNodeNames, RemoveStaticGraphInputs
+from finn.transformation.infer_shapes import InferShapes
 from finn.util.test import get_test_model_trained
 
 export_onnx_path = "test_brevitas_cnv.onnx"
diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py
index 9115352796b0b90257d64ce9b14163ad372c9c98..4418368350b627644c76a7483c5c5dfaf031cda0 100644
--- a/tests/brevitas/test_brevitas_debug.py
+++ b/tests/brevitas/test_brevitas_debug.py
@@ -26,14 +26,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from pkgutil import get_data
-
-import os
 import brevitas.onnx as bo
 import numpy as np
 import onnx
 import onnx.numpy_helper as nph
+import os
 import torch
+from pkgutil import get_data
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py
index 24a453007515ba2eba4369a6b76829099f722168..b280ab9e116f8b4735f31d16e08d8f1055470155 100644
--- a/tests/brevitas/test_brevitas_fc.py
+++ b/tests/brevitas/test_brevitas_fc.py
@@ -26,8 +26,6 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from pkgutil import get_data
-
 import pytest
 
 import brevitas.onnx as bo
@@ -35,6 +33,7 @@ import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 import torch
+from pkgutil import get_data
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
@@ -46,6 +45,7 @@ from finn.util.test import get_test_model_trained
 
 export_onnx_path = make_build_dir("test_brevitas_fc_")
 
+
 # act bits
 @pytest.mark.parametrize("abits", [1, 2])
 # weight bits
diff --git a/tests/brevitas/test_brevitas_mobilenet.py b/tests/brevitas/test_brevitas_mobilenet.py
index 94f937ef2afc9eb86665e26d703be9f01e2163a0..eb642adada9bd9abb8a328518770899d3da96ada 100644
--- a/tests/brevitas/test_brevitas_mobilenet.py
+++ b/tests/brevitas/test_brevitas_mobilenet.py
@@ -1,26 +1,56 @@
-from PIL import Image
-import numpy as np
-import brevitas.onnx as bo
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 import pytest
+
+import brevitas.onnx as bo
+import numpy as np
 import torch
-from finn.util.basic import make_build_dir
-from finn.util.pytorch import NormalizePreProc
-from finn.util.test import get_test_model_trained, resize_smaller_side, crop_center
-from finn.core.modelwrapper import ModelWrapper
+from PIL import Image
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
 from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     GiveUniqueParameterTensors,
 )
-from finn.transformation.merge_onnx_models import MergeONNXModels
-import finn.transformation.streamline.absorb as absorb
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.insert_topk import InsertTopK
-import finn.core.onnx_exec as oxe
+from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.util.basic import make_build_dir
+from finn.util.pytorch import NormalizePreProc
+from finn.util.test import crop_center, get_test_model_trained, resize_smaller_side
 
 
 @pytest.mark.xfail
diff --git a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py b/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
index 9c7296b7b3b6d36cfb43b6d9e96e7fba6bbce49a..6ddf71a5cba14916e3bcb13e65b1da2f4fddc63f 100644
--- a/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_non_scaled_QuantHardTanh_export.py
@@ -1,16 +1,46 @@
-import os
-import onnx  # noqa
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import brevitas.onnx as bo
 import numpy as np
+import onnx  # noqa
+import os
 import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantHardTanh
+from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
-import pytest
-from finn.core.modelwrapper import ModelWrapper
+from brevitas.nn import QuantHardTanh
+
 import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
-from brevitas.core.quant import QuantType
 
 export_onnx_path = "test_brevitas_non_scaled_QuantHardTanh_export.onnx"
 
diff --git a/tests/brevitas/test_brevitas_qlinear.py b/tests/brevitas/test_brevitas_qlinear.py
new file mode 100644
index 0000000000000000000000000000000000000000..873866b37727730b7cedd035f5edd93f7c1afe32
--- /dev/null
+++ b/tests/brevitas/test_brevitas_qlinear.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import brevitas.onnx as bo
+import numpy as np
+import os
+import torch
+from brevitas.core.quant import QuantType
+from brevitas.nn import QuantLinear
+
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import gen_finn_dt_tensor
+
+export_onnx_path = "test_brevitas_qlinear.onnx"
+
+
+@pytest.mark.parametrize("bias", [False, True])
+@pytest.mark.parametrize("out_features", [4])
+@pytest.mark.parametrize("in_features", [3])
+@pytest.mark.parametrize("w_bits", [4])
+@pytest.mark.parametrize("i_dtype", [DataType.UINT4])
+def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
+    i_shape = (1, in_features)
+    w_shape = (out_features, in_features)
+    b_linear = QuantLinear(
+        out_features=out_features,
+        in_features=in_features,
+        bias=bias,
+        bias_quant_type=QuantType.FP,
+        weight_bit_width=w_bits,
+        weight_quant_type=QuantType.INT,
+        weight_scaling_per_output_channel=True,
+    )
+    weight_tensor_fp = np.random.uniform(low=-1.0, high=1.0, size=w_shape).astype(
+        np.float32
+    )
+    b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
+    b_linear.eval()
+    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
+    model = ModelWrapper(export_onnx_path)
+    model = model.transform(InferShapes())
+    inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
+    idict = {model.graph.input[0].name: inp_tensor}
+    odict = oxe.execute_onnx(model, idict, True)
+    produced = odict[model.graph.output[0].name]
+    inp_tensor = torch.from_numpy(inp_tensor).float()
+    expected = b_linear.forward(inp_tensor).detach().numpy()
+
+    assert np.isclose(produced, expected, atol=1e-3).all()
+    os.remove(export_onnx_path)
diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py
index fa114585d31fca629aa759e386aa3fbd04280a2e..bb59a8414feffbb8362de629f8b30ac200a5227f 100644
--- a/tests/brevitas/test_brevitas_relu_act_export.py
+++ b/tests/brevitas/test_brevitas_relu_act_export.py
@@ -1,15 +1,45 @@
-import os
-import onnx  # noqa
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import brevitas.onnx as bo
 import numpy as np
+import onnx  # noqa
+import os
 import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantReLU
 from brevitas.core.quant import QuantType
 from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
-import pytest
-from finn.core.modelwrapper import ModelWrapper
+from brevitas.nn import QuantReLU
+
 import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
 
 export_onnx_path = "test_brevitas_relu_act_export.onnx"
diff --git a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py b/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
index e0ec82ebed44e2e984be9f62e02bc1721a7f9c33..345fae872119c75aa8e85cb5812c94dfc15bad7f 100644
--- a/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
+++ b/tests/brevitas/test_brevitas_scaled_QHardTanh_export.py
@@ -1,15 +1,45 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import brevitas.onnx as bo
+import numpy as np
 import onnx  # noqa
 import os
-import numpy as np
 import torch
-import brevitas.onnx as bo
-from brevitas.nn import QuantHardTanh
-from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.quant import QuantType
+from brevitas.core.restrict_val import RestrictValueType
 from brevitas.core.scaling import ScalingImplType
-import pytest
-from finn.core.modelwrapper import ModelWrapper
+from brevitas.nn import QuantHardTanh
+
 import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
 
 export_onnx_path = "test_brevitas_scaled_QHardTanh_export.onnx"
diff --git a/tests/brevitas/test_brevitas_validate_mobilenet.py b/tests/brevitas/test_brevitas_validate_mobilenet.py
index 42bc3942d1a4f5fbdf70dbb1f1b5e853357abff8..12e7e7aff2ec2ebae3e2ec7713a24046553dc5f2 100644
--- a/tests/brevitas/test_brevitas_validate_mobilenet.py
+++ b/tests/brevitas/test_brevitas_validate_mobilenet.py
@@ -1,30 +1,60 @@
-import os
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import brevitas.onnx as bo
 import csv
 import numpy as np
-import brevitas.onnx as bo
+import os
 import torch
-from finn.util.basic import make_build_dir
-from finn.util.pytorch import NormalizePreProc
-from finn.util.test import get_test_model_trained
+import torchvision.datasets as datasets
+import torchvision.transforms as transforms
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.streamline.absorb as absorb
+import finn.util.imagenet as imagenet_util
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.general import RemoveStaticGraphInputs
-from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     GiveUniqueParameterTensors,
+    RemoveStaticGraphInputs,
 )
-from finn.transformation.merge_onnx_models import MergeONNXModels
-import finn.transformation.streamline.absorb as absorb
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.insert_topk import InsertTopK
-import finn.core.onnx_exec as oxe
-import finn.util.imagenet as imagenet_util
-import pytest
-import torchvision.datasets as datasets
-import torchvision.transforms as transforms
+from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.util.basic import make_build_dir
+from finn.util.pytorch import NormalizePreProc
+from finn.util.test import get_test_model_trained
 
 # normalization (preprocessing) settings for MobileNet-v1 w4a4
 mean = [0.485, 0.456, 0.406]
diff --git a/tests/end2end/test_end2end_access_board.py b/tests/end2end/test_end2end_access_board.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee15980ffb1b750c993a4b499dce57a1b8133e57
--- /dev/null
+++ b/tests/end2end/test_end2end_access_board.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import subprocess
+
+from finn.util.test import get_build_env
+
+
+@pytest.mark.board
+def test_end2end_access_board():
+    build_env = get_build_env("zynq", 5)
+    if build_env["ip"] == "":
+        pytest.skip("PYNQ board IP address not specified")
+    remote_cmd_base = [
+        "ssh",
+        "-o",
+        "PreferredAuthentications=publickey",
+        "-o",
+        "PasswordAuthentication=no",
+        "%s@%s" % (build_env["username"], build_env["ip"]),
+    ]
+    test_text = "BoardIsAccessible"
+    touch_cmd = remote_cmd_base + ["echo %s" % test_text]
+    verif_res = subprocess.run(
+        touch_cmd, stdout=subprocess.PIPE, universal_newlines=True
+    )
+    assert verif_res.returncode == 0
+    assert verif_res.stdout.split("\n")[0] == test_text
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index ddea2dafce02c181a279d9c95759b97dee00a504..00a9fa721a320a8b70ee913e878955b9caddc3bf 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -26,77 +26,76 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
 import pytest
 
+import brevitas.onnx as bo
 import numpy as np
 
 # as of Feb'20 there is a bug that segfaults ONNX shape inference if we
 # import pytorch before onnx, so we make sure to import onnx first
 import onnx  # NOQA
+import os
+import subprocess
 import torch
-import brevitas.onnx as bo
+import warnings
+from collections import OrderedDict
+from dataset_loading import cifar, mnist
+from datetime import datetime
+from scipy.stats import linregress
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
+from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
+from finn.core.throughput_test import throughput_test_remote, throughput_test_rtlsim
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
-
+from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths
 from finn.transformation.general import (
-    RemoveUnusedTensors,
-    RemoveStaticGraphInputs,
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
+    RemoveStaticGraphInputs,
+    RemoveUnusedTensors,
 )
+from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline import Streamline
-from finn.util.test import (
-    get_build_env,
-    load_test_checkpoint_or_skip,
-    get_example_input,
-    get_trained_network_and_ishape,
-    execute_parent,
-    get_topk,
-)
-from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.reorder import (
     MakeMaxPoolNHWC,
     MoveScalarLinearPastInvariants,
 )
-import warnings
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
-from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
-from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths
-from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
-from finn.core.modelwrapper import ModelWrapper
-from scipy.stats import linregress
-from finn.core.throughput_test import throughput_test_remote, throughput_test_rtlsim
-from finn.util.pytorch import ToTensor
-from finn.transformation.merge_onnx_models import MergeONNXModels
-from finn.transformation.insert_topk import InsertTopK
-from finn.core.datatype import DataType
-from dataset_loading import mnist, cifar
-from datetime import datetime
-import subprocess
 from finn.util.gdrive import upload_to_end2end_dashboard
-from collections import OrderedDict
+from finn.util.pytorch import ToTensor
+from finn.util.test import (
+    execute_parent,
+    get_build_env,
+    get_example_input,
+    get_topk,
+    get_trained_network_and_ishape,
+    load_test_checkpoint_or_skip,
+)
 
 build_dir = os.environ["FINN_BUILD_DIR"]
 target_clk_ns = 10
@@ -129,12 +128,7 @@ def update_dashboard_data(topology, wbits, abits, key, val):
 def fold_tfc(model):
     fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
     # (PE, SIMD, ramstyle) for each layer
-    config = [
-        (16, 49, "block"),
-        (8, 8, "auto"),
-        (8, 8, "auto"),
-        (10, 8, "distributed"),
-    ]
+    config = [(16, 49, "block"), (8, 8, "auto"), (8, 8, "auto"), (10, 8, "distributed")]
     for fcl, (pe, simd, ramstyle) in zip(fc_layers, config):
         fcl_inst = getCustomOp(fcl)
         fcl_inst.set_nodeattr("PE", pe)
@@ -312,8 +306,8 @@ class TestEnd2End:
     def test_export(self, topology, wbits, abits):
         if wbits > abits:
             pytest.skip("No wbits > abits end2end network configs for now")
-        if topology == "lfc" and wbits > 1:
-            pytest.skip("Skipping non-existing lfc configs")
+        if topology == "lfc" and not (wbits == 1 and abits == 1):
+            pytest.skip("Skipping certain lfc configs")
         (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
         chkpt_name = get_checkpoint_name(topology, wbits, abits, "export")
         bo.export_finn_onnx(model, ishape, chkpt_name)
@@ -352,6 +346,8 @@ class TestEnd2End:
         assert os.path.isfile(chkpt_preproc_name)
         # join preprocessing and core model
         pre_model = ModelWrapper(chkpt_preproc_name)
+        pre_model = pre_model.transform(InferShapes())
+        pre_model = pre_model.transform(FoldConstants())
         model = model.transform(MergeONNXModels(pre_model))
         # add input quantization annotation: UINT8 for all BNN-PYNQ models
         global_inp_name = model.graph.input[0].name
@@ -372,6 +368,7 @@ class TestEnd2End:
     def test_streamline(self, topology, wbits, abits):
         prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post")
         model = load_test_checkpoint_or_skip(prev_chkpt_name)
+        model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold())
         # move past any reshapes to be able to streamline input scaling
         model = model.transform(MoveScalarLinearPastInvariants())
         model = model.transform(Streamline())
diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py
index eedbf97f389754440a116cf8755c25d597c433ee..7b4cebb52b3e4758746d4054827c6f96e8a4d681 100644
--- a/tests/end2end/test_end2end_cybsec_mlp.py
+++ b/tests/end2end/test_end2end_cybsec_mlp.py
@@ -26,26 +26,29 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import pkg_resources as pk
+
+import pytest
+
+import brevitas.onnx as bo
+import json
+import numpy as np
+import os
+import shutil
+import subprocess
 import torch
-from brevitas.nn import QuantLinear, QuantReLU
 import torch.nn as nn
-import numpy as np
+import wget
 from brevitas.core.quant import QuantType
-from brevitas.nn import QuantIdentity
-import brevitas.onnx as bo
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
+from brevitas.nn import QuantIdentity, QuantLinear, QuantReLU
+from brevitas.quant_tensor import QuantTensor
+
 import finn.builder.build_dataflow as build
 import finn.builder.build_dataflow_config as build_cfg
-import os
-import shutil
-from finn.util.test import get_build_env, load_test_checkpoint_or_skip
-import pytest
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
 from finn.util.basic import make_build_dir
-import pkg_resources as pk
-import json
-import wget
-import subprocess
+from finn.util.test import get_build_env, load_test_checkpoint_or_skip
 
 target_clk_ns = 10
 build_kind = "zynq"
@@ -115,19 +118,32 @@ def test_end2end_cybsec_mlp_export():
     model_for_export = CybSecMLPForExport(model)
     export_onnx_path = get_checkpoint_name("export")
     input_shape = (1, 600)
-    bo.export_finn_onnx(model_for_export, input_shape, export_onnx_path)
+    # create a QuantTensor instance to mark the input as bipolar during export
+    input_a = np.random.randint(0, 2, size=input_shape).astype(np.float32)
+    input_a = 2 * input_a - 1
+    scale = 1.0
+    input_t = torch.from_numpy(input_a * scale)
+    input_qt = QuantTensor(
+        input_t, scale=torch.tensor(scale), bit_width=torch.tensor(1.0), signed=True
+    )
+
+    bo.export_finn_onnx(
+        model_for_export, export_path=export_onnx_path, input_t=input_qt
+    )
     assert os.path.isfile(export_onnx_path)
     # fix input datatype
     finn_model = ModelWrapper(export_onnx_path)
     finnonnx_in_tensor_name = finn_model.graph.input[0].name
-    finn_model.set_tensor_datatype(finnonnx_in_tensor_name, DataType.BIPOLAR)
-    finn_model.save(export_onnx_path)
     assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
-    assert len(finn_model.graph.node) == 30
-    assert finn_model.graph.node[0].op_type == "Add"
-    assert finn_model.graph.node[1].op_type == "Div"
-    assert finn_model.graph.node[2].op_type == "MatMul"
+    # verify a few exported ops
+    assert finn_model.graph.node[1].op_type == "Add"
+    assert finn_model.graph.node[2].op_type == "Div"
+    assert finn_model.graph.node[3].op_type == "MatMul"
     assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+    # verify datatypes on some tensors
+    assert finn_model.get_tensor_datatype(finnonnx_in_tensor_name) == DataType.BIPOLAR
+    first_matmul_w_name = finn_model.graph.node[3].input[1]
+    assert finn_model.get_tensor_datatype(first_matmul_w_name) == DataType.INT2
 
 
 @pytest.mark.slow
@@ -213,22 +229,9 @@ echo %s | sudo -S python3.6 validate-unsw-nb15.py --batchsize=10 --limit_batches
         build_env["ip"],
         build_env["target_dir"],
     )
-    rsync_res = subprocess.run(
-        [
-            "sshpass",
-            "-p",
-            build_env["password"],
-            "rsync",
-            "-avz",
-            deploy_dir,
-            remote_target,
-        ]
-    )
+    rsync_res = subprocess.run(["rsync", "-avz", deploy_dir, remote_target])
     assert rsync_res.returncode == 0
     remote_verif_cmd = [
-        "sshpass",
-        "-p",
-        build_env["password"],
         "ssh",
         "%s@%s" % (build_env["username"], build_env["ip"]),
         "sh",
diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py
index c23749829a9d75c9a9519663a872aa1281bd46d3..1289b02636f030397075a9f580ed0977cd465a88 100644
--- a/tests/end2end/test_end2end_mobilenet_v1.py
+++ b/tests/end2end/test_end2end_mobilenet_v1.py
@@ -25,67 +25,55 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import time
 import pytest
 
-from PIL import Image
-import os
-import numpy as np
 import brevitas.onnx as bo
+import numpy as np
+import os
+import time
 import torch
+from PIL import Image
 
-from finn.custom_op.registry import getCustomOp
-from finn.util.pytorch import NormalizePreProc
-from finn.util.test import (
-    get_test_model_trained,
-    load_test_checkpoint_or_skip,
-    resize_smaller_side,
-    crop_center,
-)
-
-from finn.core.modelwrapper import ModelWrapper
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+import finn.transformation.streamline.reorder as reorder
 from finn.core.datatype import DataType
-
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
+from finn.transformation.double_to_single_float import DoubleToSingleFloat
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     GiveUniqueParameterTensors,
     RemoveUnusedTensors,
 )
-from finn.transformation.merge_onnx_models import MergeONNXModels
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.insert_topk import InsertTopK
-import finn.transformation.streamline.absorb as absorb
-import finn.transformation.streamline.reorder as reorder
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.merge_onnx_models import MergeONNXModels
 from finn.transformation.streamline import Streamline
-from finn.transformation.double_to_single_float import DoubleToSingleFloat
-from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul
-from finn.transformation.change_datalayout import ChangeDataLayoutQuantAvgPool2d
+from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.create_dataflow_partition import (
-    CreateDataflowPartition,
-)
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
-    ReplaceVerilogRelPaths,
+from finn.util.basic import alveo_default_platform, alveo_part_map
+from finn.util.pytorch import NormalizePreProc
+from finn.util.test import (
+    crop_center,
+    get_test_model_trained,
+    load_test_checkpoint_or_skip,
+    resize_smaller_side,
 )
-from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
-from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths
-from finn.core.onnx_exec import execute_onnx
-from finn.util.basic import alveo_part_map, alveo_default_platform
-from finn.util.config import extract_model_config_to_json
-from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy
 
 build_dir = os.environ["FINN_BUILD_DIR"]
 
@@ -111,6 +99,7 @@ def test_end2end_mobilenet_export():
     # set input finn datatype to UINT8
     preproc_model.set_tensor_datatype(preproc_model.graph.input[0].name, DataType.UINT8)
     preproc_model = preproc_model.transform(InferShapes())
+    preproc_model = preproc_model.transform(FoldConstants())
     preproc_model = preproc_model.transform(GiveUniqueNodeNames())
     preproc_model = preproc_model.transform(GiveUniqueParameterTensors())
     preproc_model = preproc_model.transform(GiveReadableTensorNames())
@@ -197,6 +186,10 @@ def test_end2end_mobilenet_streamline():
         model = model.transform(GiveReadableTensorNames())
         model = model.transform(InferDataTypes())
     model.save(build_dir + "/end2end_mobilenet_streamlined.onnx")
+    assert (
+        len(model.get_nodes_by_op_type("Add")) == 1
+    )  # only final quantized bias Add op remains
+    assert len(model.get_nodes_by_op_type("Mul")) == 0  # no Mul ops remain
 
 
 def test_end2end_mobilenet_lowering():
@@ -334,101 +327,3 @@ def test_end2end_mobilenet_cppsim():
 
     assert (golden == res_cppsim).all()
     assert np.isclose(golden_prob, res_cppsim_prob).all()
-
-
-@pytest.mark.slow
-@pytest.mark.vivado
-def test_end2end_mobilenet_gen_hls_ip():
-    model = load_test_checkpoint_or_skip(
-        build_dir + "/end2end_mobilenet_dataflow_model.onnx"
-    )
-    start = time.time()
-    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
-    model = model.transform(HLSSynthIP())
-    model = model.transform(ReplaceVerilogRelPaths())
-    end = time.time()
-    elapsed_time = end - start
-    f = open(build_dir + "/end2end_mobilenet_ipgen_time.txt", "w+")
-    f.write("Execution time in seconds: " + str(elapsed_time))
-    f.close()
-
-    model = model.transform(AnnotateResources("hls"))
-    model.save(build_dir + "/end2end_mobilenet_ipgen.onnx")
-
-
-@pytest.mark.slow
-@pytest.mark.vivado
-@pytest.mark.xfail
-def test_end2end_mobilenet_rtlsim():
-    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_ipgen.onnx")
-    x = np.load(build_dir + "/end2end_mobilenet_input.npy")
-    inp_name = model.graph.input[0].name
-    out_name = model.graph.output[0].name
-    inp_dict = {inp_name: x}
-    # node-by-node rtlsim
-    model = model.transform(SetExecMode("rtlsim"))
-    model = model.transform(PrepareRTLSim())
-    model.save(build_dir + "/end2end_mobilenet_ipgen_nodebynode_rtlsim.onnx")
-    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
-    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
-    np.save(
-        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode.npy",
-        res_rtlsim_nodebynode,
-    )
-    a0 = np.load(build_dir + "/end2end_mobilenet_topk_scale.npy")
-    res_rtlsim_nodebynode_prob = (
-        ret_rtlsim_nodebynode[model.graph.node[-2].output[0]] * a0
-    )
-    np.save(
-        build_dir + "/end2end_mobilenet_result_rtlsim_nodebynode_prob.npy",
-        res_rtlsim_nodebynode_prob,
-    )
-
-    # check result with golden values
-    golden = np.load(build_dir + "/end2end_mobilenet_golden_top5.npy")
-    golden_prob = np.load(build_dir + "/end2end_mobilenet_golden_top5_prob.npy")
-
-    assert (golden == res_rtlsim_nodebynode).all()
-    assert np.isclose(golden_prob, res_rtlsim_nodebynode_prob).all()
-
-
-@pytest.mark.slow
-@pytest.mark.vivado
-def test_end2end_mobilenet_set_fifo_depths():
-    model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_ipgen.onnx")
-    start = time.time()
-    model = model.transform(
-        InsertAndSetFIFODepths(
-            test_fpga_part, target_clk_ns, vivado_ram_style=large_fifo_ram_style
-        )
-    )
-    end = time.time()
-    elapsed_time = end - start
-    f = open(build_dir + "/end2end_mobilenet_fifoset_time.txt", "w+")
-    f.write("Execution time in seconds: " + str(elapsed_time))
-    f.close()
-    extract_model_config_to_json(
-        model,
-        build_dir + "/end2end_mobilenet_folded_and_fifo_config.json",
-        ["PE", "SIMD", "impl_style", "ram_style", "depth"],
-    )
-    model.save(build_dir + "/end2end_mobilenet_fifodepth.onnx")
-
-
-@pytest.mark.slow
-@pytest.mark.vitis
-def test_end2end_mobilenet_build():
-    model = load_test_checkpoint_or_skip(
-        build_dir + "/end2end_mobilenet_fifodepth.onnx"
-    )
-    model = model.transform(
-        VitisBuild(
-            test_fpga_part,
-            target_clk_ns,
-            test_platform,
-            strategy=VitisOptStrategy.PERFORMANCE_BEST,
-        )
-    )
-    model.save(build_dir + "/end2end_mobilenet_build.onnx")
-    model = model.transform(AnnotateResources("synth"))
-    model.save(build_dir + "/end2end_mobilenet_final.onnx")
diff --git a/tests/end2end/test_ext_weights.py b/tests/end2end/test_ext_weights.py
index 0407395ed57dc07c6700efcebbb1fc8a767877bb..550dab4d0321001547efe97487abc543271dcf2e 100644
--- a/tests/end2end/test_ext_weights.py
+++ b/tests/end2end/test_ext_weights.py
@@ -26,16 +26,19 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import finn.builder.build_dataflow as build
-import finn.builder.build_dataflow_config as build_cfg
+import pkg_resources as pk
+
+import pytest
+
 import os
 import shutil
-from finn.util.test import get_build_env, load_test_checkpoint_or_skip
-import pytest
-from finn.util.basic import make_build_dir
-import pkg_resources as pk
-import wget
 import subprocess
+import wget
+
+import finn.builder.build_dataflow as build
+import finn.builder.build_dataflow_config as build_cfg
+from finn.util.basic import make_build_dir
+from finn.util.test import get_build_env, load_test_checkpoint_or_skip
 
 target_clk_ns = 10
 build_kind = "zynq"
@@ -44,6 +47,14 @@ onnx_zip_url = "https://github.com/Xilinx/finn-examples"
 onnx_zip_url += "/releases/download/v0.0.1a/onnx-models-bnn-pynq.zip"
 onnx_zip_local = build_dir + "/onnx-models-bnn-pynq.zip"
 onnx_dir_local = build_dir + "/onnx-models-bnn-pynq"
+mnist_url = "https://raw.githubusercontent.com/fgnt/mnist/master"
+mnist_local = build_dir + "/mnist"
+mnist_files = [
+    "train-images-idx3-ubyte.gz",
+    "train-labels-idx1-ubyte.gz",
+    "t10k-images-idx3-ubyte.gz",
+    "t10k-labels-idx1-ubyte.gz",
+]
 
 
 def get_checkpoint_name(step):
@@ -98,6 +109,22 @@ def test_end2end_ext_weights_build():
     shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build"))
 
 
+@pytest.mark.board
+def test_end2end_ext_weights_dataset():
+    # make sure we have local copies of mnist dataset files
+    subprocess.check_output(["mkdir", "-p", mnist_local])
+    for f in mnist_files:
+        if not os.path.isfile(mnist_local + "/" + f):
+            wget.download(mnist_url + "/" + f, out=mnist_local + "/" + f)
+        assert os.path.isfile(mnist_local + "/" + f)
+    # rsync to board
+    build_env = get_build_env(build_kind, target_clk_ns)
+    mnist_target = "%s@%s:%s" % (build_env["username"], build_env["ip"], "/tmp/")
+
+    rsync_dataset_cmd = ["rsync", "-rv", mnist_local + "/", mnist_target]
+    subprocess.check_output(rsync_dataset_cmd)
+
+
 def test_end2end_ext_weights_run_on_hw():
     build_env = get_build_env(build_kind, target_clk_ns)
     deploy_dir = get_checkpoint_name("build")
@@ -124,22 +151,9 @@ echo %s | sudo -S python3.6 validate.py --dataset mnist --bitfile %s
         build_env["ip"],
         build_env["target_dir"],
     )
-    rsync_res = subprocess.run(
-        [
-            "sshpass",
-            "-p",
-            build_env["password"],
-            "rsync",
-            "-avz",
-            deploy_dir,
-            remote_target,
-        ]
-    )
+    rsync_res = subprocess.run(["rsync", "-avz", deploy_dir, remote_target])
     assert rsync_res.returncode == 0
     remote_verif_cmd = [
-        "sshpass",
-        "-p",
-        build_env["password"],
         "ssh",
         "%s@%s" % (build_env["username"], build_env["ip"]),
         "sh",
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index cf3e064804216e192909eae75f01880554f03d9f..89fab37d6d5225383ccb13a748c83573d6ee4516 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -26,10 +26,11 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
+import pytest
 
+import os
 from onnx import TensorProto, helper
-import pytest
+
 import finn.util.basic as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index a12c69285b7b335f075d8ffd7ba27e039ebc6f8c..6284748b9ccdc422b42bd9e301eb395d8dd1ad45 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -26,16 +26,16 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
+import pytest
 
+import os
 from onnx import TensorProto, helper
 
-import pytest
 import finn.util.basic as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 
 
 @pytest.mark.vivado
diff --git a/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e7030449c87b81d7a492b0e76dd05a047be3858
--- /dev/null
+++ b/tests/fpgadataflow/test_convert_to_hls_1d_conv_layer.py
@@ -0,0 +1,188 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.util.basic import gen_finn_dt_tensor
+
+
+# conv_config:
+# [pad_h_begin, pad_w_begin, pad_h_end, pad_w_end]
+# [kernel_size_h, kernel_size_w]
+# [stride_h, stride_w]
+# [dilation_h, dilation_w]
+@pytest.mark.parametrize(
+    "conv_config",
+    [
+        [[0, 0, 0, 0], [4, 1], [1, 1], [1, 1]],
+        [[1, 0, 1, 0], [4, 1], [1, 1], [1, 1]],
+        [[1, 0, 1, 0], [4, 1], [2, 1], [1, 1]],
+        # [[1, 0, 1, 0], [4, 1], [1, 1], [2, 1]]
+    ],
+)
+@pytest.mark.parametrize("depthwise", [False, True])
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_convert_to_hls_1d_conv_layer(conv_config, depthwise, exec_mode):
+    pad, kernel_size, stride, dilation = conv_config
+    np.random.seed(0)
+    idt = DataType.UINT4
+
+    in_feature_dim_h, in_feature_dim_w = [10, 1]
+    in_chn = 16
+
+    k_h, k_w = kernel_size
+    stride_h, stride_w = stride
+    dilation_h, dilation_w = dilation
+    pad_h = pad[0] + pad[2]
+    pad_w = pad[1] + pad[3]
+
+    if depthwise is True:
+        group = out_chn = in_chn
+        conv_param_shape = [out_chn, 1, k_h, k_w]
+    else:
+        group = 1
+        out_chn = 20
+        conv_param_shape = [out_chn, in_chn, k_h, k_w]
+
+    out_feature_dim_h = compute_conv_output_dim(
+        in_feature_dim_h, k_h, stride_h, pad_h, dilation_h
+    )
+    out_feature_dim_w = compute_conv_output_dim(
+        in_feature_dim_w, k_w, stride_w, pad_w, dilation_w
+    )
+
+    input_shape = [1, in_chn, in_feature_dim_h, in_feature_dim_w]
+    output_shape = [1, out_chn, out_feature_dim_h, out_feature_dim_w]
+
+    conv_weight_dt = DataType.UINT4
+
+    conv_config = {}
+    conv_config["dilations"] = [dilation_h, dilation_w]
+    conv_config["group"] = group
+    conv_config["kernel_shape"] = [k_h, k_w]
+    conv_config["pads"] = pad
+    conv_config["strides"] = [stride_h, stride_w]
+
+    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
+    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
+    value_info = [
+        helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
+    ]
+
+    modelproto = helper.make_model(
+        helper.make_graph(
+            name="conv_test",
+            inputs=[top_in],
+            outputs=[top_out],
+            value_info=value_info,
+            nodes=[
+                helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)
+            ],
+        )
+    )
+
+    model = ModelWrapper(modelproto)
+    model.set_tensor_datatype("top_in", idt)
+    model.set_tensor_datatype("top_out", idt)
+    model.set_tensor_datatype("p1", conv_weight_dt)
+    model.set_initializer("p1", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape))
+
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+
+    new_model = model.transform(LowerConvsToMatMul())
+    new_model = new_model.transform(to_hls.InferConvInpGen())
+    if depthwise is True:
+        new_model = new_model.transform(to_hls.InferVVAU())
+    else:
+        new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
+        fc_node = new_model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
+        fc_inst = getCustomOp(fc_node)
+        mw = fc_inst.get_nodeattr("MW")
+        mh = fc_inst.get_nodeattr("MH")
+        pe_cands = list(filter(lambda x: mh % x == 0, range(2, mh + 1)))
+        simd_cands = list(filter(lambda x: mw % x == 0, range(2, mw + 1)))
+        fc_inst.set_nodeattr("PE", pe_cands[0])
+        fc_inst.set_nodeattr("SIMD", simd_cands[0])
+
+    new_model = new_model.transform(GiveUniqueNodeNames())
+    new_model = new_model.transform(InferShapes())
+    new_model = new_model.transform(InferDataTypes())
+
+    if exec_mode == "cppsim":
+        new_model = new_model.transform(PrepareCppSim())
+        new_model = new_model.transform(CompileCppSim())
+        new_model = new_model.transform(SetExecMode("cppsim"))
+    elif exec_mode == "rtlsim":
+        new_model = new_model.transform(SetExecMode("rtlsim"))
+        new_model = new_model.transform(GiveUniqueNodeNames())
+        new_model = new_model.transform(PrepareIP("xc7z020clg400-1", 5))
+        new_model = new_model.transform(HLSSynthIP())
+        new_model = new_model.transform(PrepareRTLSim())
+    else:
+        raise Exception("Unknown exec_mode")
+
+    x = gen_finn_dt_tensor(idt, input_shape)
+    inp_dict = {model.graph.input[0].name: x}
+    assert oxe.compare_execution(model, new_model, inp_dict)
+
+    if pad_h == 1 and pad_w == 1:
+        padding_node = new_model.get_nodes_by_op_type("FMPadding_Batch")[0]
+        padding_inst = getCustomOp(padding_node)
+        assert padding_inst.get_nodeattr("SIMD") == in_chn
+
+    if depthwise is True and exec_mode == "rtlsim":
+        node = new_model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
+        inst = getCustomOp(node)
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
+        exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
+        exp_cycles = exp_cycles_dict[node.name]
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
+        assert exp_cycles != 0
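The new 1D conv test above sizes its output feature map with compute_conv_output_dim from kernel, stride, padding and dilation. As a quick sanity check of the conv_config combinations in the parametrization, here is a minimal standalone sketch of the standard convolution output-size formula; it is written independently for illustration and only assumed to agree with finn.custom_op.general.im2col.compute_conv_output_dim:

```python
# Minimal sketch of the standard convolution output-size formula (assumption:
# this matches FINN's compute_conv_output_dim; not copied from it).
def conv_output_dim(ifm_dim, k, stride, total_pad=0, dilation=1):
    """Output spatial size for one dimension, given total (begin + end) padding."""
    return (ifm_dim + total_pad - dilation * (k - 1) - 1) // stride + 1

# The [10, 1] input with a [4, 1] kernel from the test above:
assert conv_output_dim(10, 4, stride=1, total_pad=0) == 7   # no padding
assert conv_output_dim(10, 4, stride=1, total_pad=2) == 9   # pads [1, 0, 1, 0]
assert conv_output_dim(10, 4, stride=2, total_pad=2) == 5   # stride 2 in H
assert conv_output_dim(1, 1, stride=1, total_pad=0) == 1    # width stays 1
```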
diff --git a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
index 40f0a620c6cd5db873a731c038a737b35c1cce9d..8dd927fa7628d1500fe644b030278fbaa3f18810 100644
--- a/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_channelwise_layer.py
@@ -28,24 +28,23 @@
 
 import pytest
 
+import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.util.basic import gen_finn_dt_tensor
+from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.transformation.infer_shapes import InferShapes
-import numpy as np
+from finn.util.basic import gen_finn_dt_tensor
 
 
 def prepare_inputs(input_tensor):
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
new file mode 100755
index 0000000000000000000000000000000000000000..cf2903a5789d7d3892ac549338b274268c1661b3
--- /dev/null
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_fc_transition.py
@@ -0,0 +1,246 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+
+import finn.core.data_layout as DataLayout
+import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames, RemoveUnusedTensors
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
+from finn.util.basic import gen_finn_dt_tensor
+
+
+def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
+    if seed is not None:
+        np.random.seed(seed)
+    steps = np.random.rand(channels, 1) * 30
+    bias = np.random.rand(channels, 1) * -10
+    thres = [np.arange(num_of_thres) for chn in range(channels)]
+    thres = ((thres + bias) * steps).astype(np.float32)
+    thres = np.round(thres)
+    return thres
+
+
+# conv_config: input_shape, kernel_shape, stride, pad
+@pytest.mark.parametrize(
+    "conv_config",
+    [
+        ((6, 6), (3, 3), (1, 1), (1, 1)),
+        # TODO: enable 1d conv test cases
+        # ((12, 1), (3, 1), (1, 1), (1, 0)),
+        # ((1, 15), (1, 5), (1, 1), (0, 2)),
+    ],
+)
+@pytest.mark.parametrize("depthwise", [False, True])
+@pytest.mark.parametrize("use_reshape", [False, True])
+@pytest.mark.vivado
+@pytest.mark.slow
+def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape):
+    np.random.seed(0)
+    idt = DataType.UINT4
+    odt = DataType.UINT4
+    conv_weight_dt = DataType.INT4
+    fc_weight_dt = DataType.INT4
+
+    input_shape, kernel_shape, stride, pad = conv_config
+    kernel_size_h, kernel_size_w = kernel_shape
+    input_size_h, input_size_w = input_shape
+    stride_h, stride_w = stride
+    pad_h, pad_w = pad
+
+    in_chn = 4
+    fc_filters = 16
+
+    if depthwise is True:
+        group = out_chn = in_chn
+        conv_param_shape = [out_chn, 1, kernel_size_h, kernel_size_w]
+    else:
+        group = 1
+        out_chn = 8
+        conv_param_shape = [out_chn, in_chn, kernel_size_h, kernel_size_w]
+
+    output_size_h = compute_conv_output_dim(
+        input_size_h, kernel_size_h, stride_h, 2 * pad_h
+    )
+    output_size_w = compute_conv_output_dim(
+        input_size_w, kernel_size_w, stride_w, 2 * pad_w
+    )
+
+    input_shape = [1, in_chn, input_size_h, input_size_w]
+    fc_param_shape = [out_chn * output_size_h * output_size_w, fc_filters]
+    output_shape = [1, fc_filters]
+
+    conv_config = {}
+    conv_config["dilations"] = [1, 1]
+    conv_config["group"] = group
+    conv_config["kernel_shape"] = [kernel_size_h, kernel_size_w]
+    conv_config["pads"] = [pad_h, pad_w, pad_h, pad_w]
+    conv_config["strides"] = [stride_h, stride_w]
+
+    global_in = helper.make_tensor_value_info(
+        "global_in", TensorProto.FLOAT, input_shape
+    )
+    global_out = helper.make_tensor_value_info(
+        "global_out", TensorProto.FLOAT, output_shape
+    )
+    value_info = [
+        helper.make_tensor_value_info(
+            "conv_param", TensorProto.FLOAT, conv_param_shape
+        ),
+        helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT, (out_chn, 15)),
+        helper.make_tensor_value_info(
+            "matmul_param", TensorProto.FLOAT, fc_param_shape
+        ),
+        helper.make_tensor_value_info(
+            "thres2_param", TensorProto.FLOAT, (fc_filters, 15)
+        ),
+        helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []),
+    ]
+
+    if use_reshape:
+        flatten_node = helper.make_node(
+            "Reshape", ["thres1_out", "reshape_shape"], ["flatten_out"]
+        )
+    else:
+        flatten_node = helper.make_node(
+            "Flatten", ["thres1_out"], ["flatten_out"], axis=1
+        )
+
+    modelproto = helper.make_model(
+        helper.make_graph(
+            name="test",
+            inputs=[global_in],
+            outputs=[global_out],
+            value_info=value_info,
+            nodes=[
+                helper.make_node(
+                    "Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config
+                ),
+                helper.make_node(
+                    "MultiThreshold",
+                    ["conv_out", "thres1_param"],
+                    ["thres1_out"],
+                    domain="finn.custom_op.general",
+                    out_dtype="UINT4",
+                ),
+                flatten_node,
+                helper.make_node(
+                    "MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]
+                ),
+                helper.make_node(
+                    "MultiThreshold",
+                    ["matmul_out", "thres2_param"],
+                    ["global_out"],
+                    domain="finn.custom_op.general",
+                    out_dtype="UINT4",
+                ),
+            ],
+        )
+    )
+
+    model = ModelWrapper(modelproto)
+    model.set_tensor_datatype("global_in", idt)
+    model.set_tensor_layout("global_in", DataLayout.NCHW)
+    model.set_tensor_datatype("global_out", odt)
+    model.set_tensor_datatype("conv_param", conv_weight_dt)
+    model.set_tensor_datatype("matmul_param", fc_weight_dt)
+    model.set_tensor_datatype("thres1_param", DataType.INT32)
+    model.set_tensor_datatype("thres2_param", DataType.INT32)
+
+    model.set_initializer(
+        "conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)
+    )
+    model.set_initializer(
+        "thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0)
+    )
+    model.set_initializer(
+        "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0)
+    )
+    model.set_initializer(
+        "matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape)
+    )
+    model.set_initializer("reshape_shape", np.array([1, -1]))
+
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    model = model.transform(InferDataLayouts())
+
+    # streamlining
+    new_model = model.transform(MoveScalarLinearPastInvariants())
+    new_model = new_model.transform(Streamline())
+    new_model = new_model.transform(LowerConvsToMatMul())
+    new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+    new_model = new_model.transform(Streamline())
+    new_model = new_model.transform(InferDataLayouts())
+    new_model = new_model.transform(RemoveUnusedTensors())
+
+    # convert_to_hls
+    if depthwise is True:
+        new_model = new_model.transform(to_hls.InferVVAU())
+    new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer())
+    new_model = new_model.transform(to_hls.InferThresholdingLayer())
+    new_model = new_model.transform(to_hls.InferConvInpGen())
+    new_model = new_model.transform(to_hls.InferStreamingMaxPool())
+    new_model = new_model.transform(RemoveCNVtoFCFlatten())
+    new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes())
+    new_model = new_model.transform(GiveUniqueNodeNames())
+    new_model = new_model.transform(InferDataLayouts())
+
+    # prepare cppsim
+    new_model = new_model.transform(PrepareCppSim())
+    new_model = new_model.transform(CompileCppSim())
+    new_model = new_model.transform(SetExecMode("cppsim"))
+
+    # check for correct execution
+    x = gen_finn_dt_tensor(idt, input_shape)
+    inp_dict = {model.graph.input[0].name: x}
+    assert oxe.compare_execution(model, new_model, inp_dict)
+
+    num_transpose = len(new_model.get_nodes_by_op_type("Transpose"))
+    num_flatten = len(new_model.get_nodes_by_op_type("Flatten"))
+    num_reshape = len(new_model.get_nodes_by_op_type("Reshape"))
+
+    # check if transpose->flatten was removed
+    assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0
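The synthetic graph above expresses its quantized activations as MultiThreshold nodes with per-channel threshold matrices (thres1_param, thres2_param). As a rough mental model of what InferThresholdingLayer later maps to a Thresholding_Batch node, the sketch below implements the usual "count how many thresholds are met" semantics in numpy; naive_multithreshold is a hypothetical helper for illustration only and is not taken from FINN's multithreshold implementation:

```python
import numpy as np

def naive_multithreshold(x, thresholds):
    """Rough sketch of MultiThreshold semantics for NCHW input (assumption).

    x: (N, C, H, W) activations, thresholds: (C, T) per-channel thresholds.
    Each output value counts the thresholds the input meets or exceeds, so
    T thresholds give integer outputs in [0, T] (e.g. a UINT4 range for T = 15).
    """
    n, c, h, w = x.shape
    assert thresholds.shape[0] == c
    crossed = x[:, :, :, :, None] >= thresholds[None, :, None, None, :]
    return crossed.sum(axis=-1).astype(np.float32)

# Tiny example: 2 channels with 3 thresholds each.
x = np.array([[[[0.5]], [[7.0]]]], dtype=np.float32)               # shape (1, 2, 1, 1)
thr = np.array([[1.0, 2.0, 3.0], [2.0, 4.0, 6.0]], dtype=np.float32)
print(naive_multithreshold(x, thr))  # channel 0 crosses 0 thresholds, channel 1 crosses 3
```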
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
index d88576583eaacb7579b02bc00e4e0f9b77b16f7e..deca7c96127fdf03d9feb7504d5a6daebb41a5d5 100644
--- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -26,30 +26,29 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from onnx import TensorProto, helper
-import numpy as np
 import pytest
 
-from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+import numpy as np
+from onnx import TensorProto, helper
 
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 import finn.core.onnx_exec as oxe
-from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import gen_finn_dt_tensor
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.general.im2col import compute_conv_output_dim
 from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.util.basic import gen_finn_dt_tensor
 
 # conv_config  kernel_size,stride, pad
 
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
index 20751a5877a879eeabf1ed6b67a7573208cf9367..37a1c8d8486a535c8ff87f4b06905b3059bba35a 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
@@ -26,29 +26,31 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
 import pkg_resources as pk
 
+import pytest
+
 import brevitas.onnx as bo
 import numpy as np
-import pytest
+import os
+
 import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.util.test import get_test_model_trained
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.custom_op.registry import getCustomOp
 
 export_onnx_path_cnv = "test_convert_to_hls_layers_cnv.onnx"
 
@@ -115,8 +117,8 @@ def test_convert_to_hls_layers_cnv_w1a1(fused_activation):
         thr_nodes = model.get_nodes_by_op_type("Thresholding_Batch")
         assert len(thr_nodes) == 8
     non_finn_nodes = model.get_non_finn_nodes()
-    assert len(non_finn_nodes) == 4
-    exp_non_finn_nodes = ["Transpose", "Reshape", "Mul", "Add"]
+    assert len(non_finn_nodes) == 5
+    exp_non_finn_nodes = ["Transpose", "Transpose", "Reshape", "Mul", "Add"]
     assert [x.op_type for x in non_finn_nodes] == exp_non_finn_nodes
     fc_nodes = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
     assert len(fc_nodes) == 9

diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index cb66fa7237416579b509aa4f508c9105d386d08a..a1dc11e0eee5aab462beb0ec34b8771ced20a379 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -26,15 +26,16 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-from pkgutil import get_data
+import pytest
 
 import brevitas.onnx as bo
 import numpy as np
 import onnx
 import onnx.numpy_helper as nph
+import os
 import torch
-import pytest
+from pkgutil import get_data
+
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
@@ -42,8 +43,8 @@ from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_shapes import InferShapes
@@ -51,7 +52,6 @@ from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.util.test import get_test_model_trained
 
-
 export_onnx_path = "test_convert_to_hls_layers_fc.onnx"
 
 
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
index 86875d2ac7f37e697c5de198e15aa3045a9e3d42..b0780c073114351ba136fefe6973114bd1a8505b 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_synthetic.py
@@ -26,42 +26,43 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-import numpy as np
+import pytest
 
+import numpy as np
+import os
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import (
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
     SortGraph,
 )
-from finn.transformation.streamline.reorder import MoveScalarLinearPastInvariants
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.util.basic import gen_finn_dt_tensor
-from finn.util.test import soft_verify_topk
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.insert_topk import InsertTopK
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.streamline.absorb import (
-    AbsorbScalarMulAddIntoTopK,
     AbsorbConsecutiveTransposes,
+    AbsorbScalarMulAddIntoTopK,
 )
 from finn.transformation.streamline.collapse_repeated import (
-    CollapseRepeatedMul,
     CollapseRepeatedAdd,
+    CollapseRepeatedMul,
 )
-from finn.transformation.streamline.reorder import MoveAddPastMul
-
-import pytest
+from finn.transformation.streamline.reorder import (
+    MoveAddPastMul,
+    MoveScalarLinearPastInvariants,
+)
+from finn.util.basic import gen_finn_dt_tensor
+from finn.util.test import soft_verify_topk
 
 export_onnx_path = "test_output_synthetic.onnx"
 
diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
index e8f3c3ae3290b5bdc23e46f7e9991222fdfac000..70716e88a4de827be37416b63a925b30d01c342a 100644
--- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
+++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
@@ -28,23 +28,24 @@
 
 import pytest
 
-from onnx import TensorProto, helper
 import numpy as np
+from onnx import TensorProto, helper
+
 import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.custom_op.registry import getCustomOp
-from finn.util.basic import gen_finn_dt_tensor
 from finn.transformation.infer_shapes import InferShapes
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.util.basic import gen_finn_dt_tensor
 
 
 def make_single_maxpool_modelwrapper(k, stride, pad, ifm_ch, ifm_dim, ofm_dim, idt):
diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py
index c406d78158c52226fea881c48bc178139653fea5..75ce055c0e9a093a5ddeab6b13af8d36d6152fb8 100644
--- a/tests/fpgadataflow/test_depthwise_convolution.py
+++ b/tests/fpgadataflow/test_depthwise_convolution.py
@@ -27,30 +27,29 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
+
+import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
-import numpy as np
 
-from finn.core.modelwrapper import ModelWrapper
+import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
-from finn.transformation.infer_shapes import InferShapes
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.convert_to_hls_layers import (
     InferConvInpGen,
     InferVVAU,
 )
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
-from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-
-import finn.core.onnx_exec as oxe
-from finn.custom_op.general.im2col import compute_conv_output_dim
-from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.general import GiveUniqueNodeNames
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
 
 
 def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
@@ -98,7 +97,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
         inputs=["inp"],
         outputs=["im2col_out"],
         kernel_size=[k, k],
-        stride=stride,
+        stride=[stride, stride],
         pad_amount=[padding, padding, padding, padding],
         input_shape="(1, {}, {}, {})".format(ifm_dim, ifm_dim, ifm_ch),
         depthwise=1,
@@ -142,7 +141,7 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
     W_matrix = W_matrix.reshape(ofm_ch, ifm_ch * k * k)
 
     model.set_initializer("W_sparse", W_matrix.T)
-    sparsity = {"dw": {"kernel_shape": k}}
+    sparsity = {"dw": {"kernel_shape": [k, k]}}
     model.set_tensor_sparsity("W_sparse", sparsity)
 
     if act is not None:
diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
index 0fa156e23b4a01270297e4e8e1fdc13a75eb5a59..021d58b4a382f2fe3d1a2c3c2a4ce8d7f3c87ae5 100644
--- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
@@ -27,23 +27,23 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
-import numpy as np
 
+import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 
 
 def make_addstreams_modelwrapper(ch, pe, idt):
@@ -62,7 +62,10 @@ def make_addstreams_modelwrapper(ch, pe, idt):
         inputDataType=idt.name,
     )
     graph = helper.make_graph(
-        nodes=[addstreams_node], name="graph", inputs=[inp1, inp2], outputs=[outp],
+        nodes=[addstreams_node],
+        name="graph",
+        inputs=[inp1, inp2],
+        outputs=[outp],
     )
 
     model = helper.make_model(graph, producer_name="addstreams-model")
diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
index e45dfe07c3abc0ce218dee0563055acb4458ccd0..15bcd5fa8a937aa313f2c73f253f934f6bbd332b 100644
--- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
+++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
@@ -32,19 +32,19 @@ import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 
 
 def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs):
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index 4e0e8c7c35a8fc8a30e0ba4c27a7c0d637e24d1f..86622cf6d44dbda3af417283f5ceea1d1ebc3bf0 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -27,27 +27,28 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
-import numpy as np
 
+import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor
 
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-
 
-def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt):
+def make_single_im2col_modelwrapper(
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
+):
     odt = idt
     inp = helper.make_tensor_value_info(
         "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch]
@@ -61,12 +62,12 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i
         ["inp"],
         ["outp"],
         domain="finn.custom_op.general",
-        backend="fpgadataflow",
-        stride=stride,
+        stride=[stride, stride],
         kernel_size=[k, k],
         input_shape=str((1, ifm_dim, ifm_dim, ifm_ch)),
         pad_amount=[0, 0, 0, 0],
         pad_value=0,
+        dilations=[dilation, dilation],
     )
     graph = helper.make_graph(
         nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp]
@@ -82,7 +83,7 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i
 
 
 def make_single_slidingwindow_modelwrapper(
-    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw=0
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw=0
 ):
     odt = idt
     inp = helper.make_tensor_value_info(
@@ -98,12 +99,13 @@ def make_single_slidingwindow_modelwrapper(
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        ConvKernelDim=k,
+        ConvKernelDim=[k, k],
         IFMChannels=ifm_ch,
-        IFMDim=ifm_dim,
-        OFMDim=ofm_dim,
+        IFMDim=[ifm_dim, ifm_dim],
+        OFMDim=[ofm_dim, ofm_dim],
         SIMD=simd,
-        Stride=stride,
+        Stride=[stride, stride],
+        Dilation=[dilation, dilation],
         inputDataType=idt.name,
         outputDataType=odt.name,
         depthwise=dw,
@@ -138,6 +140,9 @@ def prepare_inputs(input_tensor):
 @pytest.mark.parametrize("ifm_ch", [2, 4])
 # Stride
 @pytest.mark.parametrize("stride", [1, 2])
+# Dilation
+# Currently only dilation value of 1 is supported
+@pytest.mark.parametrize("dilation", [1])
 # execution mode
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 # input channel parallelism ("SIMD")
@@ -147,13 +152,13 @@ def prepare_inputs(input_tensor):
 @pytest.mark.slow
 @pytest.mark.vivado
 def test_fpgadataflow_slidingwindow(
-    idt, k, ifm_dim, ifm_ch, stride, exec_mode, simd, dw
+    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw
 ):
     ofm_dim = int(((ifm_dim - k) / stride) + 1)
 
     x = gen_finn_dt_tensor(idt, (1, ifm_dim, ifm_dim, ifm_ch))
     model = make_single_slidingwindow_modelwrapper(
-        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt, dw
+        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw
     )
 
     if exec_mode == "cppsim":
@@ -174,9 +179,10 @@ def test_fpgadataflow_slidingwindow(
     # execute model
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
     golden = make_single_im2col_modelwrapper(
-        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt
+        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
     )
     y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
+
     if dw == 0:
         assert (y_produced == y_expected).all()
     else:
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3d695469b7a4fa1f4235feee29e7fc3dece0df5
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator1d.py
@@ -0,0 +1,255 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.util.basic import gen_finn_dt_tensor
+
+
+def make_single_im2col_modelwrapper(
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt
+):
+    k_h, k_w = k
+    ifm_dim_h, ifm_dim_w = ifm_dim
+    stride_h, stride_w = stride
+    dilation_h, dilation_w = dilation
+    ofm_dim_h, ofm_dim_w = ofm_dim
+
+    odt = idt
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]
+    )
+
+    im2col_node = helper.make_node(
+        "Im2Col",
+        ["inp"],
+        ["outp"],
+        domain="finn.custom_op.general",
+        stride=[stride_h, stride_w],
+        kernel_size=[k_h, k_w],
+        input_shape=str((1, ifm_dim_h, ifm_dim_w, ifm_ch)),
+        dilations=[dilation_h, dilation_w],
+        pad_amount=[0, 0, 0, 0],
+        pad_value=0,
+    )
+    graph = helper.make_graph(
+        nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp]
+    )
+
+    model = helper.make_model(graph, producer_name="im2col-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+
+    return model
+
+
+def make_single_slidingwindow_modelwrapper(
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, dilation, idt, dw=0
+):
+    k_h, k_w = k
+    ifm_dim_h, ifm_dim_w = ifm_dim
+    stride_h, stride_w = stride
+    dilation_h, dilation_w = dilation
+    ofm_dim_h, ofm_dim_w = ofm_dim
+
+    odt = idt
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]
+    )
+
+    SlidingWindow_node = helper.make_node(
+        "ConvolutionInputGenerator1D",
+        ["inp"],
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        ConvKernelDim=[k_h, k_w],
+        IFMChannels=ifm_ch,
+        IFMDim=[ifm_dim_h, ifm_dim_w],
+        OFMDim=[ofm_dim_h, ofm_dim_w],
+        SIMD=simd,
+        Stride=[stride_h, stride_w],
+        Dilation=[dilation_h, dilation_w],
+        inputDataType=idt.name,
+        outputDataType=odt.name,
+        depthwise=dw,
+    )
+    graph = helper.make_graph(
+        nodes=[SlidingWindow_node],
+        name="slidingwindow_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="slidingwindow-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+
+    return model
+
+
+def prepare_inputs(input_tensor):
+    return {"inp": input_tensor}
+
+
+# input datatype
+# @pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT8])
+@pytest.mark.parametrize("idt", [DataType.INT8])
+# kernel size
+@pytest.mark.parametrize("k", [[4, 1]])
+# input dimension
+@pytest.mark.parametrize("ifm_dim", [[10, 1]])
+# input channels
+@pytest.mark.parametrize("ifm_ch", [1, 4])
+# Stride
+@pytest.mark.parametrize("stride", [[1, 1], [2, 1]])
+# Dilation
+# @pytest.mark.parametrize("dilation", [[1, 1], [2, 1]])
+@pytest.mark.parametrize("dilation", [[1, 1]])
+# execution mode
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+# input channel parallelism ("SIMD")
+@pytest.mark.parametrize("simd", [1, 4])
+# depthwise
+@pytest.mark.parametrize("dw", [0, 1])
+# Flip dimensions
+@pytest.mark.parametrize("flip", [False, True])
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_fpgadataflow_slidingwindow_1d(
+    idt, k, ifm_dim, ifm_ch, stride, dilation, exec_mode, simd, dw, flip
+):
+    if flip:
+        k = k[::-1]
+        ifm_dim = ifm_dim[::-1]
+        stride = stride[::-1]
+        dilation = dilation[::-1]
+
+    k_h, k_w = k
+    ifm_dim_h, ifm_dim_w = ifm_dim
+    stride_h, stride_w = stride
+    dilation_h, dilation_w = dilation
+
+    if (dilation_h > 1 or dilation_w > 1) and (stride_h > 1 or stride_w > 1):
+        pytest.skip(
+            """Dilation value greater than 1 and stride greater than 1
+            currently not supported for 1D convolutions"""
+        )
+    if simd > ifm_ch:
+        pytest.skip("SIMD cannot be larger than number of input channels")
+
+    ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h)
+    ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w)
+    ofm_dim = [ofm_dim_h, ofm_dim_w]
+
+    x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
+    model = make_single_slidingwindow_modelwrapper(
+        k=k,
+        ifm_ch=ifm_ch,
+        ifm_dim=ifm_dim,
+        ofm_dim=ofm_dim,
+        simd=simd,
+        stride=stride,
+        dilation=dilation,
+        idt=idt,
+        dw=dw,
+    )
+
+    if exec_mode == "cppsim":
+        model = model.transform(SetExecMode("cppsim"))
+        model = model.transform(PrepareCppSim())
+        model = model.transform(CompileCppSim())
+    elif exec_mode == "rtlsim":
+        model = model.transform(SetExecMode("rtlsim"))
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+        model = model.transform(HLSSynthIP())
+        model = model.transform(PrepareRTLSim())
+    else:
+        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow_1d")
+
+    # prepare input data
+    input_dict = prepare_inputs(x)
+    # execute model
+    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
+    golden = make_single_im2col_modelwrapper(
+        k=k,
+        ifm_ch=ifm_ch,
+        ifm_dim=ifm_dim,
+        ofm_dim=ofm_dim,
+        simd=simd,
+        stride=stride,
+        dilation=dilation,
+        idt=idt,
+    )
+    y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
+
+    if dw == 0:
+        assert (y_produced == y_expected).all()
+    else:
+        y_expected = y_expected.reshape(
+            1, ofm_dim_h, ofm_dim_w, k_h * k_w, ifm_ch // simd, simd
+        )
+        y_expected = y_expected.transpose(0, 1, 2, 4, 3, 5)
+        y_expected = y_expected.reshape(1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w)
+        assert (y_produced == y_expected).all()
+
+    if exec_mode == "rtlsim":
+        node = model.get_nodes_by_op_type("ConvolutionInputGenerator1D")[0]
+        inst = getCustomOp(node)
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
+        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
+        exp_cycles = exp_cycles_dict[node.name]
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
+        assert exp_cycles != 0
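For intuition about what the golden Im2Col model above produces in the k_w == 1, no-padding cases exercised here, the following standalone numpy sketch builds the same [1, ofm_h, 1, k_h * ifm_ch] window tensor. The exact element ordering of FINN's Im2Col is assumed rather than verified, so treat im2col_1d_nhwc as an illustration of the shapes involved, not a drop-in reference:

```python
import numpy as np

def im2col_1d_nhwc(x, k_h, stride_h=1, dilation_h=1):
    """Sliding-window view for an NHWC tensor whose width is 1, no padding.

    x: (N, H, 1, C) -> (N, OH, 1, k_h * C), matching the "outp" shape used in
    make_single_im2col_modelwrapper above for the [k, 1] kernels tested here.
    """
    n, h, w, c = x.shape
    assert w == 1, "this sketch only covers the height-only 1D case"
    out_h = (h - dilation_h * (k_h - 1) - 1) // stride_h + 1
    cols = np.zeros((n, out_h, w, k_h * c), dtype=x.dtype)
    for oh in range(out_h):
        start = oh * stride_h
        window = x[:, start : start + dilation_h * (k_h - 1) + 1 : dilation_h, :, :]
        cols[:, oh, :, :] = window.transpose(0, 2, 1, 3).reshape(n, w, k_h * c)
    return cols

x = np.arange(10 * 3, dtype=np.float32).reshape(1, 10, 1, 3)  # H=10, C=3
print(im2col_1d_nhwc(x, k_h=4).shape)                         # (1, 7, 1, 12)
```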
diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
index 12505fdf456aa55f881fb5f3d2d609080cc97074..6b776e8827d8e76102bd069ae8567051ed0580ba 100644
--- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
@@ -27,25 +27,25 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
-import numpy as np
 
+import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 
 
 def make_dupstreams_modelwrapper(ch, pe, idim, idt):
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py
index 34930e672f3ff9816d3328da102b1bc1daa8a3b1..b0af4382383d8935c69e362b1a43db536979c784 100644
--- a/tests/fpgadataflow/test_fpgadataflow_dwc.py
+++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py
@@ -30,15 +30,15 @@ import pytest
 
 from onnx import TensorProto, helper
 
+import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
 
 
 def make_single_dwc_modelwrapper(Shape, INWidth, OUTWidth, finn_dtype):
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 00f1ba5d59288b1a463fadbd684ff872269d6970..49c326d2a34e7262826505ae32f2509b42ae0a35 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -31,22 +31,22 @@ import pytest
 import numpy as np
 from onnx import TensorProto, helper
 
-from finn.custom_op.registry import getCustomOp
 import finn.core.onnx_exec as oxe
 import finn.custom_op.general.xnorpopcount as xp
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.general.multithreshold import multithreshold
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 
 
 def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None):
diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py
index a603fc0664b78c00354514fbdff62c94aa7b7ef3..81f66c42ca76d42fe8ee50576d72007f6ca6c12f 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fifo.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py
@@ -27,19 +27,19 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
-import os
 
+import os
 from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
-
 
 build_dir = os.environ["FINN_BUILD_DIR"]
 test_fpga_part = "xc7z020clg400-1"
diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
index b2835d578b03ee689330d53a9a7b233c9b9f4222..5db12ee22828e43e276ed85f04f985653fe0a2dd 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
@@ -27,26 +27,25 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
-import os
-import numpy as np
 
+import numpy as np
+import os
 from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-
-from finn.util.basic import pynq_part_map
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import gen_finn_dt_tensor, pynq_part_map
 
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
 test_fpga_part = pynq_part_map[test_pynq_board]
@@ -54,15 +53,20 @@ target_clk_ns = 10
 
 
 def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_style):
+    pad_h = padding[0] + padding[2]
+    pad_w = padding[1] + padding[3]
+    idim_h, idim_w = idim
+
     assert pad_style == 2, "only pad_style == 2 supported in hlslib"
-    assert padding > 0, "Output dim should be greater than input dim"
-    odim = idim + padding
+    assert pad_h > 0 or pad_w > 0, "Output dim should be greater than input dim"
+    odim_h = idim_h + pad_h
+    odim_w = idim_w + pad_w
 
     inp = helper.make_tensor_value_info(
-        "inp", TensorProto.FLOAT, [1, idim, idim, num_ch]
+        "inp", TensorProto.FLOAT, [1, idim_h, idim_w, num_ch]
     )
     outp = helper.make_tensor_value_info(
-        "outp", TensorProto.FLOAT, [1, odim, odim, num_ch]
+        "outp", TensorProto.FLOAT, [1, odim_h, odim_w, num_ch]
     )
 
     FMPadding = helper.make_node(
@@ -94,9 +98,9 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty
 
 
 # input image dimension
-@pytest.mark.parametrize("idim", [8])
+@pytest.mark.parametrize("idim", [[8, 8], [10, 8]])
 # number of rows and number of cols to add
-@pytest.mark.parametrize("pad", [2, 3])
+@pytest.mark.parametrize("pad", [[1, 1, 1, 1], [1, 1, 2, 2], [1, 3, 2, 3]])
 # number of channels
 @pytest.mark.parametrize("num_ch", [2, 4])
 # Input parallelism
@@ -112,10 +116,22 @@ def make_single_fmpadding_modelwrapper(idim, padding, num_ch, simd, idt, pad_sty
 def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
     if num_ch % simd != 0:
         pytest.skip(" num_ch % simd != 0, skipping")
+
+    idim_h, idim_w = idim
+    pad_h = pad[0] + pad[2]
+    pad_w = pad[1] + pad[3]
+
+    if idim_h == idim_w and pad_h != pad_w:
+        pytest.skip(
+            """Only equal padding along the dimensions for square images
+            is supported, skipping"""
+        )
+
     # generate input data
-    x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
+    x = gen_finn_dt_tensor(idt, [1, idim_h, idim_w, num_ch])
     input_dict = {"inp": x}
-    odim = idim + pad
+    odim_h = idim_h + pad_h
+    odim_w = idim_w + pad_w
 
     model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, simd, idt, pad_style)
     model = model.transform(InferShapes())
@@ -129,24 +145,26 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
         model = model.transform(HLSSynthIP())
         model = model.transform(PrepareRTLSim())
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
-    expected_oshape = (1, odim, odim, num_ch)
+    expected_oshape = (1, odim_h, odim_w, num_ch)
     assert y_produced.shape == expected_oshape
 
     # calculate reference
     # calculate correct pad according to parameters
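+    # for pad_style == 2, the extra row/column goes to the top/left whenever the
+    # total padding along a dimension is odd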
     if pad_style == 2:
-        if pad % 2 == 0:
-            pad_up = pad // 2
-            pad_left = pad // 2
+        if pad_h % 2 == 0:
+            pad_up = pad_h // 2
+        else:
+            pad_up = pad_h // 2 + 1
+        if pad_w % 2 == 0:
+            pad_left = pad_w // 2
         else:
-            pad_up = pad // 2 + 1
-            pad_left = pad // 2 + 1
+            pad_left = pad_w // 2 + 1
     else:
-        pad_up = pad // 2
-        pad_left = pad // 2
+        pad_up = pad_h // 2
+        pad_left = pad_w // 2
 
-    pad_down = pad - pad_up
-    pad_right = pad - pad_left
+    pad_down = pad_h - pad_up
+    pad_right = pad_w - pad_left
 
     y_expected = np.pad(
         x, ((0, 0), (pad_up, pad_down), (pad_left, pad_right), (0, 0)), "constant"
diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
index 7fca91925a63a5da4294adb002a3cc97831a88ca..f1373123a69f4c3d02b191c0f0560b59d2c9a7b2 100644
--- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
+++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
@@ -27,23 +27,23 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
-import numpy as np
 
+import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 
 
 def make_accpool_modelwrapper(ch, pe, idim, idt):
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index 4fa780548a544d92e02b28486ae1e325ff1f9a9b..9a6050a55dd86ca5064b293f87304cbb1365edea 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -26,41 +26,39 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-
 import pytest
 
 import numpy as np
+import os
 from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.custom_op.registry import getCustomOp
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
 from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
+from finn.transformation.fpgadataflow.vitis_build import VitisBuild
 from finn.transformation.general import GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.util.basic import (
+    alveo_default_platform,
+    alveo_part_map,
     gen_finn_dt_tensor,
     pynq_part_map,
-    alveo_part_map,
-    alveo_default_platform,
 )
 from finn.util.pyverilator import pyverilate_stitched_ip
 from finn.util.test import load_test_checkpoint_or_skip
-from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
-from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
-from finn.transformation.fpgadataflow.floorplan import Floorplan
-from finn.transformation.fpgadataflow.vitis_build import VitisBuild
-from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
-
 
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
 test_fpga_part = pynq_part_map[test_pynq_board]
diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
index 5d496dbb33d21c9092fb2076cac75b3ccbbaa1e9..8997208a648fa79439a882de23865496ba527858 100644
--- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py
+++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py
@@ -27,20 +27,20 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
-import numpy as np
 
+import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.util.basic import gen_finn_dt_tensor
 from finn.util.test import soft_verify_topk
 
@@ -61,7 +61,10 @@ def make_labelselect_modelwrapper(labels, pe, k, idt):
         inputDataType=idt.name,
     )
     graph = helper.make_graph(
-        nodes=[labelselect_node], name="graph", inputs=[inp], outputs=[outp],
+        nodes=[labelselect_node],
+        name="graph",
+        inputs=[inp],
+        outputs=[outp],
     )
 
     model = helper.make_model(graph, producer_name="thresholding-model")
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
index bbc7e8227d80fb9d064f484dafe91ecdcdc47144..b87241de56870cad70d08583b24292e0da91109e 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
@@ -29,27 +29,27 @@
 import pytest
 
 import numpy as np
+import os
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
+from finn.core.rtlsim_exec import rtlsim_exec
 from finn.custom_op.general.multithreshold import multithreshold
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.util.basic import gen_finn_dt_tensor
-from finn.custom_op.registry import getCustomOp
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-import os
 from finn.util.pyverilator import axilite_read, axilite_write
-from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
-from finn.core.rtlsim_exec import rtlsim_exec
 
 test_fpga_part = "xc7z020clg400-1"
 target_clk_ns = 5
diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py
new file mode 100644
index 0000000000000000000000000000000000000000..36b844deab4e28ff35290a170f713a64be839e8a
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py
@@ -0,0 +1,241 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.multithreshold import multithreshold
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.general import GiveUniqueNodeNames
+from finn.util.basic import gen_finn_dt_tensor
+
+
+def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels):
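+    """Expand the depthwise weight tensor W_conv of shape (channels, 1, k_h, k_w)
+    into the equivalent sparse matmul weight matrix of shape
+    (k_h * k_w * channels, channels), which is zero wherever the output channel
+    differs from the input channel."""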
+    W_sparse = np.zeros((channels, channels, k_h, k_w))
+    for ch in range(channels):
+        W_sparse[ch][ch] = W_conv[ch][0]
+    W_conv = W_sparse.astype(np.float32)
+    W_matmul = W_conv.transpose(0, 2, 3, 1)
+    W_matmul = W_matmul.reshape(channels, channels * k_h * k_w)
+    W_matmul = W_matmul.T
+
+    return W_matmul
+
+
+def _calculate_dot_prod_range(dt_a, dt_b, vec_len):
+    """Returns the (min, max) values a dot product between two (un)signed vectors
+    of types dt_a and dt_b with vec_len elements can take."""
+    min_prod = 2 ** 30
+    max_prod = -(2 ** 30)
+    for a_val in [dt_a.min(), dt_a.max()]:
+        for b_val in [dt_b.min(), dt_b.max()]:
+            prod = a_val * b_val * vec_len
+            if prod < min_prod:
+                min_prod = prod
+            if prod > max_prod:
+                max_prod = prod
+    return (min_prod, max_prod)
+
+
+def _make_single_vvau_modelwrapper(
+    W, pe, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T=None, tdt=None
+):
+    in_shape = [1, dim_h, dim_w, k_h * k_w * channels]  # [N, H, W, K*K*CH]
+    out_shape = [
+        1,
+        dim_h,
+        dim_w,
+        channels,
+    ]  # [N, H, W, OFM_CH] (OFM_CH=IFM_CH because depthwise convolution)
+
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)
+
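+    # when thresholds are given, the node applies the activation itself
+    # (noActivation=0) and ActVal carries the signed output offset odt.min();
+    # otherwise noActivation=1 and no offset is applied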
+    if T is not None:
+        no_act = 0
+        node_inp_list = ["inp", "weights", "thresh"]
+        actval = odt.min()
+    else:
+        no_act = 1
+        node_inp_list = ["inp", "weights"]
+        actval = 0
+
+    VVAU_node = helper.make_node(
+        "Vector_Vector_Activate_Batch",
+        node_inp_list,
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        PE=pe,
+        Dim=[dim_h, dim_w],
+        Channels=channels,
+        Kernel=[k_h, k_w],
+        resType="lut",
+        ActVal=actval,
+        inputDataType=idt.name,
+        weightDataType=wdt.name,
+        outputDataType=odt.name,
+        noActivation=no_act,
+    )
+
+    graph = helper.make_graph(
+        nodes=[VVAU_node], name="vvau_graph", inputs=[inp], outputs=[outp]
+    )
+
+    model = helper.make_model(graph, producer_name="vvau-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+    model.set_tensor_datatype("weights", wdt)
+
+    model.set_initializer("weights", W)
+    model.set_tensor_shape("weights", (channels, 1, k_h, k_w))
+
+    if T is not None:
+        model.set_tensor_datatype("thresh", tdt)
+        model.set_initializer("thresh", T)
+
+    return model
+
+
+def prepare_inputs(input_tensor):
+    return {"inp": input_tensor}
+
+
+# mem_mode: const or decoupled
+@pytest.mark.parametrize("idt", [DataType.UINT4, DataType.UINT8])
+# weight datatype
+@pytest.mark.parametrize("wdt", [DataType.INT4])
+# activation: None or DataType
+@pytest.mark.parametrize("act", [DataType.UINT4, None])
+# PE
+@pytest.mark.parametrize("pe", [1, "channels"])
+# Input image shape
+@pytest.mark.parametrize("dim_h", [10])
+@pytest.mark.parametrize("dim_w", [10, 1])
+# Kernel shape
+@pytest.mark.parametrize("k_h", [3])
+@pytest.mark.parametrize("k_w", [3, 1])
+# Number of input and output channels
+@pytest.mark.parametrize("channels", [3, 4])
+# execution mode
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_fpgadataflow_vvau(
+    idt, wdt, act, pe, dim_h, dim_w, k_h, k_w, channels, exec_mode
+):
+    if pe == "channels":
+        pe = channels
+
+    if dim_w == 1 and k_w != 1:
+        pytest.skip("1D image requires 1D kernel, skipping.")
+
+    if channels % pe != 0:
+        pytest.skip("Requirement Channels divisable by PE is violated.")
+
+    # Generate weights in expected shape for ONNX and HLS node
+    W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w))  # shape: [channels, 1, k, k]
+    W_onnx = _infer_sparse_weight_tensor(
+        W, k_h, k_w, channels
+    )  # shape: [k*k*channels, channels]
+
+    # Generate inputs in expected format for ONNX and HLS node
+    x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels))
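+    # x is fed to the numpy matmul reference as-is, while the VVAU model below
+    # receives a re-interleaved copy with channel groups of size PE innermost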
+    x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe)
+    x_vvau = x_vvau.transpose(0, 1, 2, 4, 3, 5)
+    x_vvau = x_vvau.reshape(1, dim_h, dim_w, channels * k_h * k_w)
+
+    if act is None:
+        T = None
+        tdt = None
+        odt = DataType.INT32
+    else:
+        odt = act
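+        # draw random thresholds from the attainable dot-product range and sort
+        # them in ascending order per channel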
+        (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w * channels)
+        n_steps = act.get_num_possible_values() - 1
+        T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32)
+        T = np.sort(T, axis=1)
+        tdt = DataType.INT32
+
+    model = _make_single_vvau_modelwrapper(
+        W, pe, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt
+    )
+
+    if exec_mode == "cppsim":
+        model = model.transform(SetExecMode("cppsim"))
+        model = model.transform(PrepareCppSim())
+        model = model.transform(CompileCppSim())
+    elif exec_mode == "rtlsim":
+        model = model.transform(SetExecMode("rtlsim"))
+        model = model.transform(GiveUniqueNodeNames())
+        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+        model = model.transform(HLSSynthIP())
+        model = model.transform(PrepareRTLSim())
+    else:
+        raise Exception("Unknown exec_mode in test_fpgadataflow_vvau")
+
+    input_dict = prepare_inputs(x_vvau)
+
+    # Calculate output
+    y_expected = np.matmul(x, W_onnx)  # Y is in [N, H, W, C] format
+    if T is not None:
+        # Reshape Y, as multithreshold expects Y to be in [N, C, H, W] format
+        y_expected = np.transpose(y_expected, (0, 3, 1, 2))
+        y_expected = multithreshold(y_expected, T)
+        y_expected = np.transpose(y_expected, (0, 2, 3, 1))
+        # signed offset
+        y_expected += act.min()
+
+    y_produced = oxe.execute_onnx(model, input_dict, return_full_exec_context=False)[
+        "outp"
+    ]
+
+    assert (y_produced == y_expected).all(), "cppsim failed"
+
+    if exec_mode == "rtlsim":
+        node = model.get_nodes_by_op_type("Vector_Vector_Activate_Batch")[0]
+        inst = getCustomOp(node)
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
+        exp_cycles_dict = model.analysis(exp_cycles_per_layer)
+        exp_cycles = exp_cycles_dict[node.name]
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
+        assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index ff88536f477e80e5c92a2c352f0af81488997c7f..11ca79471d4eb2642a141ecdda9b4c55714ec76c 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -28,22 +28,22 @@
 
 import pytest
 
+import numpy as np
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
+from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
-from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor
-from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
-from finn.custom_op.registry import getCustomOp
-import numpy as np
 
 
 def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py
index c487824964400cacbde575da2c10757985ad6e32..73b1315592af79145e1b7c6f147b3ede7e066bce 100644
--- a/tests/fpgadataflow/test_runtime_weights.py
+++ b/tests/fpgadataflow/test_runtime_weights.py
@@ -26,20 +26,22 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from finn.util.create import hls_random_mlp_maker
+import pytest
+
+import numpy as np
+import os
+
 from finn.core.datatype import DataType
-from finn.transformation.general import GiveUniqueNodeNames
-from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
-from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.core.rtlsim_exec import rtlsim_exec
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
-from finn.custom_op.registry import getCustomOp
-from finn.core.rtlsim_exec import rtlsim_exec
+from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor
-from finn.util.pyverilator import axilite_write, axilite_read
-import numpy as np
-import pytest
-import os
+from finn.util.create import hls_random_mlp_maker
+from finn.util.pyverilator import axilite_read, axilite_write
 
 test_fpga_part = "xc7z020clg400-1"
 target_clk_ns = 5
diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py
index fe3a1db8a476e33bfc0d76996917fab9ae6ed98b..f268611c296687987fffe32293b0454109bc7db4 100644
--- a/tests/fpgadataflow/test_set_folding.py
+++ b/tests/fpgadataflow/test_set_folding.py
@@ -27,18 +27,19 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pytest
+
 import numpy as np
 from onnx import TensorProto, helper
 
-from finn.custom_op.registry import getCustomOp
-from finn.core.datatype import DataType
 from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer
+from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.set_folding import SetFolding
-from finn.transformation.general import GiveUniqueNodeNames
+from finn.custom_op.registry import getCustomOp
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
+from finn.transformation.fpgadataflow.set_folding import SetFolding
+from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.test import load_test_checkpoint_or_skip
 
 
diff --git a/tests/transformation/streamline/test_absorb_mul_into_topk.py b/tests/transformation/streamline/test_absorb_mul_into_topk.py
index d0a089f9e5f894a5da635672eb58af1d8ddef3ef..bc9a31d49c7edfc20ca3e932efd00df939f1135f 100644
--- a/tests/transformation/streamline/test_absorb_mul_into_topk.py
+++ b/tests/transformation/streamline/test_absorb_mul_into_topk.py
@@ -30,13 +30,14 @@ import pytest
 import numpy as np
 from onnx import TensorProto, helper
 
+import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_datatypes import InferDataTypes
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.streamline.absorb import AbsorbScalarMulAddIntoTopK
-import finn.core.onnx_exec as oxe
+
 
 # parameter to indicate if mul parameter is negative or positive
 @pytest.mark.parametrize("mul_positive", [True, False])
diff --git a/tests/transformation/streamline/test_absorb_transp_into_flatten.py b/tests/transformation/streamline/test_absorb_transp_into_flatten.py
index cbbb33b4606acf55ace662da0986105f8c456b39..1e5d5fe5806d2e3f418438b260d2257f5ae31adf 100644
--- a/tests/transformation/streamline/test_absorb_transp_into_flatten.py
+++ b/tests/transformation/streamline/test_absorb_transp_into_flatten.py
@@ -3,14 +3,15 @@ import pytest
 import numpy as np
 from onnx import TensorProto, helper
 
-from finn.core.modelwrapper import ModelWrapper
 import finn.core.data_layout as DataLayout
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.absorb import AbsorbTransposeIntoFlatten
-import finn.core.onnx_exec as oxe
+
 
 # permutation of transpose node
 @pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]])
diff --git a/tests/transformation/streamline/test_collapse_repeated_op.py b/tests/transformation/streamline/test_collapse_repeated_op.py
index b74d868f9b921c35ff9f596c811583f45f761374..1741ab6b8f4fc1c3e806a8868f329cd7753eac4d 100644
--- a/tests/transformation/streamline/test_collapse_repeated_op.py
+++ b/tests/transformation/streamline/test_collapse_repeated_op.py
@@ -26,6 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import pytest
+
 import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
@@ -34,7 +36,6 @@ import finn.core.onnx_exec as ox
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import CollapseRepeatedAdd, CollapseRepeatedMul
-import pytest
 
 
 def test_collapse_repeated_op():
@@ -74,7 +75,8 @@ def test_collapse_repeated_op():
 
 
 @pytest.mark.parametrize(
-    "test_args", [("Add", CollapseRepeatedAdd()), ("Mul", CollapseRepeatedMul())],
+    "test_args",
+    [("Add", CollapseRepeatedAdd()), ("Mul", CollapseRepeatedMul())],
 )
 def test_collapse_repeated_only_if_linear(test_args):
     scalar_op = test_args[0]
diff --git a/tests/transformation/streamline/test_linear_past_eltwise.py b/tests/transformation/streamline/test_linear_past_eltwise.py
index f5af2307fb042879a837a26c50715c8ec1b96963..098b3f9d4f67a2cbc1a87fbb67a313d00e229777 100644
--- a/tests/transformation/streamline/test_linear_past_eltwise.py
+++ b/tests/transformation/streamline/test_linear_past_eltwise.py
@@ -26,19 +26,18 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-import numpy as np
+import pytest
 
+import numpy as np
+import os
 from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd
 from finn.transformation.infer_shapes import InferShapes
-
-import pytest
+from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd
 
 export_onnx_path = "test_linear_past_eltwise.onnx"
 
diff --git a/tests/transformation/streamline/test_move_chw_add_past_conv.py b/tests/transformation/streamline/test_move_chw_add_past_conv.py
index fc64a04e40036eae7057c15f4e628155bd563e51..e4be8fc3836f18bf95eb193516937c2e9334e2ff 100644
--- a/tests/transformation/streamline/test_move_chw_add_past_conv.py
+++ b/tests/transformation/streamline/test_move_chw_add_past_conv.py
@@ -29,13 +29,13 @@
 import pytest
 
 import numpy as np
-from onnx import helper, TensorProto
+from onnx import TensorProto, helper
 
+import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveAddPastConv
-from finn.custom_op.general.im2col import compute_conv_output_dim
-import finn.core.onnx_exec as oxe
 
 
 # input dimension
diff --git a/tests/transformation/streamline/test_move_flatten_past_affine.py b/tests/transformation/streamline/test_move_flatten_past_affine.py
index b2d5e51613d41f3f2db3dabcef7b982ec2816b19..1971ecfaa181d6ee799a9191b63d2482629b1e1c 100644
--- a/tests/transformation/streamline/test_move_flatten_past_affine.py
+++ b/tests/transformation/streamline/test_move_flatten_past_affine.py
@@ -30,16 +30,17 @@ import pytest
 import numpy as np
 from onnx import TensorProto, helper
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
 import finn.core.data_layout as DataLayout
-from finn.util.basic import gen_finn_dt_tensor
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveFlattenPastAffine
-import finn.core.onnx_exec as oxe
+from finn.util.basic import gen_finn_dt_tensor
+
 
 # data layout
 @pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW])
diff --git a/tests/transformation/streamline/test_move_flatten_past_topk.py b/tests/transformation/streamline/test_move_flatten_past_topk.py
index 65da92c22dbe9f6b1c5a49172ffae59fa6e98607..5e0211ad8857653ce75af2f5a7de0c6439770108 100644
--- a/tests/transformation/streamline/test_move_flatten_past_topk.py
+++ b/tests/transformation/streamline/test_move_flatten_past_topk.py
@@ -29,17 +29,18 @@ import pytest
 
 from onnx import TensorProto, helper
 
-from finn.core.modelwrapper import ModelWrapper
-from finn.core.datatype import DataType
 import finn.core.data_layout as DataLayout
-from finn.util.basic import gen_finn_dt_tensor
-from finn.transformation.insert_topk import InsertTopK
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.insert_topk import InsertTopK
 from finn.transformation.streamline.reorder import MoveFlattenPastTopK
-import finn.core.onnx_exec as oxe
+from finn.util.basic import gen_finn_dt_tensor
+
 
 # data layout
 @pytest.mark.parametrize("data_layout", [DataLayout.NHWC, DataLayout.NCHW])
@@ -59,7 +60,10 @@ def test_move_flatten_past_affine(data_layout, batch_size):
     flatten_node = helper.make_node("Flatten", ["inp"], ["outp"])
 
     graph = helper.make_graph(
-        nodes=[flatten_node], name="move-flatten-graph", inputs=[inp], outputs=[outp],
+        nodes=[flatten_node],
+        name="move-flatten-graph",
+        inputs=[inp],
+        outputs=[outp],
     )
 
     model = helper.make_model(graph, producer_name="move_flatten_model")
diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_op.py b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
index 94eb52835b1800a839e5a9792e9cf1d7be1e681d..60e76b8b07e06048ecf1a15c72134fecf5c97346 100644
--- a/tests/transformation/streamline/test_move_identical_op_past_join_op.py
+++ b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
@@ -1,12 +1,12 @@
 import pytest
 
-from onnx import helper as oh
 from onnx import TensorProto
+from onnx import helper as oh
 
+import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.streamline.reorder import MoveTransposePastJoinAdd
 from finn.util.basic import gen_finn_dt_tensor
-import finn.core.onnx_exec as oxe
 
 
 def create_model(perm):
diff --git a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
index 7c49baf8cd9d5b85b3b76f3513d42483d3bbeb0c..fca05afa5b155e6a293857c14c10c4a9b80eeaf4 100644
--- a/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
+++ b/tests/transformation/streamline/test_move_maxpool_past_multithreshold.py
@@ -1,11 +1,11 @@
-from onnx import TensorProto, helper
 import numpy as np
+from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold
-from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold
 
 
 def get_multithreshold_rand_params(channels, num_of_thres, seed=None):
diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
index ce0cbcd0405f8a09efabbadd5555de1bd6b89e43..cb9beed713eb448b49015a7de601a4d15edc035b 100644
--- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py
+++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
@@ -1,14 +1,15 @@
 import pytest
 
-from onnx import helper, TensorProto
-from finn.custom_op.general.im2col import compute_conv_output_dim
+from onnx import TensorProto, helper
+
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.im2col import compute_conv_output_dim
 from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
 from finn.transformation.streamline.reorder import MoveMulPastDWConv
+from finn.util.basic import gen_finn_dt_tensor
 
 
 # input dimension
diff --git a/tests/transformation/streamline/test_move_mul_past_maxpool.py b/tests/transformation/streamline/test_move_mul_past_maxpool.py
index f612841020e373a3c6458ee3e9a6eb14fcea7eb5..81f18842ed8ba2b5230f3a853076244d0a0ab8d9 100755
--- a/tests/transformation/streamline/test_move_mul_past_maxpool.py
+++ b/tests/transformation/streamline/test_move_mul_past_maxpool.py
@@ -1,15 +1,16 @@
-import numpy as np
 import pytest
 
-from onnx import helper, TensorProto
-from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
+import numpy as np
+from onnx import TensorProto, helper
+
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.general.maxpoolnhwc import compute_pool_output_dim
 from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
-from finn.util.basic import gen_finn_dt_tensor
 from finn.transformation.streamline.reorder import MoveMulPastMaxPool
+from finn.util.basic import gen_finn_dt_tensor
 
 
 # input dimension
diff --git a/tests/transformation/streamline/test_move_past_fork.py b/tests/transformation/streamline/test_move_past_fork.py
index f3d37bd60c9e2580ca4499daafa8693f39fec810..364590f933ac27539fd546d64e25325032c885c9 100644
--- a/tests/transformation/streamline/test_move_past_fork.py
+++ b/tests/transformation/streamline/test_move_past_fork.py
@@ -1,12 +1,12 @@
-from onnx import TensorProto, helper
+import pytest
+
 import numpy as np
+from onnx import TensorProto, helper
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.streamline.reorder import MoveLinearPastFork
 from finn.transformation.infer_shapes import InferShapes
-
-import pytest
+from finn.transformation.streamline.reorder import MoveLinearPastFork
 
 
 @pytest.mark.parametrize("ch", [64, 1])
diff --git a/tests/transformation/streamline/test_move_scalar_past_conv.py b/tests/transformation/streamline/test_move_scalar_past_conv.py
index 94fee7907d1ed1cccbf95520e903c7d9b43d8f7d..5e2ded0174e9aa7a02551ed6b658f97ff070a523 100644
--- a/tests/transformation/streamline/test_move_scalar_past_conv.py
+++ b/tests/transformation/streamline/test_move_scalar_past_conv.py
@@ -1,20 +1,19 @@
+import pytest
+
 import numpy as np
 import onnx.helper as oh
-import pytest
 from onnx import TensorProto
 
 import finn.core.onnx_exec as ox
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.streamline import (
-    MoveAddPastConv,
-    MoveScalarMulPastConv,
-)
+from finn.transformation.streamline import MoveAddPastConv, MoveScalarMulPastConv
 
 
 @pytest.mark.parametrize("padding", [False, True])
 @pytest.mark.parametrize(
-    "test_args", [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
+    "test_args",
+    [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
 )
 def test_move_scalar_past_conv(test_args, padding):
     scalar_op = test_args[0]
@@ -92,7 +91,8 @@ def test_move_scalar_past_conv(test_args, padding):
 
 
 @pytest.mark.parametrize(
-    "test_args", [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
+    "test_args",
+    [("Add", MoveAddPastConv()), ("Mul", MoveScalarMulPastConv())],
 )
 def test_move_scalar_past_conv_only_if_linear(test_args):
     scalar_op = test_args[0]
diff --git a/tests/transformation/streamline/test_move_scalar_past_matmul.py b/tests/transformation/streamline/test_move_scalar_past_matmul.py
index e432dbf4ec1a38551609e5914e2d44968a020908..b15f84303b0dc2e00bd51397543871cfeb99c1f9 100644
--- a/tests/transformation/streamline/test_move_scalar_past_matmul.py
+++ b/tests/transformation/streamline/test_move_scalar_past_matmul.py
@@ -26,8 +26,9 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import numpy as np
 import pytest
+
+import numpy as np
 import onnx.helper as oh
 from onnx import TensorProto
 
diff --git a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
index e434fc7d4f683120176e18a2bfa9da99d9ee0b0e..9110ede98da81a627127767276db33362503ef84 100644
--- a/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
+++ b/tests/transformation/streamline/test_move_transpose_past_scalar_mul.py
@@ -3,14 +3,15 @@ import pytest
 import numpy as np
 from onnx import TensorProto, helper
 
-from finn.core.modelwrapper import ModelWrapper
 import finn.core.data_layout as DataLayout
-from finn.transformation.infer_shapes import InferShapes
-from finn.transformation.infer_datatypes import InferDataTypes
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_data_layouts import InferDataLayouts
-from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline.reorder import MoveTransposePastScalarMul
-import finn.core.onnx_exec as oxe
+
 
 # permutation of transpose node
 @pytest.mark.parametrize("perm", [[0, 2, 3, 1], [0, 1, 3, 2], [3, 2, 0, 1]])
diff --git a/tests/transformation/streamline/test_remove_identity_ops.py b/tests/transformation/streamline/test_remove_identity_ops.py
index 536c1ab0b48fa44388da23f45b528da3c5f3b2f2..ad7c20fb51902f22c20896bdfb3321dc74d0572d 100644
--- a/tests/transformation/streamline/test_remove_identity_ops.py
+++ b/tests/transformation/streamline/test_remove_identity_ops.py
@@ -1,7 +1,8 @@
 import pytest
 
 import numpy as np
-from onnx import helper, TensorProto
+from onnx import TensorProto, helper
+
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
@@ -11,18 +12,29 @@ from finn.transformation.streamline.remove import RemoveIdentityOps
 from finn.util.basic import gen_finn_dt_tensor
 
 
-def insert_identity_op(model, op):
+def insert_identity_op(model, op, as_first_node, approx):
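+    # with approx=True the inserted constant is only approximately the identity
+    # (1e-6 resp. 1 - 1e-6), which is why the test compares with np.isclose below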
+    if approx:
+        zero_val = 0.000001
+        one_val = 0.999999
+    else:
+        zero_val = 0.0
+        one_val = 1.0
     if op in ["Add", "Sub"]:
-        val = np.asarray([0.0], dtype=np.float32)
+        val = np.asarray([zero_val], dtype=np.float32)
     elif op in ["Mul", "Div"]:
-        val = np.asarray([1.0], dtype=np.float32)
+        val = np.asarray([one_val], dtype=np.float32)
     else:
         return
 
-    identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"])
     graph = model.graph
-    graph.node.insert(3, identity_node)
-    graph.node[-1].input[0] = "ident_out"
+    if as_first_node:
+        identity_node = helper.make_node(op, ["inp", "value"], ["ident_out"])
+        graph.node.insert(0, identity_node)
+        graph.node[1].input[0] = "ident_out"
+    else:
+        identity_node = helper.make_node(op, ["div_out", "value"], ["ident_out"])
+        graph.node.insert(3, identity_node)
+        graph.node[-1].input[0] = "ident_out"
     model.set_initializer("value", val)
 
     return model
@@ -30,7 +42,9 @@ def insert_identity_op(model, op):
 
 # identity operations to be inserted
 @pytest.mark.parametrize("op", ["Add", "Sub", "Mul", "Div"])
-def test_remove_identity_ops(op):
+@pytest.mark.parametrize("approx", [False, True])
+@pytest.mark.parametrize("as_first_node", [False, True])
+def test_remove_identity_ops(op, as_first_node, approx):
 
     # set up onnx model
     inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, 4, 1, 1])
@@ -64,7 +78,7 @@ def test_remove_identity_ops(op):
     model.set_initializer("shape", shape_values)
     model.set_initializer("div", div_values)
     model.set_initializer("matmul", matmul_values)
-    insert_identity_op(model, op)
+    insert_identity_op(model, op, as_first_node, approx)
     model = model.transform(InferShapes())
     model = model.transform(InferDataTypes())
     idict = {"inp": inp_values}
@@ -78,4 +92,4 @@ def test_remove_identity_ops(op):
 
     odict = oxe.execute_onnx(model, idict)
     out_after = odict["outp"]
-    assert (out_before == out_after).all()
+    assert np.isclose(out_before, out_after, atol=1e-3).all()
diff --git a/tests/transformation/streamline/test_sign_to_thres.py b/tests/transformation/streamline/test_sign_to_thres.py
index 4618dffc43f5cee848b580b77cf418c612b48f3e..2ffb5713c0363b115dee5c41484fb5826faf803a 100644
--- a/tests/transformation/streamline/test_sign_to_thres.py
+++ b/tests/transformation/streamline/test_sign_to_thres.py
@@ -26,12 +26,11 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-from pkgutil import get_data
-
 import brevitas.onnx as bo
 import onnx
 import onnx.numpy_helper as nph
+import os
+from pkgutil import get_data
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py
index ca8cf3b1ceba6943828f47bcbcf974aa5b368c4e..ed2595330323bfc8a576af36ae3fea27522ec66c 100644
--- a/tests/transformation/streamline/test_streamline_cnv.py
+++ b/tests/transformation/streamline/test_streamline_cnv.py
@@ -26,27 +26,30 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import pkg_resources as pk
+
+import pytest
+
 import brevitas.onnx as bo
 import numpy as np
-import pytest
-import pkg_resources as pk
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import (
-    RemoveUnusedTensors,
-    RemoveStaticGraphInputs,
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
+    RemoveStaticGraphInputs,
+    RemoveUnusedTensors,
 )
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import Streamline
-from finn.util.test import get_test_model_trained
 from finn.util.basic import make_build_dir
+from finn.util.test import get_test_model_trained
 
 export_onnx_path = make_build_dir("test_streamline_cnv_")
 
+
 # act bits
 @pytest.mark.parametrize("abits", [1, 2])
 # weight bits
diff --git a/tests/transformation/streamline/test_streamline_fc.py b/tests/transformation/streamline/test_streamline_fc.py
index d88bf14913d2551cd7347c5617895998a7d56799..3563b87c45a7ffe99fe6e9bdfd9f54a39e89cb68 100644
--- a/tests/transformation/streamline/test_streamline_fc.py
+++ b/tests/transformation/streamline/test_streamline_fc.py
@@ -26,30 +26,31 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from pkgutil import get_data
+import pytest
 
 import brevitas.onnx as bo
 import numpy as np
 import onnx
 import onnx.numpy_helper as nph
-import pytest
+from pkgutil import get_data
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import (
-    RemoveUnusedTensors,
-    RemoveStaticGraphInputs,
     GiveReadableTensorNames,
     GiveUniqueNodeNames,
+    RemoveStaticGraphInputs,
+    RemoveUnusedTensors,
 )
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import Streamline
-from finn.util.test import get_test_model_trained
 from finn.util.basic import make_build_dir
+from finn.util.test import get_test_model_trained
 
 export_onnx_path = make_build_dir("test_streamline_fc_")
 
+
 # act bits
 @pytest.mark.parametrize("abits", [1, 2])
 # weight bits
diff --git a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
index 7e894c078b15c16f29dec60d694f8b6892e84a8a..300ef85faacf664b89c7b949ea2e462f110eef85 100644
--- a/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
+++ b/tests/transformation/test_batchnorm_to_affine_bnn_pynq.py
@@ -26,14 +26,14 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-from pkgutil import get_data
 import pkg_resources as pk
 
 import brevitas.onnx as bo
+import numpy as np
 import onnx
 import onnx.numpy_helper as nph
-import numpy as np
+import os
+from pkgutil import get_data
 
 import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
diff --git a/tests/transformation/test_infer_data_layouts_cnv.py b/tests/transformation/test_infer_data_layouts_cnv.py
index a8ba81dff608994b8e5efb33ec23bd0e3f894175..10bc687d13d4a85ce64955cb38c1c0dfdc6d53da 100644
--- a/tests/transformation/test_infer_data_layouts_cnv.py
+++ b/tests/transformation/test_infer_data_layouts_cnv.py
@@ -26,22 +26,22 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import brevitas.onnx as bo
 import os
 
-import brevitas.onnx as bo
+import finn.core.data_layout as DataLayout
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.util.test import get_test_model_trained
-from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
-from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
-import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.infer_data_layouts import InferDataLayouts
-import finn.core.data_layout as DataLayout
 
 export_onnx_path_cnv = "test_infer_data_layouts.onnx"
 
diff --git a/tests/transformation/test_infer_datatypes_lfc.py b/tests/transformation/test_infer_datatypes_lfc.py
index 0802c50c7d15a649182529a4e6897b9bbe273336..00715e3e3ca3626e1b76bf3b23bae4dc1d65b053 100644
--- a/tests/transformation/test_infer_datatypes_lfc.py
+++ b/tests/transformation/test_infer_datatypes_lfc.py
@@ -26,9 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-
 import brevitas.onnx as bo
+import os
 
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
diff --git a/tests/util/test_build_dataflow.py b/tests/util/test_build_dataflow.py
index c8e886ddb047e932e5f03ce7d460d538c95a25f2..770553201eb86f448dcd9e22afd8e827c338c7f9 100644
--- a/tests/util/test_build_dataflow.py
+++ b/tests/util/test_build_dataflow.py
@@ -26,12 +26,15 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import pytest
 import pkg_resources as pk
+
+import pytest
+
+import os
 from shutil import copytree
-from finn.util.basic import make_build_dir
+
 from finn.builder.build_dataflow import build_dataflow_directory
-import os
+from finn.util.basic import make_build_dir
 
 
 @pytest.mark.slow
diff --git a/tests/util/test_data_packing_hls.py b/tests/util/test_data_packing_hls.py
index a926bc4068831a552ccfb728511ddda4a8670ca8..3221eda34c85ed9d65b258b6489699cda8400517 100644
--- a/tests/util/test_data_packing_hls.py
+++ b/tests/util/test_data_packing_hls.py
@@ -26,13 +26,12 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-import shutil
-import subprocess
-
 import pytest
 
 import numpy as np
+import os
+import shutil
+import subprocess
 
 import finn.util.basic as cutil
 from finn.core.datatype import DataType