diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev index 94421a780e84f2b5acd2b11b585f69b8ace803a0..a2abaac17c3bbdfb9440644fc371ffb4461621f6 100644 --- a/docker/Dockerfile.finn_dev +++ b/docker/Dockerfile.finn_dev @@ -55,6 +55,9 @@ RUN pip install sphinx_rtd_theme==0.5.0 RUN pip install pytest-xdist==2.0.0 RUN pip install pytest-parallel==0.1.0 RUN pip install netron>=4.7.9 +RUN pip install pandas==1.1.5 +RUN pip install scikit-learn==0.24.1 +RUN pip install tqdm==4.31.1 RUN pip install -e git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading # switch user diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 27b4897d0364bbf23d13d5ef63fe03161d37148a..55ee33706bb84810ef0eead87c414d442d131f7a 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -12,7 +12,7 @@ gecho () { # checkout the correct dependency repo commits # the repos themselves are cloned in the Dockerfile -FINN_BASE_COMMIT=ff218afa2e7bdba66aa8bafb3393e0c83017f3b8 +FINN_BASE_COMMIT=94beb27de0decb58d31555823860a24da5f09c5a BREVITAS_COMMIT=aff49758ec445d77c75721c7de3091a2a1797ca8 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 HLSLIB_COMMIT=1b0bb309256d51d10a8bcdb380caf09de679c8f7 diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst index 3b475303d7dde91e8b6a21856eb4d66417f164d7..bff31cde45122ebc25f515422ffc523f4f78e3be 100644 --- a/docs/finn/getting_started.rst +++ b/docs/finn/getting_started.rst @@ -120,13 +120,15 @@ These are summarized below: * ``VIVADO_PATH`` points to your Vivado installation on the host * (optional, for Vitis & Alveo only) ``VITIS_PATH``, ``PLATFORM_REPO_PATHS`` and ``XILINX_XRT`` respectively point to your Vitis installation, the Vitis platform files, and Xilinx XRT -* ``JUPYTER_PORT`` (default 8888) changes the port for Jupyter inside Docker -* ``NETRON_PORT`` (default 8081) changes the port for Netron inside Docker -* ``NUM_DEFAULT_WORKERS`` (default 1) specifies the degree of parallelization for the transformations that can be run in parallel -* ``PYNQ_BOARD`` or ``ALVEO_BOARD`` specifies the type of PYNQ/Alveo board used (see "supported hardware" below) for the test suite -* ``PYNQ_IP`` and ``PYNQ_PORT`` (or ``ALVEO_IP`` and ``ALVEO_PORT``) specify ip address and port number to access the PYNQ board / Alveo target -* ``PYNQ_USERNAME`` and ``PYNQ_PASSWORD`` (or ``ALVEO_USERNAME`` and ``ALVEO_PASSWORD``) specify the PYNQ board / Alveo host access credentials for the test suite. For PYNQ, password is always needed to run as sudo. For Alveo, you can leave the password empty and place your ssh private key in the ``finn/ssh_keys`` folder to use keypair authentication. -* ``PYNQ_TARGET_DIR`` (or ``ALVEO_TARGET_DIR``) specifies the target dir on the PYNQ board / Alveo host for the test suite +* (optional) ``JUPYTER_PORT`` (default 8888) changes the port for Jupyter inside Docker +* (optional) ``JUPYTER_PASSWD_HASH`` (default "") sets the Jupyter notebook password hash; if set to an empty string, token authentication will be used (the token is printed in the terminal on launch). +* (optional) ``LOCALHOST_URL`` (default localhost) sets the base URL for accessing e.g. Netron from inside the container. Useful when running FINN remotely.
+* (optional) ``NETRON_PORT`` (default 8081) changes the port for Netron inside Docker +* (optional) ``NUM_DEFAULT_WORKERS`` (default 1) specifies the degree of parallelization for the transformations that can be run in parallel +* (optional) ``PYNQ_BOARD`` or ``ALVEO_BOARD`` specifies the type of PYNQ/Alveo board used (see "supported hardware" below) for the test suite +* (optional) ``PYNQ_IP`` and ``PYNQ_PORT`` (or ``ALVEO_IP`` and ``ALVEO_PORT``) specify the IP address and port number used to access the PYNQ board / Alveo target +* (optional) ``PYNQ_USERNAME`` and ``PYNQ_PASSWORD`` (or ``ALVEO_USERNAME`` and ``ALVEO_PASSWORD``) specify the PYNQ board / Alveo host access credentials for the test suite. For PYNQ, a password is always needed to run as sudo. For Alveo, you can leave the password empty and place your SSH private key in the ``finn/ssh_keys`` folder to use keypair authentication. +* (optional) ``PYNQ_TARGET_DIR`` (or ``ALVEO_TARGET_DIR``) specifies the target directory on the PYNQ board / Alveo host for the test suite * (optional) ``FINN_HOST_BUILD_DIR`` specifies which directory on the host will be used as the build directory. Defaults to ``/tmp/finn_dev_<username>`` * (optional) ``IMAGENET_VAL_PATH`` specifies the path to the ImageNet validation directory for tests. diff --git a/notebooks/basics/1_brevitas_network_import.ipynb b/notebooks/basics/1_brevitas_network_import.ipynb index ad2b3db8ffcf3ae99e2a3ca13a2c002685e2df92..8ba7d00a171f68577b28c5897f0106ea4207a6ef 100644 --- a/notebooks/basics/1_brevitas_network_import.ipynb +++ b/notebooks/basics/1_brevitas_network_import.ipynb @@ -31,7 +31,7 @@ "source": [ "## 1. Load up the trained PyTorch model\n", "\n", - "The FINN Docker image comes with several [example Brevitas networks](https://github.com/Xilinx/brevitas/tree/master/brevitas_examples/bnn_pynq), and we'll use the LFC-w1a1 model as the example network here. This is a binarized fully connected network trained on the MNIST dataset. Let's start by looking at what the PyTorch network definition looks like:" + "The FINN Docker image comes with several [example Brevitas networks](https://github.com/Xilinx/brevitas/tree/master/src/brevitas_examples/bnn_pynq), and we'll use the LFC-w1a1 model as the example network here. This is a binarized fully connected network trained on the MNIST dataset. 
Let's start by looking at what the PyTorch network definition looks like:" ] }, { @@ -65,51 +65,55 @@ "# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n", "# SOFTWARE.\n", "\n", + "import ast\n", "from functools import reduce\n", "from operator import mul\n", "\n", "from torch.nn import Module, ModuleList, BatchNorm1d, Dropout\n", "import torch\n", "\n", - "from .common import get_quant_linear, get_act_quant, get_quant_type, QuantLinear\n", + "from brevitas.nn import QuantIdentity, QuantLinear\n", + "from .common import CommonWeightQuant, CommonActQuant\n", + "from .tensor_norm import TensorNorm\n", "\n", - "FC_OUT_FEATURES = [1024, 1024, 1024]\n", - "INTERMEDIATE_FC_PER_OUT_CH_SCALING = True\n", - "LAST_FC_PER_OUT_CH_SCALING = False\n", - "IN_DROPOUT = 0.2\n", - "HIDDEN_DROPOUT = 0.2\n", + "DROPOUT = 0.2\n", "\n", "\n", - "class LFC(Module):\n", + "class FC(Module):\n", "\n", - " def __init__(self, num_classes=10, weight_bit_width=None, act_bit_width=None,\n", - " in_bit_width=None, in_ch=1, in_features=(28, 28)):\n", - " super(LFC, self).__init__()\n", - "\n", - " weight_quant_type = get_quant_type(weight_bit_width)\n", - " act_quant_type = get_quant_type(act_bit_width)\n", - " in_quant_type = get_quant_type(in_bit_width)\n", + " def __init__(\n", + " self,\n", + " num_classes,\n", + " weight_bit_width,\n", + " act_bit_width,\n", + " in_bit_width,\n", + " in_channels,\n", + " out_features,\n", + " in_features=(28, 28)):\n", + " super(FC, self).__init__()\n", "\n", " self.features = ModuleList()\n", - " self.features.append(get_act_quant(in_bit_width, in_quant_type))\n", - " self.features.append(Dropout(p=IN_DROPOUT))\n", + " self.features.append(QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))\n", + " self.features.append(Dropout(p=DROPOUT))\n", " in_features = reduce(mul, in_features)\n", - " for out_features in FC_OUT_FEATURES:\n", - " self.features.append(get_quant_linear(in_features=in_features,\n", - " out_features=out_features,\n", - " per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING,\n", - " bit_width=weight_bit_width,\n", - " quant_type=weight_quant_type))\n", + " for out_features in out_features:\n", + " self.features.append(QuantLinear(\n", + " in_features=in_features,\n", + " out_features=out_features,\n", + " bias=False,\n", + " weight_bit_width=weight_bit_width,\n", + " weight_quant=CommonWeightQuant))\n", " in_features = out_features\n", " self.features.append(BatchNorm1d(num_features=in_features))\n", - " self.features.append(get_act_quant(act_bit_width, act_quant_type))\n", - " self.features.append(Dropout(p=HIDDEN_DROPOUT))\n", - " self.features.append(get_quant_linear(in_features=in_features,\n", - " out_features=num_classes,\n", - " per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING,\n", - " bit_width=weight_bit_width,\n", - " quant_type=weight_quant_type))\n", - " self.features.append(BatchNorm1d(num_features=num_classes))\n", + " self.features.append(QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))\n", + " self.features.append(Dropout(p=DROPOUT))\n", + " self.features.append(QuantLinear(\n", + " in_features=in_features,\n", + " out_features=num_classes,\n", + " bias=False,\n", + " weight_bit_width=weight_bit_width,\n", + " weight_quant=CommonWeightQuant))\n", + " self.features.append(TensorNorm())\n", "\n", " for m in self.modules():\n", " if isinstance(m, QuantLinear):\n", @@ -128,17 +132,20 @@ " return x\n", "\n", "\n", - "def lfc(cfg):\n", + "def fc(cfg):\n", " weight_bit_width = cfg.getint('QUANT', 
'WEIGHT_BIT_WIDTH')\n", " act_bit_width = cfg.getint('QUANT', 'ACT_BIT_WIDTH')\n", " in_bit_width = cfg.getint('QUANT', 'IN_BIT_WIDTH')\n", " num_classes = cfg.getint('MODEL', 'NUM_CLASSES')\n", " in_channels = cfg.getint('MODEL', 'IN_CHANNELS')\n", - " net = LFC(weight_bit_width=weight_bit_width,\n", - " act_bit_width=act_bit_width,\n", - " in_bit_width=in_bit_width,\n", - " num_classes=num_classes,\n", - " in_ch=in_channels)\n", + " out_features = ast.literal_eval(cfg.get('MODEL', 'OUT_FEATURES'))\n", + " net = FC(\n", + " weight_bit_width=weight_bit_width,\n", + " act_bit_width=act_bit_width,\n", + " in_bit_width=in_bit_width,\n", + " in_channels=in_channels,\n", + " out_features=out_features,\n", + " num_classes=num_classes)\n", " return net\n", "\n" ] @@ -146,7 +153,7 @@ ], "source": [ "from brevitas_examples import bnn_pynq\n", - "showSrc(bnn_pynq.models.LFC)" + "showSrc(bnn_pynq.models.FC)" ] }, { @@ -164,20 +171,29 @@ { "data": { "text/plain": [ - "LFC(\n", + "FC(\n", " (features): ModuleList(\n", - " (0): QuantHardTanh(\n", - " (act_quant_proxy): ActivationQuantProxy(\n", + " (0): QuantIdentity(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (act_quant): ActQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n", " (activation_impl): Identity()\n", " (tensor_quant): ClampedBinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): ClampMin()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", @@ -186,34 +202,56 @@ " (1): Dropout(p=0.2)\n", " (2): QuantLinear(\n", " in_features=784, out_features=1024, bias=False\n", - " (weight_reg): WeightReg()\n", - " (weight_quant): WeightQuantProxy(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (output_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (weight_quant): WeightQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (tensor_quant): BinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): Identity()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", - " (bias_quant): BiasQuantProxy()\n", + " (bias_quant): BiasQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", " )\n", " (3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (4): QuantHardTanh(\n", - " (act_quant_proxy): ActivationQuantProxy(\n", + 
" (4): QuantIdentity(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (act_quant): ActQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n", " (activation_impl): Identity()\n", " (tensor_quant): ClampedBinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): ClampMin()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", @@ -222,34 +260,56 @@ " (5): Dropout(p=0.2)\n", " (6): QuantLinear(\n", " in_features=1024, out_features=1024, bias=False\n", - " (weight_reg): WeightReg()\n", - " (weight_quant): WeightQuantProxy(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (output_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (weight_quant): WeightQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (tensor_quant): BinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): Identity()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", - " (bias_quant): BiasQuantProxy()\n", + " (bias_quant): BiasQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", " )\n", " (7): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (8): QuantHardTanh(\n", - " (act_quant_proxy): ActivationQuantProxy(\n", + " (8): QuantIdentity(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (act_quant): ActQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n", " (activation_impl): Identity()\n", " (tensor_quant): ClampedBinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): ClampMin()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", @@ -258,34 +318,56 @@ " (9): Dropout(p=0.2)\n", " (10): QuantLinear(\n", " in_features=1024, out_features=1024, bias=False\n", - 
" (weight_reg): WeightReg()\n", - " (weight_quant): WeightQuantProxy(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (output_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (weight_quant): WeightQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (tensor_quant): BinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): Identity()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", - " (bias_quant): BiasQuantProxy()\n", + " (bias_quant): BiasQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", " )\n", " (11): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (12): QuantHardTanh(\n", - " (act_quant_proxy): ActivationQuantProxy(\n", + " (12): QuantIdentity(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (act_quant): ActQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (fused_activation_quant_proxy): FusedActivationQuantProxy(\n", " (activation_impl): Identity()\n", " (tensor_quant): ClampedBinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): ClampMin()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", @@ -294,22 +376,35 @@ " (13): Dropout(p=0.2)\n", " (14): QuantLinear(\n", " in_features=1024, out_features=10, bias=False\n", - " (weight_reg): WeightReg()\n", - " (weight_quant): WeightQuantProxy(\n", + " (input_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (output_quant): IdentityQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", + " (weight_quant): WeightQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", " (tensor_quant): BinaryQuant(\n", - " (scaling_impl): StandaloneScaling(\n", - " (restrict_value): RestrictValue(\n", - " (forward_impl): Sequential(\n", - " (0): PowerOfTwo()\n", - " (1): Identity()\n", - " )\n", + " (scaling_impl): ConstScaling(\n", + " (restrict_clamp_scaling): _RestrictClampValue(\n", + " (restrict_value_impl): FloatRestrictValue()\n", + " (clamp_min_ste): Identity()\n", " )\n", + " (value): StatelessBuffer()\n", + " )\n", + " (bit_width): BitWidthConst(\n", + " (bit_width): StatelessBuffer()\n", + " )\n", + " (delay_wrapper): DelayWrapper(\n", + " (delay_impl): _NoDelay()\n", " )\n", " )\n", " )\n", - " (bias_quant): BiasQuantProxy()\n", + " (bias_quant): 
BiasQuantProxyFromInjector(\n", + " (_zero_hw_sentinel): StatelessBuffer()\n", + " )\n", " )\n", - " (15): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (15): TensorNorm()\n", " )\n", ")" ] @@ -339,7 +434,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAARX0lEQVR4nO3dfYyVZXrH8d/FoDAw8iYRCaisG/5QqmUbgk1KyOKmxlUMbKJm/aPauAmarMmqTVqz/UOSaqJVa/pH3YStL9CsmiWoq0a7a82mWo1GNFQQW1CULGR4E5H3t+HqH/NgZ3We6549z3nOc9z7+0kmM3Ouec65OTM/zsv13Pdt7i4Af/xGNT0AAJ1B2IFMEHYgE4QdyARhBzIxupM3Zma89Z+ZUaPKH09OnTpV23VXvf6enp6wPjAw0PJ1183dbbjLK4XdzK6U9M+SeiT9q7vfV+X6cmU27O/mS6k/6ip/eKNHx38CqcCk6r29vaW1Q4cOhcem9PX1hfUDBw6U1lIt50mTJoX1zz77LKx3o5afxptZj6R/kfR9SRdLusHMLm7XwAC0V5XX7PMlfeTuW9z9uKSnJS1pz7AAtFuVsM+Q9Lsh328rLvs9ZrbMzNaa2doKtwWgotrfoHP3FZJWSLxBBzSpyiP7dknnDfl+ZnEZgC5UJezvSJptZt8yszMl/VDS8+0ZFoB2a/lpvLufNLPbJP1ag623x9z9g7aNLCPjx48P6wcPHmz5useMGRPWjx07FtZTbcFx48aF9ai9lmoppqSOj9prqT76vn37WhpTN6v0mt3dX5L0UpvGAqBGnC4LZIKwA5kg7EAmCDuQCcIOZIKwA5mwTq4um+vpsqled6qXffTo0bA+duzYlo9Nia676vWfffbZYb3qNNLofp06dWp47O7du8N6amrwyZMnw3qdyuaz88gOZIKwA5kg7EAmCDuQCcIOZIKwA5mg9fYNkGrNVfkd1nnddUtNDa6yem1q6m5qanCTS03TegMyR9iBTBB2IBOEHcgEYQcyQdiBTBB2IBP02TvgrLPOCuvRbqOSNHHixLB+4sSJ0lpqN9LUFNbPP/88rC9YsCCs33rrraW1VC/6jjvuCOtbt24N601OM20SfXYgc4QdyARhBzJB2IFMEHYgE4QdyARhBzJBn/0b4JFHHgnrUS871Wuuuox1b29vWI+ktk2+5JJLwvqmTZvC+vHjx0trZ5xxRnhsdO6ClP53HzlyJKzXqazPXmnLZjP7VNIBSQOSTrr7vCrXB6A+lcJeWOTue9pwPQBqxGt2IBNVw+6SfmNm75rZsuF+wMyWmdlaM1tb8bYAVFD1afwCd99uZudIesXM/sfdXxv6A+6+QtIKiTfogCZVemR39+3F512SnpU0vx2DAtB+LYfdzMab2Vmnv5Z0haQN7RoYgPaq8jR+mqRniz7taElPuvu/t2VUf2RSWzYvWrQorF922WVhPeqVHzx4MDw21W/u6+sL66nzNKI566m11x999NGWr1uS7rzzztLaW2+9FR5b93bSTWg57O6+RdKftnEsAGpE6w3IBGEHMkHYgUwQdiAThB3IBFNcu0Bqqubs2bPD+v79+0trEyZMCI+NpoFK6SmwVbZ8TrX9UlJLcO/du7e0tnTp0vDYdevWhfVUSzLV8qwTS0kDmSPsQCYIO5AJwg5kgrADmSDsQCYIO5CJdiw42TFRT7fOfnBK6thU/ZZbbgnrq1atCuszZ85s+bZTffZ77rknrK9evTqsn3nmmaW1K664Ijz2wQcfDOuprbCj2168eHF47LZt28L6nj3fvDVWeWQHMkHYgUwQdiAThB3IBGEHMkHYgUwQdiATHZ/Pnup3Rzo51naqOvd54cKFYf2iiy4qrY0bNy48dvTo+FSLNWvWhPUtW7aE9SpSyz3PmTMnrKfu90jq75T57AC6FmEHMkHYgUwQdiAThB3IBGEHMkHYgUx0vM8+alT5/y9V54XXqcpc+lOnTlW67eg+S9VPnjwZHjt+/PiwfujQobCe2o46+p2l5tJfffXVYf3pp58O61X67Kk17VP3a5Na7rOb2WNmtsvMNgy5bIqZvWJmm4vPk9s5WADtN5Kn8U9IuvIrl90l6VV3ny3p1eJ7AF0sGXZ3f03SV/fRWSJpZfH1SknxXjoAGtfqGnTT3L2/+HqHpGllP2hmyyQta/F2ALRJ5QUn3d2jDRvdfYWkFRIbOwJNarX1ttPMpktS8XlX+4YEoA6thv15STcVX98k6VftGQ6AuiT77Gb2lKTvSpoqaaekuyU9J+mXks6XtFXS9e5evhn2/19XbU/jq64bX7UeSfVkU3uoR/uvV9Xb2xvWjxw5EtZT5wBUOcfgwgsvDOsff/xxy9edGldqTfqUw4cPVzq+irI+e/I1u7vfUFL6XqURAegoTpcFMkHYgUwQdiAThB3IBGEHMsGWzYVUC3JgYCCsR3p6esJ61WWHozZRqsWUmsKakrr+aNvkqCZJixYtamlMp0W/0xMnToTHpqa4Vvl7aAqP7EAmCDuQCcIOZIKwA5kg7EAmCDuQCcIOZKKr+ux1budcdTnnKuq+7QMHDpTWUv3iVK87dXyqTx8tF51axvq6664L60ePHg3rY8eOLa2l+uyp31mTWzK3ikd2IBOEHcgEYQcyQdiBTBB2IBOEHcgEYQcy0fE+ezS3u5t75dGSyanllFPq3Fb50ksvDY+dM2dOWE8tJf3cc8+F9UjUB5ekhQsXhvUqW3inlqGOzl2Qqi/B3QQe2YFMEHYgE4QdyARhBzJB2IFMEHYgE4QdyETH++zRnPU6++ipufKped1RT3j06PhuXLp0aVhPHb9kyZKwPmbMmNLa3Llzw2MnTZoU1lO97Ndff73l42fPnh0em1qbPdXrXr9+fWnt8ssvD4+N7lOpO/voKclHdjN7zMx2mdmGIZctN7PtZrau+Liq3mECqGokT+OfkHTlMJc/7O5zi4+X2jssAO2WDLu7vyZpbwfGAqBGVd6gu83M3i+e5k8u+yEzW2Zma81sbYXbAlBRq2H/maRvS5orqV/SQ2U/6O4r3H2eu89r8bYAtEFLYXf3ne4+4O6nJP1c0vz2DgtAu7UUdjObPuTbH0jaUPazALqDpfqoZvaUpO9Kmippp6S7i+/nSnJJn0q6xd37kzdmFt5Yqt+cmvcdmTVrVli/5pprwvrixYtLa6l516l526m509H+61K8hnlfX194bErVed3R7/SLL74Ij504cWJYT9
m8eXNpbdWqVeGxDz1U+spUUnf32d192JNKkifVuPsNw1z8aOURAegoTpcFMkHYgUwQdiAThB3IBGEHMpFsvbX1xsw8Wna5zimud999d1hfvnx5WN+zZ09pberUqa0M6UuprYf37o2nJkT1Cy64IDw21RZMbdmccuzYsdJaahpp6u8h1YqNpi2ntlx++eWXw/rNN98c1pvc0rms9cYjO5AJwg5kgrADmSDsQCYIO5AJwg5kgrADmeh4nz2qV9maODXVMtX3rLLt8q5du8L61q1bw/oDDzwQ1levXh3W580rXwTo4YcfDo9Nbdk8eXLpimOSpG3btoX16Hf6xBNPhMd+8sknYf3aa68N69HU46rTa1988cWwnpoyXSf67EDmCDuQCcIOZIKwA5kg7EAmCDuQCcIOZKKjffZRo0Z5ND/6+PHj4fHnnHNOaW337t3hsak+e2rudNQvTm0HvWnTprA+ZcqUsJ5atjha7vn8888Pj03NZ08t771v376wfuONN5bWXnjhhfDYlNQ6AtFy0YsWLQqPTa0xkLpfUst/14k+O5A5wg5kgrADmSDsQCYIO5AJwg5kgrADmeiq+exVpPqeK1euDOvXX399y9d/+PDh8Nhx48aF9dS2yKl5/gMDA6W11Lrvb775Zlh/8sknw/q6devC+htvvFFaS51fkOrhp37n0Xkb8+fPD499++23w/rjjz8e1lPrytep5T67mZ1nZr81s41m9oGZ/aS4fIqZvWJmm4vP8SoHABo1kqfxJyX9jbtfLOnPJf3YzC6WdJekV919tqRXi+8BdKlk2N29393fK74+IOlDSTMkLZF0+rnxSklL6xokgOriFz1fYWazJH1H0tuSprl7f1HaIWlayTHLJC1rfYgA2mHE78abWZ+kNZJud/f9Q2s++C7fsG++ufsKd5/n7uWrIgKo3YjCbmZnaDDov3D3Z4qLd5rZ9KI+XVK8xCqARiVbbzY4f3OlpL3ufvuQyx+Q9Jm732dmd0ma4u5/m7iu8MbOPffccCw7duwI65Fo+15JmjlzZli/9957S2szZswIj01tuZzaujjaLlqS7r///tLaxo0bw2NTU1xT2yKnpKYtR1JtwxMnToT1aOpx6u9+woQJYb3qlOk6lbXeRvKa/S8k/ZWk9WZ2uqn6U0n3Sfqlmf1I0lZJcaMaQKOSYXf3/5JU9l/k99o7HAB14XRZIBOEHcgEYQcyQdiBTBB2IBMdneLa09PjUV83NVU06n3u37+/tCZJfX19YT3VN416vlX6vVK655s6RyDqZad6+MeOHQvrVUW/79Ryzampwam/lyq/s5SqY6sTS0kDmSPsQCYIO5AJwg5kgrADmSDsQCYIO5CJrlpKOjWHOOqlp5YVrjove/r06aW1/v7+0tpI9Pb2hvXUls11XndqGetDhw6F9SpzylNGjYofq6rMKW/6/IQq6LMDmSPsQCYIO5AJwg5kgrADmSDsQCYIO5CJruqzA6iOPjuQOcIOZIKwA5kg7EAmCDuQCcIOZIKwA5lIht3MzjOz35rZRjP7wMx+Uly+3My2m9m64uOq+ocLoFXJk2rMbLqk6e7+npmdJeldSUs1uB/7QXd/cMQ3xkk1QO3KTqoZyf7s/ZL6i68PmNmHkma0d3gA6vYHvWY3s1mSviPp7eKi28zsfTN7zMwmlxyzzMzWmtnaSiMFUMmIz403sz5J/ynpXnd/xsymSdojySX9gwaf6t+cuA6exgM1K3saP6Kwm9kZkl6U9Gt3/6dh6rMkvejuf5K4HsIO1KzliTA2uDzoo5I+HBr04o27034gaUPVQQKoz0jejV8g6XVJ6yWdXpv3p5JukDRXg0/jP5V0S/FmXnRdPLIDNav0NL5dCDtQP+azA5kj7EAmCDuQCcIOZIKwA5kg7EAmCDuQCcIOZIKwA5kg7EAmCDuQCcIOZIKwA5kg7EAmkgtOttkeSVuHfD+1uKwbdevYunVcEmNrVTvHdkFZoaPz2b9242Zr3X1eYwMIdOvYunVcEmNrVafGxtN4IBOEHchE02Ff0fDtR7p1bN06LomxtaojY2v0NTuAzmn6kR1AhxB2IBONhN3MrjSz/zWzj8zsribGUMbMPjWz9cU21I3uT1fsobfLzDYMuWyKmb1iZpuLz8PusdfQ2LpiG+9gm/FG77umtz/v+Gt2M+uRtEnSX0raJukdSTe4+8aODqSEmX0qaZ67N34ChpktlHRQ0qrTW2uZ2T9K2uvu9xX/UU5297/rkrEt1x+4jXdNYyvbZvyv1eB9187tz1vRxCP7fEkfufsWdz8u6WlJSxoYR9dz99ck7f3KxUskrSy+XqnBP5aOKxlbV3D3fnd/r/j6gKTT24w3et8F4+qIJsI+Q9Lvhny/Td2137tL+o2ZvWtmy5oezDCmDdlma4ekaU0OZhjJbbw76SvbjHfNfdfK9udV8Qbd1y1w9z+T9H1JPy6ernYlH3wN1k29059J+rYG9wDsl/RQk4MpthlfI+l2d98/tNbkfTfMuDpyvzUR9u2Szhvy/czisq7g7tuLz7skPavBlx3dZOfpHXSLz7saHs+X3H2nuw+4+ylJP1eD912xzfgaSb9w92eKixu/74YbV6futybC/o6k2Wb2LTM7U9IPJT3fwDi+xszGF2+cyMzGS7pC3bcV9fOSbiq+vknSrxocy+/plm28y7YZV8P3XePbn7t7xz8kXaXBd+Q/lvT3TYyhZFwXSvrv4uODpscm6SkNPq07ocH3Nn4k6WxJr0raLOk/JE3porH9mwa39n5fg8Ga3tDYFmjwKfr7ktYVH1c1fd8F4+rI/cbpskAmeIMOyARhBzJB2IFMEHYgE4QdyARhBzJB2IFM/B+tIjCppYWKvAAAAABJRU5ErkJggg==\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAARYElEQVR4nO3dfYyVZXrH8d/FoDAw8iYRCaisG/5QqmUbgk1KyOKmxlUMbKJm/aPauAmarMmqTVqz/UOSaqJVa/pH3YStL9CsmiWoq0a7a82mWo1GNFQQW1CULGR4E5H3t+HqH/NgZ3We6549z3nOc9z7+0kmM3Ouec65OTM/zsv13Pdt7i4Af/xGNT0AAJ1B2IFMEHYgE4QdyARhBzIxupM3Zma89Z+ZUaPKH09OnTpV23VXvf6enp6wPjAw0PJ1183dbbjLK4XdzK6U9M+SeiT9q7vfV+X6cmU27O/mS6k/6ip/eKNHx38CqcCk6r29vaW1Q4cOhcem9PX1hfUDBw6U1lIt50mTJoX1zz77LKx3o5afxptZj6R/kfR9SRdLusHMLm7XwAC0V5XX7PMlfeTuW9z9uKSnJS1pz7AAtFuVsM+Q9Lsh328rLvs9ZrbMzNaa2doKtwWgotrfoHP3FZJWSLxBBzSpyiP7dknnDfl+ZnEZgC5UJezvSJptZt8yszMl/VDS8+0ZFoB2a/lpvLufNLPbJP1ag623x9z9g7aNLCPjx48P6wcPHmz5useMGRPWjx07FtZTbcFx48aF9ai9lmoppqSOj9prqT76vn37WhlSV6v0mt3dX5L0UpvGAqBGnC4LZIKwA5kg7EAmCDuQCcIOZIKwA5mwTq4um+vpsqled6qXffTo0bA+duzYlo9Nia676vWfffbZYb3qNNLofp06dWp47O7du8N6amrwyZMnw3qdyuaz88gOZIKwA5kg7EAmCDuQCcIOZIKwA5mg9fYNkGrNVfkd1nnddUtNDa6yem1q6m5qanCTS03TegMyR9iBTBB2IBOEHcgEYQcyQdiBTBB2IBP02TvgrLPOCuvRbqOSNHHixLB+4sSJ0lpqN9LUFNbPP/88rC9YsCCs33rrraW1VC/6jjvuCOtbt24N601OM20SfXYgc4QdyARhBzJB2IFMEHYgE4QdyARhBzJBn/0b4JFHHgnrUS871Wuuuox1b29vWI+ktk2+5JJLwvqmTZvC+vHjx0trZ5xxRnhsdO6ClP53HzlyJKzXqazPXmnLZjP7VNIBSQOSTrr7vCrXB6A+lcJeWOTue9pwPQBqxGt2IBNVw+6SfmNm75rZsuF+wMyWmdlaM1tb8bYAVFD1afwCd99uZudIesXM/sfdXxv6A+6+QtIKiTfogCZVemR39+3F512SnpU0vx2DAtB+LYfdzMab2Vmnv5Z0haQN7RoYgPaq8jR+mqRniz7taElPuvu/t2VUf2RSWzYvWrQorF922WVhPeqVHzx4MDw21W/u6+sL66nzNKI566m11x999NGWr1uS7rzzztLaW2+9FR5b93bSTWg57O6+RdKftnEsAGpE6w3IBGEHMkHYgUwQdiAThB3IBFNcu0Bqqubs2bPD+v79+0trEyZMCI+NpoFK6SmwVbZ8TrX9UlJLcO/du7e0tnTp0vDYdevWhfVUSzLV8qwTS0kDmSPsQCYIO5AJwg5kgrADmSDsQCYIO5CJdiw42TFRT7fOfnBK6thU/ZZbbgnrq1atCuszZ85s+bZTffZ77rknrK9evTqsn3nmmaW1K664Ijz2wQcfDOuprbCj2168eHF47LZt28L6nj3fvDVWeWQHMkHYgUwQdiAThB3IBGEHMkHYgUwQdiATHZ/Pnup3Rzo51naqOvd54cKFYf2iiy4qrY0bNy48dvTo+FSLNWvWhPUtW7aE9SpSyz3PmTMnrKfu90jq75T57AC6FmEHMkHYgUwQdiAThB3IBGEHMkHYgUx0vM8+alT5/y9V54XXqcpc+lOnTlW67eg+S9VPnjwZHjt+/PiwfujQobCe2o46+p2l5tJfffXVYf3pp58O61X67Kk17VP3a5Na7rOb2WNmtsvMNgy5bIqZvWJmm4vPk9s5WADtN5Kn8U9IuvIrl90l6VV3ny3p1eJ7AF0sGXZ3f03SV/fRWSJpZfH1SklL2zssAO3W6hp009y9v/h6h6RpZT9oZsskLWvxdgC0SeUFJ93dow0b3X2FpBUSGzsCTWq19bbTzKZLUvF5V/uGBKAOrYb9eUk3FV/fJOlX7RkOgLok++xm9pSk70qaKmmnpLslPSfpl5LOl7RV0vXuXr4Z9v9fV21P46uuG1+1Hkn1ZFN7qEf7r1fV29sb1o8cORLWU+cAVDnH4MILLwzrH3/8ccvXnRpXak36lMOHD1c6voqyPnvyNbu731BS+l6lEQHoKE6XBTJB2IFMEHYgE4QdyARhBzLBls2FVAtyYGAgrEd6enrCetVlh6M2UarFlJrCmpK6/mjb5KgmSYsWLWppTKdFv9MTJ06Ex6amuFb5e2gKj+xAJgg7kAnCDmSCsAOZIOxAJgg7kAnCDmSiq/rsdW7nXHU55yrqvu0DBw6U1lL94lSvO3V8qk8fLRedWsb6uuuuC+tHjx4N62PHji2tpfrsqd9Zk1syt4pHdiAThB3IBGEHMkHYgUwQdiAThB3IBGEHMtHxPns0t7ube+XRksmp5ZRT6txW+dJLLw2PnTNnTlhPLSX93HPPhfVI1AeXpIULF4b1Klt4p5ahjs5dkKovwd0EHtmBTBB2IBOEHcgEYQcyQdiBTBB2IBOEHchEx/vs0Zz1OvvoqbnyqXndUU949Oj4bly6dGlYTx2/ZMmSsD5mzJjS2ty5c8NjJ02aFNZTvezXX3+95eNnz54dHptamz3V616/fn1p7fLLLw+Pje5TqTv76CnJR3Yze8zMdpnZhiGXLTez7Wa2rvi4qt5hAqhqJE/jn5B05TCXP+zuc4uPl9o7LADtlgy7u78maW8HxgKgRlXeoLvNzN4vnuZPLvshM1tmZmvNbG2F2wJQUath/5mkb0uaK6lf0kNlP+juK9x9nrvPa/G2ALRBS2F3953uPuDupyT9XNL89g4LQLu1FHYzmz7k2x9I2lD2swC6g6X6qGb2lKTvSpoqaaeku4vv50pySZ9KusXd+5M3ZhbeWKrfnJr3HZk1a1ZYv+aaa8L64sWLS2upedepedupudPR/utSvIZ5X19feGxK1Xnd0e/0iy++CI+dOHFiWE/ZvHlzaW3VqlXhsQ89VPrKVFJ399ndfdiTSpIn1bj7DcNc/GjlEQHoKE6XBTJB2IFMEHYgE4QdyARhBzKRbL219cbMPFp2uc4prnfffXdYX758eVjfs2dPaW3q1KmtDOlLqa2H9+6NpyZE9QsuuCA8NtUWTG3ZnHLs2LHSWmoaaervIdWKjaYtp7Zcfvnll8P6zTffHNab3NK5rPXGIzuQCcIOZIKwA5kg7EAmCDuQCcIOZIKwA5noeJ89qlfZmjg11TLV96yy7fKuXbvC+tatW8P6Aw88ENZXr14d1ufNK18E6OGHHw6PTW
3ZPHly6YpjkqRt27aF9eh3+sQTT4THfvLJJ2H92muvDevR1OOq02tffPHFsJ6aMl0n+uxA5gg7kAnCDmSCsAOZIOxAJgg7kAnCDmSio332UaNGeTQ/+vjx4+Hx55xzTmlt9+7d4bGpPntq7nTUL05tB71p06awPmXKlLCeWrY4Wu75/PPPD49NzWdPLe+9b9++sH7jjTeW1l544YXw2JTUOgLRctGLFi0Kj02tMZC6X1LLf9eJPjuQOcIOZIKwA5kg7EAmCDuQCcIOZIKwA5noqvnsVaT6nitXrgzr119/fcvXf/jw4fDYcePGhfXUtsipef4DAwOltdS672+++WZYf/LJJ8P6unXrwvobb7xRWkudX5Dq4ad+59F5G/Pnzw+Pffvtt8P6448/HtZT68rXqeU+u5mdZ2a/NbONZvaBmf2kuHyKmb1iZpuLz/EqBwAaNZKn8Scl/Y27XyzpzyX92MwulnSXpFfdfbakV4vvAXSpZNjdvd/d3yu+PiDpQ0kzJC2RdPq58UpJS2saI4A2iF/0fIWZzZL0HUlvS5rm7v1FaYekaSXHLJO0rMIYAbTBiN+NN7M+SWsk3e7u+4fWfPBdvmHffHP3Fe4+z93LV0UEULsRhd3MztBg0H/h7s8UF+80s+lFfbqkeIlVAI1Ktt5scP7mSkl73f32IZc/IOkzd7/PzO6SNMXd/zZxXeGNnXvuueFYduzYEdYj0fa9kjRz5sywfu+995bWZsyYER6b2nI5tXVxtF20JN1///2ltY0bN4bHpqa4prZFTklNW46k2oYnTpwI69HU49Tf/YQJE8J61SnTdSprvY3kNftfSPorSevNbF1x2U8l3Sfpl2b2I0lbJcWNagCNSobd3f9LUtl/kd9r73AA1IXTZYFMEHYgE4QdyARhBzJB2IFMdHSKa09Pj0d93dRU0aj3uX///tKaJPX19YX1VN806vlW6fdK6Z5v6hyBqJed6uEfO3YsrFcV/b5TyzWnpgan/l6q/M5Sqo6tTiwlDWSOsAOZIOxAJgg7kAnCDmSCsAOZIOxAJrpqKenUHOKol55aVrjqvOzp06eX1vr7+0trI9Hb2xvWU1s213ndqWWsDx06FNarzClPGTUqfqyqMqe86fMTqqDPDmSOsAOZIOxAJgg7kAnCDmSCsAOZIOxAJrqqzw6gOvrsQOYIO5AJwg5kgrADmSDsQCYIO5AJwg5kIhl2MzvPzH5rZhvN7AMz+0lx+XIz225m64qPq+ofLoBWJU+qMbPpkqa7+3tmdpakdyUt1eB+7Afd/cER3xgn1QC1KzupZiT7s/dL6i++PmBmH0qa0d7hAajbH/Sa3cxmSfqOpLeLi24zs/fN7DEzm1xyzDIzW2tma6sNFUAVIz433sz6JP2npHvd/RkzmyZpjySX9A8afKp/c+I6eBoP1KzsafyIwm5mZ0h6UdKv3f2fhqnPkvSiu/9J4noIO1CzlifC2ODyoI9K+nBo0Is37k77gaQNVQcJoD4jeTd+gaTXJa2XdHpt3p9KukHSXA0+jf9U0i3Fm3nRdfHIDtSs0tP4diHsQP2Yzw5kjrADmSDsQCYIO5AJwg5kgrADmSDsQCYIO5AJwg5kgrADmSDsQCYIO5AJwg5kgrADmUguONlmeyRtHfL91OKybtStY+vWcUmMrVXtHNsFZYWOzmf/2o2brXX3eY0NINCtY+vWcUmMrVWdGhtP44FMEHYgE02HfUXDtx/p1rF167gkxtaqjoyt0dfsADqn6Ud2AB1C2IFMNBJ2M7vSzP7XzD4ys7uaGEMZM/vUzNYX21A3uj9dsYfeLjPbMOSyKWb2ipltLj4Pu8deQ2Prim28g23GG73vmt7+vOOv2c2sR9ImSX8paZukdyTd4O4bOzqQEmb2qaR57t74CRhmtlDSQUmrTm+tZWb/KGmvu99X/Ec52d3/rkvGtlx/4DbeNY2tbJvxv1aD9107tz9vRROP7PMlfeTuW9z9uKSnJS1pYBxdz91fk7T3KxcvkbSy+HqlBv9YOq5kbF3B3fvd/b3i6wOSTm8z3uh9F4yrI5oI+wxJvxvy/TZ1137vLuk3ZvaumS1rejDDmDZkm60dkqY1OZhhJLfx7qSvbDPeNfddK9ufV8UbdF+3wN3/TNL3Jf24eLralXzwNVg39U5/JunbGtwDsF/SQ00OpthmfI2k2919/9Bak/fdMOPqyP3WRNi3SzpvyPczi8u6grtvLz7vkvSsBl92dJOdp3fQLT7vang8X3L3ne4+4O6nJP1cDd53xTbjayT9wt2fKS5u/L4bblydut+aCPs7kmab2bfM7ExJP5T0fAPj+BozG1+8cSIzGy/pCnXfVtTPS7qp+PomSb9qcCy/p1u28S7bZlwN33eNb3/u7h3/kHSVBt+R/1jS3zcxhpJxXSjpv4uPD5oem6SnNPi07oQG39v4kaSzJb0qabOk/5A0pYvG9m8a3Nr7fQ0Ga3pDY1ugwafo70taV3xc1fR9F4yrI/cbp8sCmeANOiAThB3IBGEHMkHYgUwQdiAThB3IBGEHMvF/rSIwqVQD1iIAAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] @@ -371,8 +466,8 @@ { "data": { "text/plain": [ - "tensor([0.0602, 0.0147, 0.5844, 0.0445, 0.0270, 0.0185, 0.0595, 0.0082, 0.1689,\n", - " 0.0141])" + "tensor([0.1020, 0.0113, 0.4806, 0.0571, 0.0482, 0.0079, 0.0450, 0.0076, 0.1851,\n", + " 0.0552])" ] }, "execution_count": 13, @@ -395,7 +490,7 @@ "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEICAYAAABS0fM3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAclUlEQVR4nO3debgdVZ3u8e9LIDIFgiQqJEDCaMcJ8RgQFZGhO3QreBERrmjHK9NtwkXx2oIitrRTO+GErUAQlEZuiGhHjQRolBYVSEAEkoCEEEgYD5Mg+gCB9/5RdXBz2GefnaHq5Jx6P8+zn1PDqlq/vXeyf7VWVa2SbSIiornWG+oAIiJiaCURREQ0XBJBRETDJRFERDRcEkFERMMlEURENFwSQUSXJFnSjuX0tyV9YjX38ydJ26/d6DrWJ0nflfSIpGvrqjeGjySCEUjSMkn7tVm+t6Rnyx+ivtdPWtbvLOkiSQ9K+qOkGyWdKGnUGsYzQ9ICSU9KOncVtz1U0m8k/VnSLwcp2/r+Hpd0q6T3r0nsA7F9rO1/HaycpF9KOrLftpvaXlpFXAN4E7A/MNH21DXdmaRJZVJcf81Di3VBEkHz3FP+EPW93g4gaQfgGmA58CrbmwPvAnqAMWtaJ/Bp4JzV2PZh4KvA57uty/amwGbAR4GzJE3pX6hhP2LbActsP7GqGzbsc2qsJILo8yngN7ZPtH0vgO1bbf9P24/2LyzprZJuapm/TNL8lvlfSXpHuZ+Lbf8YeKjNfraQ9FNJvWXXxU8lTexbb/ty27MokknXXPgx8AgwRdJ0Sb+WdLqkh4B/kfQiSV+SdJek+8vuno1aYvuIpHsl3SPpf/WL+1xJn26ZP0jSDZIek3S7pGmSPgO8Gfhm2Ur5Zlm2tYtpc0nfK9//nZJOkbReuW66pKvKGB+RdIekA1rqnC5padn6uUPSe9p8vh8AzgbeUMbwqXL5UZKWSHpY0hxJW7dsY0nHSboNuG2wz7r8LL4l6edlHb+W9DJJXy3jvkXSa1vKn1R+Ro9LWiTpf7SsGyXpy2Wr9I6yNflc66P8vGaW38vdkj69pi3WSCKIv9oPmL0K5a8GdpI0TtIGwKuBrSWNKX9Me4BfdbGf9YDvUhy1bgv8BfjmKkXehqT1yh+YsUBfwtodWAq8FPgMRStjZ2BXYEdgAnBquf004P9SdKnsRPH5DFTXVOB7wEfK+vaiOAL/OMVnMKNsfc1os/k3gM2B7YG3AO8DWruzdgduBcYBXwBmqrAJ8HXgANtjgD2BG/rv3PZM4Fjgt2UMn5S0D/A54FBgK+BO4MJ+m76jrPsFrakBHAqcUsb5JPBb4PpyfjbwlZayt1MkyM0pDkDOl7RVue4o4ACK72S3Mo5W5wIrKb6v1wJ/CxxJrBnbeY2wF7AM2K/N8r2BZ4FHW16HluueBqatYj2/Ag4G9gAuBWYB04C3Aje2Kf9p4NxB9rkr8Eib5UcCvxxk29b39zDFD+Nh5brpwF0tZQU8AezQsuwNwB3l9DnA51vW7QwY2LGcPxf4dDn9HeD0AWL6JXBkv2Wm+CEbBTwFTGlZd0zf+yxjXtKybuNy25cBm5Tv853ARoN8LtOBq1rmZwJfaJnftPz+J7XEt0+H/U0qy6zf8lmc1bL+eGBxy/yrgEc77O8G4KBy+grgmJZ1+/XVRZHAn2x9v8DhwC+G8v/bSHil/6957rE9sc3yhyiODtuS9G3giHL2s7Y/C1xJ8eO7opx+hOKo9slyflCSNgZOp0ggW5SLx0gaZfuZbvbRz0DvD4rzH33GU/ywXifpuXAofpwBtgauayl/Z4c6twHmrnqojAM26LfvOylaJn3u65uw/ecy1k1t3yfp3RStlpmSfg182PYtXdS7NcXRet9+/1R2l02gOIiA539W3bi/ZfovbeY37ZuR9D7gRIqEQrluXEtsrXW3Tm9H8Xnd2/KdrbcasUY/6RqKPpdTHF225eIqmb4TzJ8tF/clgr3K6SspEsFb6DIRAB8GdgF2t71ZuS8ofpTXttahdh+k+IF6he2x5WtzFyeaAe6l+IHvs22H/S4Hduiizv4epDgS365fPXd32OavO7bn2d6fIoHfApzVzXYU51ueq7PsZtqyX72VDEssaTuKOGcAW9oeC9zMX7/ve4HWRN76HSynOMgY1/KdbWb7FVXE2iRJBCPXBpI2bHkN1vr7JLCnpC9KehmApB0lnS9p7ADb/IbiR3wqcK3thRQ/MLsD/91XSNL6kjakONoe1S+eMRQ/yI9KenEZBy3bjiq3XR9Yr9x2g1X4HNqy/SzFD9Lpkl5S1jVB0t+VRWYB0yVNKVstnxxgV1B0tbxf0r7luYkJkl5errufov+/XQzPlPV8pjy3sh3FkfL5g8Uv6aXlCepNKH4c/0TRLdaNH5Tx7irpRcBngWtsL+ty+zWxCUWS6QVQcXnvK1vWzwJOKD/DsRRXfgHg4iKGS4EvS9qs/Kx3kPSWGuIe0ZIIRq65FD+wfa9/6VTY9u0UfeSTgIWS/gj8EFgAPD7ANk9QdDEstP1Uufi3wJ22H2gpekoZw0kU3Ut/KZdBcWnoRhRHx1cDl/Sr5r1l+X+nOMH4F7o/8h3MR4ElwNWSHqNoFe1Svrefl7FdUZa5YqCd2L6W4gTv6cAfKVpDfUfcXwMOKa+e+XqbzY+nOFexFLgKuIDuLrNdjyJp3ENxPuQtwP/uYjtsXw58guL7vZeiNXNYN9uuKduLgC9T/Du5n+L8wa9bipxF8WN/I/A7in/HK4G+bsL3AaOBRRRdkbPp0KUZ3VF5wiUiYp1TXi77bdvbDVo4VltaBBGxzpC0kaS/L7sTJ1B0yf1oqOMa6dIiiIh1Rnk+5krg5RTdgD8DTrD92JAGNsIlEURENFy6hiIiGm7Y3VA2btw4T5o0aajDiIgYVq677roHbY9vt27YJYJJkyaxYMGCoQ4jImJYkTTg3fGVdg2pGIHx1nKUw5MGKHNoOQLhQkkXVBlPRES8UGUtgnJo2DMoRm9cAcyXNKe8oaSvzE7AycAbbT/Sd4dnRETUp8oWwVSKkROXlnedXggc1K/MUcAZth8B6Hc3akRE1KDKRDCB548KuILnj6oIxdC+O5cPsri6HAP+BSQdreJRhwt6e3srCjciopmG+vLR9Ske+rE3xbjiZ7Ub4Mz2mbZ7bPeMH9/2pHdERKymKhPB3Tx/CNmJvHB43RXAHNtP274D+ANFYoiIiJpUmQjmUzzKcLKk0RSjG87pV+bHFK0BJI2j6CpaWmFMERHRT2WJwPZKiodPzAMWA7NsL5R0mqQDy2LzgIckLQJ+AXzE9gsecB4REdUZdmMN9fT0ODeURUSsGknX2e5pt27Y3Vkcq+70y/5QeR0f2n/nyuuIiGoM9VVDERExxJIIIiIaLokgIqLhkggiIhouiSAiouGSCCIiGi6JICKi4ZIIIiIaLokgIqLhkggiIhouiSAiouGSCCIiGi6JICKi4ZIIIiIaLokgIqLhkggiIhouiSAiouGSCCIiGi6JICKi4ZIIIiIaLo
kgIqLhkggiIhouiSAiouGSCCIiGi6JICKi4SpNBJKmSbpV0hJJJ7VZP11Sr6QbyteRVcYTEREvtH5VO5Y0CjgD2B9YAcyXNMf2on5F/5/tGVXFERERnVXZIpgKLLG91PZTwIXAQRXWFxERq6HKRDABWN4yv6Jc1t87Jd0oabakbSqMJyIi2hjqk8U/ASbZfjVwGXBeu0KSjpa0QNKC3t7eWgOMiBjpqkwEdwOtR/gTy2XPsf2Q7SfL2bOB17Xbke0zbffY7hk/fnwlwUZENFWViWA+sJOkyZJGA4cBc1oLSNqqZfZAYHGF8URERBuVXTVke6WkGcA8YBRwju2Fkk4DFtieA/wfSQcCK4GHgelVxRMREe1VlggAbM8F5vZbdmrL9MnAyVXGEBERnQ31yeKIiBhiSQQREQ2XRBAR0XBJBBERDZdEEBHRcEkEERENl0QQEdFwSQQREQ2XRBAR0XBJBBERDZdEEBHRcEkEERENl0QQEdFwgyYCScdL2qKOYCIion7dtAheCsyXNEvSNEmqOqiIiKjPoInA9inATsBMigfH3Cbps5J2qDi2iIioQVfnCGwbuK98rQS2AGZL+kKFsUVERA0GfUKZpBOA9wEPUjxg/iO2n5a0HnAb8M/VhhgREVXq5lGVLwYOtn1n60Lbz0p6WzVhRUREXbrpGtq+fxKQ9H0A24sriSoiImrTTSJ4ReuMpFHA66oJJyIi6jZgIpB0sqTHgVdLeqx8PQ48APxnbRFGRESlBkwEtj9newzwRdubla8xtre0fXKNMUZERIUGPFks6eW2bwEukrRb//W2r680soiIqEWnq4Y+DBwFfLnNOgP7VBJRRETUasBEYPuo8u9b6wsnIiLq1qlr6OBOG9q+eO2HExERdevUNfT2DusMDJoIJE0DvgaMAs62/fkByr0TmA283vaCwfYbERFrT6euofevyY7L+w3OAPYHVlCMYDrH9qJ+5cYAJwDXrEl9ERGxejp1DR1h+3xJJ7Zbb/srg+x7KrDE9tJyfxcCBwGL+pX7V+DfgI90HXVERKw1ne4s3qT8O2aA12AmAMtb5leUy55TXpa6je2fddqRpKMlLZC0oLe3t4uqIyKiW526hr5T/v1UFRWXo5d+heIZBx3ZPhM4E6Cnp8dVxBMR0VTdPKpye0k/kdQr6QFJ/ylp+y72fTewTcv8xHJZnzHAK4FfSloG7AHMkdTTffgREbGmuhl07gJgFrAVsDVwEfCDLrabD+wkabKk0cBhwJy+lbb/aHuc7Um2JwFXAwfmqqGIiHp1kwg2tv192yvL1/nAhoNtZHslMAOYBywGZtleKOk0SQeuWdgREbG2dLpq6MXl5M8lnQRcSHH/wLuBud3s3Pbc/mVtnzpA2b272WdERKxdnW4ou47ih1/l/DEt6wxkBNKIiBGg01VDk+sMJCIihkY3zyxG0iuBKbScG7D9vaqCioiI+gyaCCR9EtibIhHMBQ4ArgKSCCIiRoBurho6BNgXuK8cf+g1wOaVRhUREbXpJhH8xfazwEpJm1E8s3ibQbaJiIhhoptzBAskjQXOoriS6E/AbyuNKiIiajNoIrD9T+XktyVdAmxm+8Zqw4qIiLp0e9XQwcCbKO4fuApIIoiIGCG6GXTuW8CxwE3AzcAxks6oOrCIiKhHNy2CfYC/sW0ASecBCyuNKiIiatPNVUNLgG1b5rcpl0VExAjQadC5n1CcExgDLJZ0bblqKnDtQNtFRMTw0qlr6Eu1RREREUOm06BzV/ZNS3op8Ppy9lrbD1QdWERE1KObq4YOpegKehdwKHCNpEOqDiwiIurRzVVDHwde39cKkDQeuByYXWVgERFRj26uGlqvX1fQQ11uFxERw0A3LYJLJM3jrw+s7/pRlRERse7rmAgkCfg6xYniN5WLz7T9o6oDi4iIenRMBLYtaa7tVwEX1xRTRETUqJu+/uslvX7wYhERMRx1c45gd+AIScuAJwBRNBZeXWVgERFRj24Swd9VHkVERAyZTmMNvQT4GLAjxRDUn7P9WF2BRUREPTqdI/geRVfQN4BNKa4eioiIEaZTItjK9sdtz7N9PLDK5wQkTZN0q6Qlkk5qs/5YSTdJukHSVZKmrGodERGxZjpeNSRpC0kvlvRiYFS/+Y4kjQLOAA4ApgCHt/mhv8D2q2zvCnwB+MrqvY2IiFhdnU4Wbw5cR3GVUJ/ry78Gth9k31OBJbaXAki6EDgIWNRXoN85h03K/UZERI06DUM9aQ33PQFY3jK/guJS1OeRdBxwIjCa4rGYLyDpaOBogG233bZdkYiIWE1DPnic7TNs7wB8FDhlgDJn2u6x3TN+/Ph6A4yIGOGqTAR3UzzfuM/EctlALgTeUWE8ERHRRpWJYD6wk6TJkkYDhwFzWgtI2qll9h+A2yqMJyIi2uh0Q1nHK4NsPzzI+pWSZgDzgFHAObYXSjoNWGB7DjBD0n7A08AjwD+u6huIiIg10+mqoesoruIRsC3FD7WAscBdwOTBdm57Lv2eXWD71JbpE1Y95IiIWJsG7BqyPdn29hSPpXy77XG2twTeBlxaV4AREVGtbs4R7FEe2QNg++fAntWFFBERdepm9NF7JJ0CnF/Ovwe4p7qQIiKiTt20CA4HxgM/onhK2fhyWUREjACDtgjKq4NOkLSJ7SdqiCkiImo0aItA0p6SFgGLy/nXSPpW5ZFFREQtuukaOp3iKWUPAdj+PbBXlUFFRER9urqz2PbyfoueqSCWiIgYAt1cNbRc0p6AJW0AnEDZTRQREcNfNy2CY4HjKIaVvhvYFfinKoOKiIj6dNMi2MX2e1oXSHoj8OtqQoqIiDp10yL4RpfLIiJiGOo0+ugbKIaSGC/pxJZVm1GMJhoRESNAp66h0cCmZZkxLcsfAw6pMqiIiKhPp2cWXwlcKelc23fWGFNERNSom3MEZ0sa2zcjaQtJ8yqMKSIiatRNIhhn+9G+GduPAC+pLqSIiKhTN4ngWUnb9s1I2o7iyWURETECdHMfwceBqyRdSfGoyjcDR1caVURE1KabYagvkbQbsEe56IO2H6w2rIiIqMuAXUOSXl7+3Y3i4fX3lK9ty2URETECdGoRfBg4Cvhym3UG9qkkooiIqFWn+wiOKv++tb5wIiKibp2GmDi404a2L1774URERN06dQ29vfz7Eooxh64o598K/IbiQfYRETHMdeoaej+ApEuBKbbvLee3As6tJbqIiKhcNzeUbdOXBEr3U1xFFBERI0A3ieC/JM2TNF3SdOBnwOXd7FzSNEm3Sloi6aQ260+UtEjSjZL+q7xrOSIiajRoIrA9A/g28Jrydabt4wfbTtIo4AzgAGAKcLikKf2K/Q7osf1qYDbwhVULPyIi1lQ3Q0wAXA88bvtySRtLGmP78UG2mQossb0UQNKFwEHAor4Ctn/RUv5q4IjuQ4+IiLVh0BaBpKMojta/Uy6aAPy4i31PAJa3zK8olw3kA8DPB4jhaEkLJC3o7e3touqIiOhWN+cIjgPeSPFkMmzfxloehlrSEUAP8MV2622fabvHds/48ePXZtURE
Y3XTdfQk7afkgSApPXpbhjqu4FtWuYnlsueR9J+FCOcvsX2k13sNyIi1qJuWgRXSvoYsJGk/YGLgJ90sd18YCdJkyWNBg4D5rQWkPRaii6nA20/sGqhR0TE2tBNIvgo0AvcBBwDzAVOGWwj2yuBGcA8YDEwy/ZCSadJOrAs9kVgU+AiSTdImjPA7iIioiIdu4bKS0AX2n45cNaq7tz2XIrE0brs1Jbp/VZ1nxERsXZ1bBHYfga4tfVRlRERMbJ0c7J4C2ChpGuBJ/oW2j5w4E0iImK46CYRfKLyKCIiYsh0eh7BhsCxwI4UJ4pnlieAIyJiBOl0juA8ipu8bqIYL6jdIysjImKY69Q1NMX2qwAkzQSurSekiIjVc/plf6i8jg/tv3PlddStU4vg6b6JdAlFRIxcnVoEr5H0WDktijuLHyunbXuzyqOLiIjKdXpU5ag6A4mIiKHRzRATERExgiURREQ0XBJBRETDJRFERDRcEkFERMMlEURENFwSQUREwyURREQ0XBJBRETDJRFERDRcEkFERMMlEURENFwSQUREwyURREQ0XBJBRETDJRFERDRcEkFERMNVmggkTZN0q6Qlkk5qs34vSddLWinpkCpjiYiI9ipLBJJGAWcABwBTgMMlTelX7C5gOnBBVXFERERnnR5ev6amAktsLwWQdCFwELCor4DtZeW6ZyuMIyIiOqiya2gCsLxlfkW5bJVJOlrSAkkLent710pwERFRGBYni22fabvHds/48eOHOpyIiBGlykRwN7BNy/zEcllERKxDqkwE84GdJE2WNBo4DJhTYX0REbEaKksEtlcCM4B5wGJglu2Fkk6TdCCApNdLWgG8C/iOpIVVxRMREe1VedUQtucCc/stO7Vlej5Fl1FERAyRYXGyOCIiqpNEEBHRcEkEERENl0QQEdFwSQQREQ2XRBAR0XBJBBERDZdEEBHRcJXeULauOf2yP1Rex4f237nyOiIi1qZGJYKIpqj6oCcHPCNLuoYiIhouiSAiouGSCCIiGi6JICKi4XKyOCqVK7Ui1n1pEURENFwSQUREwyURREQ0XBJBRETDJRFERDRcrhqKES1DLUQMLi2CiIiGSyKIiGi4dA1FVCTdUjFcpEUQEdFwaRHUJEMtRIxsw/n/eFoEERENV2mLQNI04GvAKOBs25/vt/5FwPeA1wEPAe+2vazKmCKiWsP5yLipKmsRSBoFnAEcAEwBDpc0pV+xDwCP2N4ROB34t6riiYiI9qrsGpoKLLG91PZTwIXAQf3KHAScV07PBvaVpApjioiIfmS7mh1LhwDTbB9Zzr8X2N32jJYyN5dlVpTzt5dlHuy3r6OBo8vZXYBbKwm6vXHAg4OWSt2pO3Wn7nW77u1sj2+3YlhcNWT7TODMoahb0gLbPak7dafu1D1S6u6vyq6hu4FtWuYnlsvalpG0PrA5xUnjiIioSZWJYD6wk6TJkkYDhwFz+pWZA/xjOX0IcIWr6quKiIi2Kusasr1S0gxgHsXlo+fYXijpNGCB7TnATOD7kpYAD1Mki3XNkHRJpe7UnbpTd10qO1kcERHDQ+4sjohouCSCiIiGSyIYgKRpkm6VtETSSTXXfY6kB8r7LOqsdxtJv5C0SNJCSSfUWPeGkq6V9Puy7k/VVXdLDKMk/U7ST4eg7mWSbpJ0g6QFNdc9VtJsSbdIWizpDTXVu0v5fvtej0n6YB11l/V/qPy3drOkH0jasMa6TyjrXVjnex6Q7bz6vShObt8ObA+MBn4PTKmx/r2A3YCba37fWwG7ldNjgD/U9b4BAZuW0xsA1wB71Pz+TwQuAH5aZ71l3cuAcXXXW9Z9HnBkOT0aGDsEMYwC7qO46amO+iYAdwAblfOzgOk11f1K4GZgY4oLdi4HdhyK777vlRZBe90Mj1EZ2/9NcRVVrWzfa/v6cvpxYDHFf5g66rbtP5WzG5Sv2q5kkDQR+Afg7LrqXBdI2pziwGMmgO2nbD86BKHsC9xu+84a61wf2Ki8h2lj4J6a6v0b4Brbf7a9ErgSOLimuttKImhvArC8ZX4FNf0griskTQJeS3FkXledoyTdADwAXGa7trqBrwL/DDxbY52tDFwq6bpySJW6TAZ6ge+W3WJnS9qkxvr7HAb8oK7KbN8NfAm4C7gX+KPtS2uq/mbgzZK2lLQx8Pc8/+bb2iURxAtI2hT4IfBB24/VVa/tZ2zvSnEX+lRJr6yjXklvAx6wfV0d9Q3gTbZ3oxit9zhJe9VU7/oU3ZD/bvu1wBNA3efERgMHAhfVWOcWFK38ycDWwCaSjqijbtuLKUZavhS4BLgBeKaOugeSRNBeN8NjjEiSNqBIAv9h++KhiKHsmvgFMK2mKt8IHChpGUU34D6Szq+pbuC5I1RsPwD8iKJ7sg4rgBUtra/ZFImhTgcA19u+v8Y69wPusN1r+2ngYmDPuiq3PdP262zvBTxCcT5uyCQRtNfN8BgjTjkE+Exgse2v1Fz3eEljy+mNgP2BW+qo2/bJtifankTxXV9hu5ajQwBJm0ga0zcN/C1F90HlbN8HLJe0S7loX2BRHXW3OJwau4VKdwF7SNq4/He/L8U5sVpIekn5d1uK8wMX1FV3O8Ni9NG6eYDhMeqqX9IPgL2BcZJWAJ+0PbOGqt8IvBe4qeyrB/iY7bk11L0VcF75QKP1gFm2a7+Mc4i8FPhR+SiO9YELbF9SY/3HA/9RHvQsBd5fV8Vl4tsfOKauOgFsXyNpNnA9sBL4HfUO+fBDSVsCTwPHDdEJ+udkiImIiIZL11BERMMlEURENFwSQUREwyURREQ0XBJBRETDJRFERDRcEkFERMP9f9TbtPnh8O6mAAAAAElFTkSuQmCC\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEICAYAAABS0fM3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAbi0lEQVR4nO3debxdZXn28d9FIDKFQRIVEiDMNk6IERAVZWpDq2AREV4nrEwtsSi+VlTUSp3qhFWxCgRBKfACgo0WZShKHYGAKIRBwhzGMAmiLxC4+sd6Dm6O++yzMqx1yFnX9/PZn6z5vvc+sO+9nmetZ8k2ERHRXSuNdQIRETG2UggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgoiZJlrR5mf66pI8s5XF+L2nT5ZvdwHiS9E1JD0i6pK24seJIIRiHJN0sadc+y18r6cnyRTT0+l7P+i0lnSHpXkm/k/QbSYdLmrCM+cyWNE/So5JOXMJ995H0c0l/kPTjUbbtfX8PS7pO0juXJfeR2D7E9r+Mtp2kH0s6YNi+a9q+sYm8RvAqYDdgmu1tl/VgkqaXorjysqcWzwQpBN1zR/kiGnq9HkDSZsDFwG3Ai2yvDbwJmAlMWtaYwCeAE5Zi3/uBLwGfqRvL9prAWsAHgOMkzRi+Uce+xDYGbrb9yJLu2LHPqbNSCGLIx4Gf2z7c9p0Atq+z/X9sPzh8Y0k7SbqyZ/58SZf2zP9E0hvKcc6y/V3gvj7HWVfS9yUtKk0X35c0bWi97Qtsn05VTGpz5bvAA8AMSftL+pmkoyXdB/yzpGdJ+rykWyXdXZp7VuvJ7f2S7pR0h6S/G5b3iZI+0TO/p6QrJD0k6QZJsyR9Eng18NVylvLVsm1vE9Pakr5V3v8tko6UtFJZt7+kn5YcH5B0k6Tde2LuL+nGcvZzk6S39Pl83wUcD7yi5PDxsvxASQsk3S9prqQNevaxpEMlXQ9cP9pnXT6Lr0n6QYnxM0nPk/Slkve1kl7as/0R5TN6WNLVkv62Z90ESV8oZ6U3lbPJp84+yuc1p/xdbpf0iWU9Y40UgviTXYEzl2D7XwJbSJosaRXgxcAGkiaVL9OZwE9qHGcl4JtUv1o3Av4IfHWJMu9D0krlC2YdYKhgbQfcCDwX+CTVWcaWwNbA5sBU4KNl/1nA/6VqUtmC6vMZKda2wLeA95d4O1L9Av8w1Wcwu5x9ze6z+1eAtYFNgdcAbwd6m7O2A64DJgOfBeaosgbwZWB325OAHYArhh/c9hzgEOAXJYePSdoZ+DSwD7A+cAtw2rBd31Bi/9nZ1Aj2AY4seT4K/AK4vMyfCXyxZ9sbqArk2lQ/QE6WtH5ZdyCwO9XfZJuSR68TgcVUf6+XAn8JHEAsG9t5jbMXcDOwa5/lrwWeBB7see1T1j0OzFrCOD8B9gK2B84DTgdmATsBv+mz/SeAE0c55tbAA32WHwD8eJR9e9/f/VRfjPuWdfsDt/ZsK+ARYLOeZa8AbirTJwCf6Vm3JWBg8zJ/IvCJMv0N4OgRcvoxcMCwZab6IpsAPAbM6Fl38ND7LDkv6Fm3etn3ecAa5X2+EVhtlM9lf+CnPfNzgM/2zK9Z/v7Te/LbecDxppdtVu75LI7rWf9u4Jqe+RcBDw443hXAnmX6QuDgnnW7DsWiKuCP9r5fYD/gR23/PzbeXmn/6547bE/rs/w+ql+HfUn6OvDWMvsp258CLqL68l1Yph+g+lX7aJkflaTVgaOpCsi6ZfEkSRNsP1HnGMOM9P6g6v8YMoXqi/UySU+lQ/XlDLABcFnP9rcMiLkhcM6Sp8pkYJVhx76F6sxkyF1DE7b/UHJd0/Zdkt5MddYyR9LPgPfZvrZG3A2ofq0PHff3pblsKtWPCHj6Z1XH3T3Tf+wzv+bQjKS3A4dTFRTKusk9ufXG7p3emOrzurPnb7bSUuQaw6RpKIZcQPXrsi9XV8kMdTB/qiweKgQ7lumLqArBa6hZCID3AVsB29leqxwLqi/l5a13qN17qb6gXmB7nfJa21VHM8CdVF/wQzYacNzbgM1qxBzuXqpf4hsPi3P7gH3+dGD7XNu7URXwa4Hj6uxH1d/yVMzSzLTesLiNDEssaWOqPGcD69leB7iKP/297wR6C3nv3+A2qh8Zk3v+ZmvZfkETuXZJCsH4tYqkVXteo539fQzYQdLnJD0PQNLmkk6WtM4I+/yc6kt8W+AS2/OpvmC2A/5naCNJK0talerX9oRh+Uyi+kJ+UNKzSx707Duh7LsysFLZd5X6H0N/tp+k+kI6WtJzSqypkv6qbHI6sL+kGeWs5WMjHAqqppZ3Stql9E1MlfT8su5uqvb/fjk8UeJ8svStbEz1S/nk0fKX9NzSQb0G1Zfj76maxeo4teS7taRnAZ8CLrZ9c839l8UaVEVmEYCqy3tf2LP+dOCw8hmuQ3XlFwCuLmI4D/iCpLXKZ72ZpNe0kPe4lkIwfp1D9QU79PrnQRvbvoGqjXw6MF/S74DvAPOAh0fY5xGqJob5th8ri38B3GL7np5Njyw5HEHVvPTHsgyqS0NXo/p1/Evgh8PCvK1s/+9UHYx/pP4v39F8AFgA/FLSQ1RnRVuV9/aDktuFZZsLRzqI7UuoOniPBn5HdTY09Iv734C9y9UzX+6z+7up+ipuBH4KnEK9y2xXoioad1D1h7wG+Psa+2H7AuAjVH/fO6nOZvats++ysn018AWq/07upuo/+FnPJsdRfdn/BvgV1X/Hi4GhZsK3AxOBq6maIs9kQJNm1KPS4RIR8YxTLpf9uu2NR904llrOCCLiGUPSapL+ujQnTqVqkjt7rPMa73JGEBHPGKU/5iLg+VTNgP8FHGb7oTFNbJxLIYiI6LhGm4ZU3WZ/XbmV/Yg+6/dXdWv9FeWVOwQjIlrW2A1lZfyPY6hu0V8IXCppbrlqoNf/c/9b7/uaPHmyp0+fvvwSjYjogMsuu+xe21P6rWvyzuJtqW6PvxFA0mnAnlSXfS216dOnM2/evOWQXkREd0ga8e74JpuGpvL0W78X8vRb54e8UdW492dK2rDPeiQdpGo8+3mLFi1qIteIiM4a68tHv0c10NWLgfOBk/ptZPtY2zNtz5wype+ZTURELKUmC8HtPH2ckGkMG0PF9n22Hy2zxwMvazCfiIjoo8lCcCnVePWbSJpIdQv73N4NesYgB9gDuKbBfCIioo/GOottL5Y0GziXarCxE2zPl3QUMM/2XOAfJe1BNZbI/VTjpkdERItWuBvKZs6c6Vw1FBGxZCRdZntmv3Vj3VkcERFjLIUgIqLjUggiIjouzyzugKPP/23jMd6725aNx4iIZuSMICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLjUggiIjouhSAiouNSCC
IiOq7RQiBplqTrJC2QdMSA7d4oyZJmNplPRET8ucYKgaQJwDHA7sAMYD9JM/psNwk4DLi4qVwiImJkTZ4RbAsssH2j7ceA04A9+2z3L8C/Av+/wVwiImIETRaCqcBtPfMLy7KnSNoG2ND2fw06kKSDJM2TNG/RokXLP9OIiA4bs85iSSsBXwTeN9q2to+1PdP2zClTpjSfXEREhzRZCG4HNuyZn1aWDZkEvBD4saSbge2BuekwjohoV5OF4FJgC0mbSJoI7AvMHVpp+3e2J9uebns68EtgD9vzGswpIiKGaawQ2F4MzAbOBa4BTrc9X9JRkvZoKm5ERCyZlZs8uO1zgHOGLfvoCNu+tslcIiKiv9xZHBHRcSkEEREdl0IQEdFxKQQRER2XQhAR0XEpBBERHZdCEBHRcSkEEREdN2ohkPRuSeu2kUxERLSvzhnBc4FLJZ1enjimppOKiIj2jFoIbB8JbAHMAfYHrpf0KUmbNZxbRES0oFYfgW0Dd5XXYmBd4ExJn20wt4iIaMGog85JOgx4O3AvcDzwftuPlwfLXA/8U7MpRkREk+qMPvpsYC/bt/QutP2kpNc1k1ZERLSlTtPQpsOLgKRvA9i+ppGsIiKiNXUKwQt6ZyRNAF7WTDoREdG2EQuBpA9Kehh4saSHyuth4B7gP1vLMCIiGjViIbD9aduTgM/ZXqu8Jtlez/YHW8wxIiIaNGJnsaTn274WOEPSNsPX27680cwiIqIVg64aeh9wIPCFPusM7NxIRhER0aoRC4HtA8u/O7WXTkREtG1Q09Beg3a0fdbyTyciIto2qGno9QPWGUghiIgYBwY1Db2zzUQiImJsDGoaeqvtkyUd3m+97S82l1ZERLRlUNPQGuXfSW0kEhERY2NQ09A3yr8fby+diIhoW51HVW4q6XuSFkm6R9J/Stq0jeQiIqJ5dQadOwU4HVgf2AA4Azi1yaQiIqI9dQrB6ra/bXtxeZ0MrNp0YhER0Y5BVw09u0z+QNIRwGlU9w+8GTinhdwiIqIFg64auozqi19l/uCedQYyAmlExDgw6KqhTdpMJCIixkadZxYj6YXADHr6Bmx/q6mkIiKiPXUuH/0Y8JXy2gn4LLBHnYNLmiXpOkkLSj/D8PWHSLpS0hWSfippxhLmHxERy6jOVUN7A7sAd5Xxh14CrD3aTuXZxscAu1OdTezX54v+FNsvsr01VYHJsBURES2rUwj+aPtJYLGktaieWbxhjf22BRbYvtH2Y1RXHe3Zu4Hth3pm16DqhI6IiBbV6SOYJ2kd4DiqK4l+D/yixn5Tgdt65hcC2w3fSNKhwOHARPLUs4iI1o16RmD7H2w/aPvrwG7AO5bnENW2j7G9GfAB4Mh+20g6SNI8SfMWLVq0vEJHRAT1moaQtJekLwLvBjareezbeXoT0rSybCSnAW/ot8L2sbZn2p45ZcqUmuEjIqKOOlcNfQ04BLgSuAo4WNIxNY59KbCFpE0kTQT2BeYOO/YWPbN/A1xfN/GIiFg+6vQR7Az8hW0DSDoJmD/aTrYXS5oNnAtMAE6wPV/SUcA823OB2ZJ2BR4HHgDesZTvIyIillKdQrAA2Ai4pcxvWJaNyvY5DBuXyPZHe6YPq5dmREQ0ZdCgc9+jupxzEnCNpEvKqm2BS0baLyIiViyDzgg+31oWERExZgYNOnfR0LSk5wIvL7OX2L6n6cQiIqIdda4a2oeqKehNwD7AxZL2bjqxiIhoR53O4g8DLx86C5A0BbgAOLPJxCIioh11bihbaVhT0H0194uIiBVAnTOCH0o6lz89sD6PqoyIGEcGFgJJAr5M1VH8qrL4WNtnN51YRES0Y2AhsG1J59h+EXBWSzlFRESL6rT1Xy7p5aNvFhERK6I6fQTbAW+VdDPwCCCqk4UXN5lYRES0o04h+KvGs4iIiDEzaKyh5wAfAjanGoL608MeLRkREePAoD6Cb1E1BX0FWJPq6qGIiBhnBjUNrW/7w2X6XEmXt5FQRES0a7T7CNal6hwGmNA7b/v+hnOLiIgWDCoEawOX8adCADB0VmBg06aSioiI9gwahnp6i3lERMQYyeBxEREdl0IQEdFxKQQRER036IayZw/aMVcNRUSMD4OuGrqM6uogARsBD5TpdYBbgU2aTi4iIpo3YtOQ7U1sb0r1WMrX255sez3gdcB5bSUYERHNqtNHsL3tp55IZvsHwA7NpRQREW2qM/roHZKOBE4u828B7mgupYiIaFOdM4L9gCnA2VRPKZtSlkVExDgw6hlBuTroMElr2H6khZwiIqJFo54RSNpB0tXANWX+JZK+1nhmERHRijpNQ0dTPaXsPgDbvwZ2bDKpiIhoT607i23fNmzREw3kEhERY6DOVUO3SdoBsKRVgMMozUQREbHiq3NGcAhwKDAVuB3YGviHBnOKiIgW1Tkj2Mr2W3oXSHol8LNmUoqIiDbVOSP4Ss1lf0bSLEnXSVog6Yg+6w+XdLWk30j6b0kb1zluREQsP4NGH30F1VASUyQd3rNqLWDCaAeWNAE4BtgNWAhcKmmu7at7NvsVMNP2HyT9PfBZ4M1L/jYiImJpDTojmAisSVUsJvW8HgL2rnHsbYEFtm+0/RhwGrBn7wa2f2T7D2X2l8C0JUs/IiKW1aBnFl8EXCTpRNu3LMWxpwK9l50uBLYbsP27gB8sRZyIiFgGdfoIjpe0ztCMpHUlnbs8k5D0VmAm8LkR1h8kaZ6keYsWLVqeoSMiOq9OIZhs+8GhGdsPAM+psd/twIY989PKsqeRtCvwYWAP24/2O5DtY23PtD1zypQpNUJHRERddQrBk5I2GpopV/a4xn6XAltI2kTSRGBfYG7vBpJeCnyDqgjcUz/tiIhYXurcR/Bh4KeSLqJ6VOWrgYNG28n2YkmzgXOprjI6wfZ8SUcB82zPpWoKWhM4QxLArbb3WLq3EhERS6POMNQ/lLQNsH1Z9B7b99Y5eHmy2TnDln20Z3rXJcg1IiIaMGLTkKTnl3+3oXp4/R3ltVFZFhER48CgM4L3AQcCX+izzsDOjWQUEbGUjj7/t43HeO9uWzYeo22D7iM4sPy7U3vpRERE2wYNMbHXoB1tn7X804mIiLYNahp6ffn3OVRjDl1Y5ncCfk71IPuIiFjBDWoaeieApPOAGbbvLPPrAye2kl1ERDSuzg1lGw4VgeJuqquIIiJiHKhzQ9l/l7GFTi3zbwYuaC6liIhoU50bymZL+ltgx7LoWNtnN5tWRES0pc4ZAcDlwMO2L5C0uqRJth9uMrGIiGjHqH0Ekg4EzqQaHA6q5wx8t8GcIiKiRXU6iw8FXkn1ZDJsX0+9YagjImIFUKcQPFoeNQmApJWpNwx1RESsAOoUgoskfQhYTdJuwBnA95pNKyIi2lKnEHwAWARcCRxMNaz0kU0mFRER7Rl41ZCkCcB8288HjmsnpYiIaNPAMwLbTwDX9T6qMiIixpc69xGsC8yXdAnwyNDCPFIyImJ8qFMIPtJ4FhERMWYGPY9gVeAQYHOqjuI5the3lVhERLRjUB/BScBMqiKwO/0fWRkRE
Su4QU1DM2y/CEDSHOCSdlKKiIg2DTojeHxoIk1CERHj16AzgpdIeqhMi+rO4ofKtG2v1Xh2ERHRuEGPqpzQZiIRETE26gwxERER41gKQUREx6UQRER0XApBRETHpRBERHRcCkFERMelEEREdFwKQUREx6UQRER0XApBRETHNVoIJM2SdJ2kBZKO6LN+R0mXS1osae8mc4mIiP4aKwTlwffHUD3LYAawn6QZwza7FdgfOKWpPCIiYrA6j6pcWtsCC2zfCCDpNGBP4OqhDWzfXNY92WAeTzn6/N82HuO9u23ZeIyIiOWpyaahqcBtPfMLy7IlJukgSfMkzVu0aNFySS4iIiorRGex7WNtz7Q9c8qUKWOdTkTEuNJkIbgd2LBnflpZFhERzyBNFoJLgS0kbSJpIrAvMLfBeBERsRQaKwTlOcezgXOBa4DTbc+XdJSkPQAkvVzSQuBNwDckzW8qn4iI6K/Jq4awfQ5wzrBlH+2ZvpSqySgiIsbICtFZHBERzUkhiIjouBSCiIiOSyGIiOi4FIKIiI5LIYiI6LgUgoiIjkshiIjouBSCiIiOSyGIiOi4FIKIiI5LIYiI6LhGB52LyONBI575UggiIpaDFflHT5qGIiI6LoUgIqLj0jQU41rTp+vpn4jxIIUgYhxKAYwlkaahiIiOSyGIiOi4FIKIiI5LIYiI6LgUgoiIjkshiIjouBSCiIiOSyGIiOi4FIKIiI5LIYiI6LgUgoiIjkshiIjouBSCiIiOSyGIiOi4DEPdkhX5MXYRMb6lEEQ0JM8EiBVFo4VA0izg34AJwPG2PzNs/bOAbwEvA+4D3mz75iZziohm5ex3xdNYH4GkCcAxwO7ADGA/STOGbfYu4AHbmwNHA//aVD4REdFfk53F2wILbN9o+zHgNGDPYdvsCZxUps8EdpGkBnOKiIhhZLuZA0t7A7NsH1Dm3wZsZ3t2zzZXlW0Wlvkbyjb3DjvWQcBBZXYr4LpGku5vMnDvqFsldmIndmI/s2NvbHtKvxUrRGex7WOBY8citqR5tmcmdmIndmKPl9jDNdk0dDuwYc/8tLKs7zaSVgbWpuo0joiIljRZCC4FtpC0iaSJwL7A3GHbzAXeUab3Bi50U21VERHRV2NNQ7YXS5oNnEt1+egJtudLOgqYZ3suMAf4tqQFwP1UxeKZZkyapBI7sRM7sdvSWGdxRESsGDLWUEREx6UQRER0XArBCCTNknSdpAWSjmg59gmS7in3WbQZd0NJP5J0taT5kg5rMfaqki6R9OsS++Ntxe7JYYKkX0n6/hjEvlnSlZKukDSv5djrSDpT0rWSrpH0ipbiblXe79DrIUnvaSN2if/e8t/aVZJOlbRqi7EPK3Hnt/meR2Q7r2Evqs7tG4BNgYnAr4EZLcbfEdgGuKrl970+sE2ZngT8tq33DQhYs0yvAlwMbN/y+z8cOAX4fptxS+ybgcltxy2xTwIOKNMTgXXGIIcJwF1UNz21EW8qcBOwWpk/Hdi/pdgvBK4CVqe6YOcCYPOx+NsPvXJG0F+d4TEaY/t/qK6iapXtO21fXqYfBq6h+h+mjdi2/fsyu0p5tXYlg6RpwN8Ax7cV85lA0tpUPzzmANh+zPaDY5DKLsANtm9pMebKwGrlHqbVgTtaivsXwMW2/2B7MXARsFdLsftKIehvKnBbz/xCWvpCfKaQNB14KdUv87ZiTpB0BXAPcL7t1mIDXwL+CXiyxZi9DJwn6bIypEpbNgEWAd8szWLHS1qjxfhD9gVObSuY7duBzwO3AncCv7N9XkvhrwJeLWk9SasDf83Tb75tXQpB/BlJawLfAd5j+6G24tp+wvbWVHehbyvphW3ElfQ64B7bl7URbwSvsr0N1Wi9h0rasaW4K1M1Q/677ZcCjwBt94lNBPYAzmgx5rpUZ/mbABsAa0h6axuxbV9DNdLyecAPgSuAJ9qIPZIUgv7qDI8xLklahaoI/Ifts8Yih9I08SNgVkshXwnsIelmqmbAnSWd3FJs4KlfqNi+BzibqnmyDQuBhT1nX2dSFYY27Q5cbvvuFmPuCtxke5Htx4GzgB3aCm57ju2X2d4ReICqP27MpBD0V2d4jHGnDAE+B7jG9hdbjj1F0jplejVgN+DaNmLb/qDtabanU/2tL7Tdyq9DAElrSJo0NA38JVXzQeNs3wXcJmmrsmgX4Oo2YvfYjxabhYpbge0lrV7+u9+Fqk+sFZKeU/7diKp/4JS2YvezQow+2jaPMDxGW/ElnQq8FpgsaSHwMdtzWgj9SuBtwJWlrR7gQ7bPaSH2+sBJ5YFGKwGn2279Ms4x8lzg7PIojpWBU2z/sMX47wb+o/zouRF4Z1uBS+HbDTi4rZgAti+WdCZwObAY+BXtDvnwHUnrAY8Dh45RB/1TMsRERETHpWkoIqLjUggiIjouhSAiouNSCCIiOi6FICKi41IIIiI6LoUgIqLj/hdRB2LXFx7MKAAAAABJRU5ErkJggg==\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] @@ -436,16 +531,7 @@ "cell_type": "code", "execution_count": 15, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspace/brevitas/brevitas_examples/bnn_pynq/models/LFC.py:80: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. 
In any other case, this might cause the trace to be incorrect.\n", - " x = 2.0 * x - torch.tensor([1.0], device=x.device)\n" - ] - } - ], + "outputs": [], "source": [ "import brevitas.onnx as bo\n", "export_onnx_path = \"/tmp/LFCW1A1.onnx\"\n", @@ -469,6 +555,7 @@ "name": "stdout", "output_type": "stream", "text": [ + "Stopping http://0.0.0.0:8081\n", "Serving '/tmp/LFCW1A1.onnx' at http://0.0.0.0:8081\n" ] }, @@ -522,10 +609,10 @@ { "data": { "text/plain": [ - "input: \"40\"\n", + "input: \"37\"\n", "input: \"38\"\n", - "output: \"41\"\n", - "op_type: \"Add\"" + "output: \"40\"\n", + "op_type: \"MatMul\"" ] }, "execution_count": 17, @@ -554,7 +641,13 @@ { "data": { "text/plain": [ - "array(-0.5, dtype=float32)" + "array([[-1., -1., 1., ..., -1., 1., -1.],\n", + " [ 1., 1., -1., ..., 1., -1., 1.],\n", + " [-1., -1., -1., ..., 1., -1., 1.],\n", + " ...,\n", + " [ 1., -1., -1., ..., -1., -1., 1.],\n", + " [ 1., -1., -1., ..., 1., 1., 1.],\n", + " [ 1., -1., 1., ..., 1., -1., 1.]], dtype=float32)" ] }, "execution_count": 18, @@ -581,7 +674,7 @@ { "data": { "text/plain": [ - "<DataType.FLOAT32: 16>" + "<DataType.BIPOLAR: 34>" ] }, "execution_count": 19, @@ -601,7 +694,7 @@ { "data": { "text/plain": [ - "[]" + "[784, 1024]" ] }, "execution_count": 20, @@ -643,7 +736,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", "Stopping http://0.0.0.0:8081\n", "Serving '/tmp/LFCW1A1-clean.onnx' at http://0.0.0.0:8081\n" ] @@ -689,8 +781,8 @@ { "data": { "text/plain": [ - "array([[-1.5095654 , -2.915617 , 0.764004 , -1.8118242 , -2.308991 ,\n", - " -2.6900144 , -1.520713 , -3.4965858 , -0.47711682, -2.9628415 ]],\n", + "array([[-1.3736125, -3.5715756, 0.1768887, -1.9529207, -2.1233053,\n", + " -3.9293835, -2.1914592, -3.9634604, -0.7772659, -1.9869976]],\n", " dtype=float32)" ] }, diff --git a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb index 91a776f84e9554579d97447c9ca0889da5c29e48..ff4c5704002219ca18bb07eeb8c768f860f3ffbf 100644 --- a/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb +++ b/notebooks/end2end_example/cybersecurity/1-train-mlp-with-brevitas.ipynb @@ -7,6 +7,13 @@ "# Train a Quantized MLP on UNSW-NB15 with Brevitas" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "<font color=\"red\">**Live FINN tutorial:** We recommend clicking **Cell -> Run All** when you start reading this notebook for \"latency hiding\".</font>" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -26,7 +33,7 @@ "*The task:* The goal of [*network intrusion detection*](https://ieeexplore.ieee.org/abstract/document/283931) is to identify, preferably in real time, unauthorized use, misuse, and abuse of computer systems by both system insiders and external penetrators. This may be achieved by a mix of techniques, and machine-learning (ML) based techniques are increasing in popularity. \n", "\n", "*The dataset:* Several datasets are available for use in ML-based methods for intrusion detection.\n", - "The [UNSW-NB15](https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/) is one such dataset created by the Australian Centre for Cyber Security (ACCS) to provide a comprehensive network based data set which can reflect modern network traffic scenarios. 
You can find more details about the dataset on [its homepage](https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/).\n", + "The **UNSW-NB15** is one such dataset created by the Australian Centre for Cyber Security (ACCS) to provide a comprehensive network based data set which can reflect modern network traffic scenarios. You can find more details about the dataset on [its homepage](https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/).\n", "\n", "*Performance considerations:* FPGAs are commonly used for implementing high-performance packet processing systems that still provide a degree of programmability. To avoid introducing bottlenecks on the network, the DNN implementation must be capable of detecting malicious ones at line rate, which can be millions of packets per second, and is expected to increase further as next-generation networking solutions provide increased\n", "throughput. This is a good reason to consider FPGA acceleration for this particular use-case." @@ -39,25 +46,59 @@ "## Outline\n", "-------------\n", "\n", - "* [Initial setup](#initial_setup)\n", - "* [Define the Quantized MLP model](#define_quantized_mlp)\n", - "* [Load the UNSW_NB15 dataset](#load_dataset) \n", + "* [Load the UNSW_NB15 Dataset](#load_dataset) \n", + "* [Define the Quantized MLP Model](#define_quantized_mlp)\n", "* [Define Train and Test Methods](#train_test)\n", - "* [(Option 1) Train the Model from Scratch](#train_scratch)\n", - "* [(Option 2) Load Pre-Trained Parameters](#load_pretrained)\n", + " * [(Option 1) Train the Model from Scratch](#train_scratch)\n", + " * [(Option 2) Load Pre-Trained Parameters](#load_pretrained)\n", "* [Network Surgery Before Export](#network_surgery)\n", - "* [Export to FINN-ONNX](#export_finn_onnx)\n", - "* [View the Exported ONNX in Netron](#view_in_netron)\n", - "* [That's it!](#thats_it)" + "* [Export to FINN-ONNX](#export_finn_onnx)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import onnx\n", + "import torch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Initial Setup <a id='initial_setup'></a>\n", + "**This is important -- always import onnx before torch**. This is a workaround for a [known bug](https://github.com/onnx/onnx/issues/2394)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the UNSW_NB15 Dataset <a id='load_dataset'></a>\n", + "\n", + "### Dataset Quantization <a id='dataset_qnt'></a>\n", "\n", - "Let's start by making sure we have all the Python packages we'll need for this notebook." + "The goal of this notebook is to train a Quantized Neural Network (QNN) to be later deployed as an FPGA accelerator generated by the FINN compiler. Although we can choose a variety of different precisions for the input, [Murovic and Trost](https://ev.fe.uni-lj.si/1-2-2019/Murovic.pdf) have previously shown we can actually binarize the inputs and still get good (90%+) accuracy." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will create a binarized representation for the dataset by following the procedure defined by Murovic and Trost, which we repeat briefly here:\n", + "\n", + "* Original features have different formats ranging from integers, floating numbers to strings.\n", + "* Integers, which for example represent a packet lifetime, are binarized with as many bits as to include the maximum value. 
\n", + "* Another case is with features formatted as strings (protocols), which are binarized by simply counting the number of all different strings for each feature and coding them in the appropriate number of bits.\n", + "* Floating-point numbers are reformatted into fixed-point representation.\n", + "* In the end, each sample is transformed into a 593-bit wide binary vector. \n", + "* All vectors are labeled as bad (0) or normal (1)\n", + "\n", + "Following Murovic and Trost's open-source implementation provided as a Matlab script [here](https://github.com/TadejMurovic/BNN_Deployment/blob/master/cybersecurity_dataset_unswb15.m), we've created a [Python version](dataloader_quantized.py).\n", + "\n", + "<font color=\"red\">**FPGA'21 tutorial:** Downloading the original dataset and quantizing it can take some time, so we provide a download link to the pre-quantized version for your convenience. </font>" ] }, { @@ -69,51 +110,124 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: pandas in /workspace/.local/lib/python3.6/site-packages (1.1.5)\n", - "Requirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.6/site-packages (from pandas) (2019.1)\n", - "Requirement already satisfied: numpy>=1.15.4 in /opt/conda/lib/python3.6/site-packages (from pandas) (1.19.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.6/site-packages (from pandas) (2.8.1)\n", - "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.6/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n", - "Requirement already satisfied: scikit-learn in /workspace/.local/lib/python3.6/site-packages (0.23.2)\n", - "Requirement already satisfied: scipy>=0.19.1 in /opt/conda/lib/python3.6/site-packages (from scikit-learn) (1.5.2)\n", - "Requirement already satisfied: joblib>=0.11 in /workspace/.local/lib/python3.6/site-packages (from scikit-learn) (1.0.0)\n", - "Requirement already satisfied: numpy>=1.13.3 in /opt/conda/lib/python3.6/site-packages (from scikit-learn) (1.19.4)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /workspace/.local/lib/python3.6/site-packages (from scikit-learn) (2.1.0)\n", - "Requirement already satisfied: tqdm in /opt/conda/lib/python3.6/site-packages (4.31.1)\n" + "--2021-02-24 16:57:33-- https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1\n", + "Resolving zenodo.org (zenodo.org)... 137.138.76.77\n", + "Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 13391907 (13M) [application/octet-stream]\n", + "Saving to: 'unsw_nb15_binarized.npz'\n", + "\n", + "unsw_nb15_binarized 100%[===================>] 12.77M 2.17MB/s in 8.9s \n", + "\n", + "2021-02-24 16:57:44 (1.44 MB/s) - 'unsw_nb15_binarized.npz' saved [13391907/13391907]\n", + "\n" ] } ], "source": [ - "!pip install --user pandas\n", - "!pip install --user scikit-learn\n", - "!pip install --user tqdm" + "! 
wget -O unsw_nb15_binarized.npz https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1"
 ]
 },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can extract the binarized numpy arrays from the .npz archive and wrap them as a PyTorch `TensorDataset` as follows:"
+ ]
+ },
 {
 "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
 "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Samples in each set: train = 175341, test = 82332\n",
+ "Shape of one input sample: torch.Size([593])\n"
+ ]
+ }
+ ],
 "source": [
- "import onnx\n",
- "import torch"
+ "import numpy as np\n",
+ "from torch.utils.data import TensorDataset\n",
+ "\n",
+ "def get_preqnt_dataset(data_dir: str, train: bool):\n",
+ "    unsw_nb15_data = np.load(data_dir + \"/unsw_nb15_binarized.npz\")\n",
+ "    if train:\n",
+ "        partition = \"train\"\n",
+ "    else:\n",
+ "        partition = \"test\"\n",
+ "    part_data = unsw_nb15_data[partition].astype(np.float32)\n",
+ "    part_data = torch.from_numpy(part_data)\n",
+ "    part_data_in = part_data[:, :-1]\n",
+ "    part_data_out = part_data[:, -1]\n",
+ "    return TensorDataset(part_data_in, part_data_out)\n",
+ "\n",
+ "train_quantized_dataset = get_preqnt_dataset(\".\", True)\n",
+ "test_quantized_dataset = get_preqnt_dataset(\".\", False)\n",
+ "\n",
+ "print(\"Samples in each set: train = %d, test = %d\" % (len(train_quantized_dataset), len(test_quantized_dataset)))\n",
+ "print(\"Shape of one input sample: \" + str(train_quantized_dataset[0][0].shape))"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "**This is important -- always import onnx before torch**. This is a workaround for a [known bug](https://github.com/onnx/onnx/issues/2394)."
+ "## Set up DataLoader\n",
+ "\n",
+ "Now that we have the quantized dataset, we will wrap it in a PyTorch `DataLoader` for easier access in batches."
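Before wiring up the `DataLoader`, a quick sanity check along the following lines confirms the binarization and the bad (0) / normal (1) label convention described earlier. This is a sketch for readers following along, not part of the patch; it assumes the `train_quantized_dataset` created by `get_preqnt_dataset` above.

```python
import torch

def sanity_check(dataset):
    inputs, labels = dataset.tensors
    # The pre-quantized inputs should contain only the binarized values 0.0 and 1.0
    print("Unique input values:", torch.unique(inputs).tolist())
    # Labels follow the convention above: 0 = bad, 1 = normal
    print("Fraction labeled normal: %.3f" % labels.float().mean().item())

sanity_check(train_quantized_dataset)  # assumes the dataset built in the cell above
```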
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader, Dataset\n", + "\n", + "batch_size = 1000\n", + "\n", + "# dataset loaders\n", + "train_quantized_loader = DataLoader(train_quantized_dataset, batch_size=batch_size, shuffle=True)\n", + "test_quantized_loader = DataLoader(test_quantized_dataset, batch_size=batch_size, shuffle=False) " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input shape for 1 batch: torch.Size([1000, 593])\n", + "Label shape for 1 batch: torch.Size([1000])\n" + ] + } + ], + "source": [ + "count = 0\n", + "for x,y in train_quantized_loader:\n", + " print(\"Input shape for 1 batch: \" + str(x.shape))\n", + " print(\"Label shape for 1 batch: \" + str(y.shape))\n", + " count += 1\n", + " if count == 1:\n", + " break" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Define the Quantized MLP Model <a id='define_quantized_mlp'></a>\n", + "# Define the Quantized MLP Model <a id='define_quantized_mlp'></a>\n", "\n", "We'll now define an MLP model that will be trained to perform inference with quantized weights and activations.\n", - "For this, we'll use the quantization-aware training (QAT) capabilities offered by[Brevitas](https://github.com/Xilinx/brevitas).\n", + "For this, we'll use the quantization-aware training (QAT) capabilities offered by [Brevitas](https://github.com/Xilinx/brevitas).\n", "\n", "Our MLP will have four fully-connected (FC) layers in total: three hidden layers with 64 neurons, and a final output layer with a single output, all using 2-bit weights. We'll use 2-bit quantized ReLU activation functions, and apply batch normalization between each FC layer and its activation.\n", "\n", @@ -122,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -144,13 +258,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "from brevitas.nn import QuantLinear, QuantReLU\n", "import torch.nn as nn\n", "\n", + "# Setting seeds for reproducibility\n", + "torch.manual_seed(0)\n", + "\n", "model = nn.Sequential(\n", " QuantLinear(input_size, hidden1, bias=True, weight_bit_width=weight_bit_width),\n", " nn.BatchNorm1d(hidden1),\n", @@ -179,80 +296,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load the UNSW_NB15 Dataset <a id='load_dataset'></a>\n", - "\n", - "### Dataset Quantization <a id='dataset_qnt'></a>\n", - "\n", - "The goal of this notebook is to train a Quantized Neural Network (QNN) to be later deployed as an FPGA accelerator generated by the FINN compiler. Although we can choose a variety of different precisions for the input, [Murovic and Trost](https://ev.fe.uni-lj.si/1-2-2019/Murovic.pdf) have previously shown we can actually binarize the inputs and still get good (90%+) accuracy.\n", - "Thus, we will create a binarized representation for the dataset by following the procedure defined by [Murovic and Trost](https://ev.fe.uni-lj.si/1-2-2019/Murovic.pdf), which we repeat briefly here:\n", - "\n", - "* Original features have different formats ranging from integers, floating numbers to strings.\n", - "* Integers, which for example represent a packet lifetime, are binarized with as many bits as to include the maximum value. 
\n", - "* Another case is with features formatted as strings (protocols), which are binarized by simply counting the number of all different strings for each feature and coding them in the appropriate number of bits.\n", - "* Floating-point numbers are reformatted into fixed-point representation.\n", - "* In the end, each sample is transformed into a 593-bit wide binary vector. \n", - "* All vectors are labeled as bad (0) or normal (1)\n", - "\n", - "Following their open-source implementation provided as a Matlab script [here](https://github.com/TadejMurovic/BNN_Deployment/blob/master/cybersecurity_dataset_unswb15.m), we've created a [Python version](dataloader_quantized.py).\n", - "This `UNSW_NB15_quantized` class implements a PyTorch `DataLoader`, which represents a Python iterable over a dataset. This is useful because enables access to data in batches." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Download the training and test set from the [official website](https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/) - uncomment the following lines to download:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "#! wget https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/a%20part%20of%20training%20and%20testing%20set/UNSW_NB15_training-set.csv\n", - "#! wget https://www.unsw.adfa.edu.au/unsw-canberra-cyber/cybersecurity/ADFA-NB15-Datasets/a%20part%20of%20training%20and%20testing%20set/UNSW_NB15_testing-set.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from torch.utils.data import DataLoader, Dataset\n", - "from dataloader_quantized import UNSW_NB15_quantized\n", - "\n", - "file_path_train = \"UNSW_NB15_training-set.csv\"\n", - "file_path_test = \"UNSW_NB15_testing-set.csv\"\n", - "\n", - "train_quantized_dataset = UNSW_NB15_quantized(file_path_train = file_path_train, \\\n", - " file_path_test = file_path_test, \\\n", - " train=True)\n", - "\n", - "test_quantized_dataset = UNSW_NB15_quantized(file_path_train = file_path_train, \\\n", - " file_path_test = file_path_test, \\\n", - " train=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "batch_size = 1000\n", - "\n", - "# dataset loaders\n", - "train_quantized_loader = DataLoader(train_quantized_dataset, batch_size=batch_size, shuffle=True)\n", - "test_quantized_loader = DataLoader(test_quantized_dataset, batch_size=batch_size, shuffle=True) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define Train and Test Methods <a id='train_test'></a>\n", + "# Define Train and Test Methods <a id='train_test'></a>\n", "The train and test methods will use a `DataLoader`, which feeds the model with a new predefined batch of training data in each iteration, until the entire training data is fed to the model. Each repetition of this process is called an `epoch`." ] }, @@ -319,7 +363,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## (Option 1) Train the Model from Scratch <a id=\"train_scratch\"></a>\n" + "# Train the QNN <a id=\"train_qnn\"></a>\n", + "\n", + "We provide two options for training below: you can opt for training the model from scratch (slower) or use a pre-trained model (faster). 
The first option will give more insight into how the training process works, while the second option will likely give better accuracy." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## (Option 1, slower) Train the Model from Scratch <a id=\"train_scratch\"></a>\n" ] }, { @@ -335,7 +388,7 @@ "metadata": {}, "outputs": [], "source": [ - "num_epochs = 5\n", + "num_epochs = 10\n", "lr = 0.001 \n", "\n", "def display_loss_plot(losses, title=\"Training loss\", xlabel=\"Iterations\", ylabel=\"Loss\"):\n", @@ -360,16 +413,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training loss = 0.132480 test accuracy = 0.797989: 100%|██████████| 10/10 [00:58<00:00, 5.70s/it]\n" + ] + } + ], "source": [ "import numpy as np\n", "from sklearn.metrics import accuracy_score\n", "from tqdm import tqdm, trange\n", "\n", + "# Setting seeds for reproducibility\n", + "torch.manual_seed(0)\n", + "np.random.seed(0)\n", + "\n", "running_loss = []\n", "running_test_acc = []\n", "t = trange(num_epochs, desc=\"Training loss\", leave=True)\n", @@ -385,12 +450,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAjcUlEQVR4nO3de5QcZ33m8e/TPTfNrceyRrdu2ZJBMsiewQZhwhIICRBsQmwSjokJIWQ3OQ5ZHEggm5gkBxJnOccLWTbZXbOLN5jsbiCObQirJAYTwj0JoDG+yJKxLcsXjWRdLWlGl7n/9o+uGfeMeqSRND3VPf18zukzVW9Vdf+mj6RHVW+9bykiMDMzmymTdgFmZladHBBmZlaWA8LMzMpyQJiZWVkOCDMzK8sBYWZmZTkgzGYh6cuS3jPf+55lDa+X1D/f72s2Fw1pF2A2nyQdK1ltBYaB8WT91yPic3N9r4i4phL7mtUKB4QtKhHRPrks6Wng1yLiazP3k9QQEWMLWZtZrfElJqsLk5dqJP2epL3AZyVdIOnvJR2QdDhZLpQc801Jv5Ys/4qk70r602TfpyRdc477rpP0bUmDkr4m6TZJfzXH3+OlyWcdkbRN0rUl294iaXvyvrsl/U7Sviz53Y5Iel7SdyT5776dkf+QWD1ZCSwFLgZupPjn/7PJ+kXASeC/n+b4VwGPAcuAjwOfkaRz2PfzwA+AC4E/At49l+IlNQJ/B3wVWA78JvA5SZcmu3yG4mW0DuBy4OtJ+4eAfqAbWAH8PuA5duyMHBBWTyaAj0bEcEScjIhDEfGFiDgREYPAx4CfOM3xz0TE/4qIceB/A6so/oM7530lXQS8EvhIRIxExHeBzXOs/8eAduDW5NivA38PvDPZPgpslNQZEYcj4ocl7auAiyNiNCK+E56EzebAAWH15EBEDE2uSGqV9GlJz0gaAL4NdEnKznL83smFiDiRLLaf5b6rgedL2gB2zbH+1cCuiJgoaXsGyCfLbwfeAjwj6VuSXp20fwLYAXxV0k5JN8/x86zOOSCsnsz8X/OHgEuBV0VEJ/C6pH22y0bz4TlgqaTWkrY1czx2D7BmRv/BRcBugIjYEhHXUbz89CXgrqR9MCI+FBGXANcCH5T0hvP7NaweOCCsnnVQ7Hc4Imkp8NFKf2BEPAP0AX8kqSn5X/7PzvHw7wMngN+V1Cjp9cmxdybv9S5JuYgYBQYoXlJD0lslvTjpAzlK8bbfibKfYFbCAWH17M+AJcBB4HvAVxboc98FvBo4BPxH4G8ojtc4rYgYoRgI11Cs+VPAL0fEj5Jd3g08nVwue2/yOQDrga8Bx4B/BT4VEd+Yt9/GFi25r8osXZL+BvhRRFT8DMbsbPgMwmyBSXqlpBdJyki6GriOYp+BWVXxSGqzhbcS+CLFcRD9wG9ExAPplmR2Kl9iMjOzsnyJyczMylo0l5iWLVsWa9euTbsMM7Oacv/99x+MiO5y2xZNQKxdu5a+vr60yzAzqymSnpltmy8xmZlZWQ4IMzMrywFhZmZlVTQgJF0t6TFJO043g6Skt0sKSZtK2j6cHPeYpDdXsk4zMztVxTqpkymTbwPeRHEw0BZJmyNi+4z9OoAPUJyIbLJtI3ADcBnFKY6/JmlDMre+mZktgEqeQVwF7IiInckkY3dSnFJgpj8B/hMwVNJ2HXBn8mCXpyjOZX9VBWs1M7MZKhkQeaY/CKWfFx5sAoCklwNrIuIfzvbY5PgbJfVJ6jtw4MD8VG1mZkCKndTJQ08+SfGhLeckIm6PiE0Rsam7u+w4jzM6cmKEP//aE2ztP3quZZiZLUqVHCi3m+lPyiokbZMmH6z+zeRZ7iuBzZKuncOx8yabEf/la4+TEfQUcpX4CDOzmlTJM4gtwHpJ6yQ1Uex0nno4e0QcjYhlEbE2ItZSfGDLtRHRl+x3g6RmSesoPvDkB5UosqOlkUu629i622cQZmalKnYGERFjkm4C7gOywB0RsU3SLUBfRGw+zbHbJN0FbAfGgPdV8g6m3nyO7z/1fKXe3sysJlV0LqaIuBe4d0bbR2bZ9/Uz1j8GfKxixZW4PJ/jSw/uYf/gEMs7WhbiI83Mqp5HUgO9hS4AHvFlJjOzKQ4I4LLVnUjwsO9
kMjOb4oAA2pobeHF3u88gzMxKOCASPYWczyDMzEo4IBI9+Rz7B4fZNzB05p3NzOqAAyLRmwyS81mEmVmRAyKxcVWOjGBr/5G0SzEzqwoOiMSSpiwbVnR4RLWZWcIBUaInn2Pr7qNERNqlmJmlzgFRoqeQ4+CxEZ476o5qMzMHRImevDuqzcwmOSBKvHRVJw0ZsXX3kbRLMTNLnQOiREvjZEf1QNqlmJmlzgExQ08+x9b+I+6oNrO654CYoaeQ4/CJUfoPn0y7FDOzVDkgZpgcUe3xEGZW7xwQM1y6soPGrHwnk5nVPQfEDM0NWV6ystNTf5tZ3XNAlHF5PsfD7qg2szrngCijt5BjYGiMZ58/kXYpZmapcUCU4RHVZmYOiLI2rOigqSHjfggzq2sOiDKaGjK8dFWnzyDMrK45IGbRky/eyTQx4Y5qM6tPDohZ9Oa7GBwe4+lDx9MuxcwsFQ6IWfR4RLWZ1TkHxCzWL2+nuSHDVvdDmFmdckDMoiGbYePqTh72GYSZ1amKBoSkqyU9JmmHpJvLbH+vpK2SHpT0XUkbk/a1kk4m7Q9K+p+VrHM2vfkc23YfZdwd1WZWhyoWEJKywG3ANcBG4J2TAVDi8xHRExFXAB8HPlmy7cmIuCJ5vbdSdZ5OT6GL4yPjPHXwWBofb2aWqkqeQVwF7IiInRExAtwJXFe6Q0SUPrqtDaiq/6pPTv3t8RBmVo8qGRB5YFfJen/SNo2k90l6kuIZxPtLNq2T9ICkb0l6bbkPkHSjpD5JfQcOHJjP2gF4UXc7SxqzvpPJzOpS6p3UEXFbRLwI+D3gD5Pm54CLIuJK4IPA5yV1ljn29ojYFBGburu75722bEZctrrTdzKZWV2qZEDsBtaUrBeSttncCbwNICKGI+JQsnw/8CSwoTJlnl5PIce2PQOMjU+k8fFmZqmpZEBsAdZLWiepCbgB2Fy6g6T1Jas/AzyRtHcnndxIugRYD+ysYK2z6i3kODk6zpMHPKLazOpLQ6XeOCLGJN0E3AdkgTsiYpukW4C+iNgM3CTpjcAocBh4T3L464BbJI0CE8B7I+L5StV6Oj35LgAe7j/CpSs70ijBzCwVFQsIgIi4F7h3RttHSpY/MMtxXwC+UMna5uqSZW20NWV5ZPdRrt+05swHmJktEql3Ule7TEZcls95RLWZ1R0HxBz05nNs3zPAqDuqzayOOCDmoKeQY3hsgif2eUS1mdUPB8Qc9Ba6ANi6+0iqdZiZLSQHxBxcvLSVjpYGj6g2s7rigJiDTEZcvjrnEdVmVlccEHPUW8jx6HODjIy5o9rM6oMDYo56CjlGxid4fN9g2qWYmS0IB8Qc9SYjqt0PYWb1wgExR2uWLiG3pNHPhjCzuuGAmCNJ9ORzvtXVzOqGA+Is9BRyPLZ3kKHR8bRLMTOrOAfEWejN5xgdDx7b645qM1v8HBBnoSd5RrU7qs2sHjggzkK+awkXtDZ6wJyZ1QUHxFmQRE+hy1N/m1ldcECcpd58jsf3uaPazBY/B8RZ6inkGJ8Itj83kHYpZmYV5YA4S71JR/UjvsxkZoucA+IsrexsYVl7k0dUm9mi54A4S1Mjqh0QZrbIOSDOQU+hiyf2D3JiZCztUszMKsYBcQ568zkmArbvcUe1mS1eDohz4BHVZlYPHBDnYEVnC8s7mt0PYWaLmgPiHPUWch5RbWaLmgPiHPXku3jywDGODbuj2swWJwfEOeot5IiAbT6LMLNFqqIBIelqSY9J2iHp5jLb3ytpq6QHJX1X0saSbR9OjntM0psrWee5uDzvjmozW9wqFhCSssBtwDXARuCdpQGQ+HxE9ETEFcDHgU8mx24EbgAuA64GPpW8X9Xo7mhmVa7FAWFmi1YlzyCuAnZExM6IGAHuBK4r3SEiSgcStAGRLF8H3BkRwxHxFLAjeb+q4hHVZraYVTIg8sCukvX+pG0aSe+T9CTFM4j3n82xaest5Nh58DgDQ6Npl2JmNu9S76SOiNsi4kXA7wF/eDbHSrpRUp+kvgMHDlSmwNPoKXQBntnVzBanSgbEbmBNyXohaZvNncDbzubYiLg9IjZFxKbu7u7zq/Yc9OQ99beZLV6VDIgtwHpJ6yQ1Uex03ly6g6T1Jas/AzyRLG8GbpDULGkdsB74QQVrPSdL25rIdy3x1N9mtig1VOqNI2JM0k3AfUAWuCMitkm6BeiLiM3ATZLeCIwCh4H3JMduk3QXsB0YA94XEVX5jM/eQs53MpnZolSxgACIiHuBe2e0faRk+QOnOfZjwMcqV9386Cnk+PIjezl6YpRca2Pa5ZiZzZvUO6lr3VQ/xB6fRZjZ4uKAOE+TAeF+CDNbbBwQ56mrtYmLlraydfeRtEsxM5tXDoh50FPI+QzCzBYdB8Q86M3n6D98ksPHR9Iuxcxs3jgg5kGPZ3Y1s0XIATEPLnNAmNki5ICYB7kljaxb1sbD/UfSLsXMbN44IOaJp/42s8XGATFPegs59hwd4uCx4bRLMTObFw6IeeJHkJrZYuOAmCeXre5EwpeZzGzRcEDMk46WRi5Z1uYBc2a2aMwpICS1ScokyxskXSvJU5fO0Fvo8pQbZrZozPUM4ttAi6Q88FXg3cBfVqqoWtWTz7FvYJj9A0Npl2Jmdt7mGhCKiBPAzwOfiojrgcsqV1Zt6im4o9rMFo85B4SkVwPvAv4hactWpqTatXFVJxl56m8zWxzmGhC/BXwY+NvkcaCXAN+oWFU1qq25gRcvb/cZhJktCnN65GhEfAv4FkDSWX0wIt5fycJqVU++i289foCIQFLa5ZiZnbO53sX0eUmdktqAR4Dtkv5DZUurTT35Tg4eG2avO6rNrMbN9RLTxogYAN4GfBlYR/FOJpuhp9AFeMCcmdW+uQZEYzLu4W3A5ogYBaJiVdWwjas6yWbkfggzq3lzDYhPA08DbcC3JV0MDFSqqFq2pCnL+uXtvpPJzGrenAIiIv5rROQj4i1R9AzwkxWurWb1FnJs3X2UCJ9kmVntmmsndU7SJyX1Ja//TPFswsroyed4/vgIe466o9rMatdcLzHdAQwC70heA8BnK1VUrXuho/pIqnWYmZ2PuQbEiyLioxGxM3n9MXBJJQurZS9Z2UFDRu6HMLOaNteAOCnpxydXJL0GOFmZkmpfS2OWS1d2+E4mM6tpcxpJDbwX+D+Scsn6YeA9lSlpcegt5Lh3616PqDazmjXXu5geioiXAb1Ab0RcCfzUmY6TdLWkxyTtkHRzme0flLRd0sOS/im5fXZy27ikB5PX5rP4narC5fkcR0+O0n/YJ1pmVpvO6olyETGQjKgG+ODp9pWUBW4DrgE2Au+UtHHGbg8AmyKiF7gH+HjJtpMRcUXyuvZs6qwGvfkuwDO7mlntOp9Hjp7puslVwI6kU3sEuBO4rnSHiPhG8pwJgO8BhfOop6psWNlOUzbDw37CnJnVqPMJiDONAssDu0rW+5O22fwqxXmeJrUkYy6+J+lt5Q6QdOPk2IwDBw7MpeYF09yQ5SWrOjwnk5nVrNN2UksapHwQCFgyX0VI+iVgE/ATJc
0XR8Tu5NkTX5e0NSKeLD0uIm4HbgfYtGlT1Q1bvjyf4+8e2uOOajOrSac9g4iIjojoLPPqiIgz3QG1G1hTsl5I2qaR9EbgD4BrI2K45LN3Jz93At8ErpzTb1RFevM5BofGeObQiTPvbGZWZc7nEtOZbAHWS1onqQm4AZh2N5KkKylOBHhtROwvab9AUnOyvAx4DbC9grVWxOQzqh/2eAgzq0EVC4iIGANuAu4DHgXuSh5XeoukybuSPgG0A3fPuJ31pUCfpIcoPtr01oiouYDYsKKDpoaMp9wws5o014Fy5yQi7gXundH2kZLlN85y3L8APZWsbSE0ZjNsXNXpW13NrCZV8hKTUZzZddueASYmqq4P3czstBwQFdZTyHFseIynDh1PuxQzs7PigKiw3qSj2uMhzKzWOCAq7MXd7bQ0ZtwPYWY1xwFRYQ3ZDJetzrHVU26YWY1xQCyAnnyOR3YPMO6OajOrIQ6IBdCTz3FydJydB46lXYqZ2Zw5IBbAZEe1+yHMrJY4IBbAJd3ttDZl/QhSM6spDogFkM2Iy1Z38rCn3DCzGuKAWCA9+S62PzfA2PhE2qWYmc2JA2KB9BZyDI1OsMMd1WZWIxwQC6THHdVmVmMcEAtk3YVttDc3eMoNM6sZDogFkpnsqPadTGZWIxwQC6i3kOPR5wYYdUe1mdUAB8QC6il0MTI2weP7BtMuxczsjBwQC6g376m/zax2OCAW0MUXttLR0uB+CDOrCQ6IBSSJnnzOZxBmVhMcEAusp5DjR3sHGB4bT7sUM7PTckAssN58F6PjweN7PaLazKqbA2KBTU397SfMmVmVc0AssMIFS+hqbXQ/hJlVPQfEApvsqPacTGZW7RwQKejJ53h83yBDo+6oNrPq5YBIQW8hx9hE8KO9HlFtZtXLAZGCnkIXAFv9hDkzq2IVDQhJV0t6TNIOSTeX2f5BSdslPSzpnyRdXLLtPZKeSF7vqWSdC211roWlbU3uhzCzqlaxgJCUBW4DrgE2Au+UtHHGbg8AmyKiF7gH+Hhy7FLgo8CrgKuAj0q6oFK1LrSpEdWecsPMqlglzyCuAnZExM6IGAHuBK4r3SEivhERJ5LV7wGFZPnNwD9GxPMRcRj4R+DqCta64HoLOZ7Yf4yTI+6oNrPqVMmAyAO7Stb7k7bZ/Crw5bM5VtKNkvok9R04cOA8y11YPfkc4xPB9ucG0i7FzKysquiklvRLwCbgE2dzXETcHhGbImJTd3d3ZYqrkF53VJtZlatkQOwG1pSsF5K2aSS9EfgD4NqIGD6bY2vZis5mlrU3e+pvM6talQyILcB6SeskNQE3AJtLd5B0JfBpiuGwv2TTfcBPS7og6Zz+6aRt0ZBEb8FTf5tZ9apYQETEGHATxX/YHwXuiohtkm6RdG2y2yeAduBuSQ9K2pwc+zzwJxRDZgtwS9K2qPTkczx54BjHh8fSLsXM7BQNlXzziLgXuHdG20dKlt94mmPvAO6oXHXp6y3kmAjY/twAr1y7NO1yzMymqYpO6nrVkzyj2gPmzKwaOSBStLyzhRWdzb6TycyqkgMiZT35Lo+oNrOq5IBIWW8hx86DxxkcGk27FDOzaRwQKesp5IiAbXs8otrMqosDImWTHdUeD2Fm1cYBkbJl7c2szrV4RLWZVR0HRBXoKeR4xAFhZlXGAVEFegtdPHXwOEdPuqPazKqHA6IKTPZDbPNZhJlVEQdEFZgaUe2AMLMq4oCoAhe0NVG4YInvZDKzquKAqBK9BT+j2syqiwOiSvTku3j2+RPsHxhKuxQzM8ABUTVe/aILkeAnPvFNPnjXg3xv5yEiIu2yzKyOVfR5EDZ3V6zp4kv//jXcuWUXf/fQHr74w91cfGEr17+iwNtfUWBVbknaJZpZndFi+V/qpk2boq+vL+0y5sXJkXG+su057trSz7/uPIQEr13fzTs2FXjTxhU0N2TTLtHMFglJ90fEprLbHBDV7dlDJ7jn/l3cc38/e44O0dXayHUvW831m9ZweXJ7rJnZuXJALALjE8G/PHmQu/r6uW/bXkbGJti4qpN3bCpw3RV5LmhrSrtEM6tBDohF5siJETY/tIe7+/rZuvsoTdkMb9q4gus3FXjt+m6yGaVdopnVCAfEIrZ9zwB337+LLz2wm8MnRlnZ2cLbX5Hn+lesYe2ytrTLM7Mq54CoA8Nj4/zTo/u5u28X33r8ABMBV61byjs2reEtPStpbfINa2Z2KgdEndl7dIgv/LCfu/t28fShE7Q1ZXlr72re8coCL7/oAiRfgjKzIgdEnYoI+p45zF1bdvEPW5/jxMg4l3S3cf0r1vD2l+dZ3tmSdolmljIHhHFseIx7H36Ou+/fxZanD5PNiNdv6Ob6TQV+6iUraGrwoHqzeuSAsGl2HjjG3ff384X7+9k/OMzStiZ+7so879i0hktXdqRdnpktIAeElTU2PsF3njjIXX27+Nqj+xgdD3oLOa67Is/lqzvZsKLD4yvMFjkHhJ3RoWPDfOnBPdzdt4sf7R2cau/uaGbDinbWL+/g0pUdxeUVHXS2NKZYrZnNFweEzVlEsOfoEI/vG+SJfYM8vu9YsnyMk6PjU/utyrWwfkUHG5a3s2FFBxtWdrB+eTttzb6d1qyWnC4gKvq3WdLVwJ8DWeAvIuLWGdtfB/wZ0AvcEBH3lGwbB7Ymq89GxLWVrNWKJJHvWkK+awk/eenyqfaJiWD3kZM8vm+Qx5LAeHzfIP935yGGxyam9st3LWHDinY2rOxgw/IONqzo4MXL21nS5AkGzWpNxQJCUha4DXgT0A9skbQ5IraX7PYs8CvA75R5i5MRcUWl6rOzk8mINUtbWbO0lTe8dMVU+/hE8OzzJ0454/jnHYcYGS8GhwQXLW1l/fLiJapLV3awfnkHl3S30dLo4DCrVpU8g7gK2BEROwEk3QlcB0wFREQ8nWybKPcGVv2yGbFuWRvrlrXx5stWTrWPjU/w9KET00Lj8X2DfPOx/YxNFC9rZgRrL2wrXqJK+jY2rOhg3bI233ZrVgUqGRB5YFfJej/wqrM4vkVSHzAG3BoRX5q5g6QbgRsBLrroonOv1OZdQzbDi5e38+Ll7VzT80L7yNgETx86zmN7S8449g/y1e17SXKDhpLQWZlrYUXn5Kt5armzpcEjws0qrJp7FC+OiN2SLgG+LmlrRDxZukNE3A7cDsVO6jSKtLPT1JBJzhimj7cYGh1n54HjPLF/MDnbOMbTh47z/aee5+jJ0VPep6UxUxIcLazoaGZlroXlJcsrOlt8CcvsPFQyIHYDa0rWC0nbnETE7uTnTknfBK4EnjztQVazWhqzbFzdycbVnadsGxodZ//AMHsHhtg37VVs29p/hH8cGGJo9NQrlZ0tDVNhsbyjhZW55pLl4lnJsvZmGrO+pGU2UyUDYguwXtI6isFwA/CLczlQ0gXAiYgYlrQMeA3w8YpValWtpTHLRRe2ctGFrbPuExEMDI2xf2AoCZLhaWGyd2CYHfsPsn9wmPGJ6SebEixrby5ewupoYUWupfizs5kVuRaWdxRDZWlrE
xk/a8PqSMUCIiLGJN0E3EfxNtc7ImKbpFuAvojYLOmVwN8CFwA/K+mPI+Iy4KXAp5PO6wzFPojts3yUGZLILWkkt6SR9Stmny5kfCI4dHy4eEZydIh9g0mYJMt7jg7xwK4jPH985JRjGzKiu6OZ5R3NdCcBsryjheWdzS8sdzRzYXuzH9pki4IHypmVMTxWvKy1b2CI/YPD7B8YYt9gMVj2Dw5N/Tx84tT+kUxyRrI8OSNZXhIky0uCZVl7Ew2+tGUpS22gnFmtam7ITo37OJ3hsXEODA4nITI9PPYNDLPn6BAP9R/h4LFTz0gkuLCteEYyPUyaWd7ZMvWzu73Zt/1aKhwQZuehuSFL4YJWChecPkhGxyc4eGx4+llJcmZSXB5i254BDh0bZqLMSf3Stia6WouX0DpbGulc0khnS0Nxfaqt4ZTtnUsa3QFv58wBYbYAGrMZVuWWsCq35LT7jU8Eh44Vw+OFy1vD7Bsc4uiJUQaGRjlyYoRnnz/B0ZOjDJwcnRp4OJvWpuysATIzYCaXJ/fraGlwx3wdc0CYVZFsRsXLS50tXJ7PnXH/iODk6DgDJ8cYGBqdCo2BodFi28mkbXJ9aJS9A0M8vn9wav103ZAStDc3TAuVxmyGTEY0ZERGIpuBhkyxLSvIZjJkM8XfJZsRWSnZJrLZ5OeMbQ3JekaiITv5vi/sM7Wc7NOYFUsas7Q0ZWltytLa2MCSpmzx1Zj1TQLzxAFhVsMk0drUQGtTcbzH2ZqYCI6PjCXBMpYEyWSojE0Lm6MnRxkcGuXk6DjjEzH1mohgbCKYmAjGIxgbL7ZN7VO6XLJ/Je+PaW7IsKQpS2tjdio4JkOkNQmR6csNxeVkffpycVtL4wv718tZlQPCrI5lMqKjpZGOlsbizeYLKJLgGJsZMmcIlvGJYgidHB3n5Mg4J0fHOTEyzsmRsZLl5Ofo5HJx25ETI+w5Utw2NPrCPmerpTFTDJbGYnA0NWRobszSnM3Q3JihuSFDc0O2+LOxZHlyv4aSfU67//TtTQ2ZBT07ckCYWSqUXE5qSHk2lImJYHhsghMjY9OCoxgeY5wcmZgKmFODZ5yR8QmGR8cZHptgeGycY8NjHDpWXC62lW4//3lJG7M6JXAuz+f4b++8ch6+jekcEGZW1zIZTV2GurDCnxURjI7HrOExbXlsnOHRkuWxiWR9/JTta5ae/uaHc+WAMDNbIJJoahBNDRlmH+9fPXyDtJmZleWAMDOzshwQZmZWlgPCzMzKckCYmVlZDggzMyvLAWFmZmU5IMzMrKxF80Q5SQeAZ87jLZYBB+epnFrn72I6fx/T+ft4wWL4Li6OiO5yGxZNQJwvSX2zPXav3vi7mM7fx3T+Pl6w2L8LX2IyM7OyHBBmZlaWA+IFt6ddQBXxdzGdv4/p/H28YFF/F+6DMDOzsnwGYWZmZTkgzMysrLoPCElXS3pM0g5JN6ddT5okrZH0DUnbJW2T9IG0a0qbpKykByT9fdq1pE1Sl6R7JP1I0qOSXp12TWmS9NvJ35NHJP21pJa0a5pvdR0QkrLAbcA1wEbgnZI2pltVqsaAD0XERuDHgPfV+fcB8AHg0bSLqBJ/DnwlIl4CvIw6/l4k5YH3A5si4nIgC9yQblXzr64DArgK2BEROyNiBLgTuC7lmlITEc9FxA+T5UGK/wDk060qPZIKwM8Af5F2LWmTlANeB3wGICJGIuJIqkWlrwFYIqkBaAX2pFzPvKv3gMgDu0rW+6njfxBLSVoLXAl8P+VS0vRnwO8CEynXUQ3WAQeAzyaX3P5CUlvaRaUlInYDfwo8CzwHHI2Ir6Zb1fyr94CwMiS1A18AfisiBtKuJw2S3grsj4j7066lSjQALwf+R0RcCRwH6rbPTtIFFK82rANWA22SfindquZfvQfEbmBNyXohaatbkhophsPnIuKLadeTotcA10p6muKlx5+S9FfplpSqfqA/IibPKO+hGBj16o3AUxFxICJGgS8C/yblmuZdvQfEFmC9pHWSmih2Mm1OuabUSBLFa8yPRsQn064nTRHx4YgoRMRain8uvh4Ri+5/iHMVEXuBXZIuTZreAGxPsaS0PQv8mKTW5O/NG1iEnfYNaReQpogYk3QTcB/FuxDuiIhtKZeVptcA7wa2Snowafv9iLg3vZKsivwm8LnkP1M7gX+bcj2piYjvS7oH+CHFu/8eYBFOu+GpNszMrKx6v8RkZmazcECYmVlZDggzMyvLAWFmZmU5IMzMrCwHhFlC0rHk51pJvzjP7/37M9b/ZT7f36wSHBBmp1oLnFVAJBO2nc60gIiIRTfq1hYfB4TZqW4FXivpwWTO/6ykT0jaIulhSb8OIOn1kr4jaTPJqGJJX5J0f/KcgBuTtlspzvr5oKTPJW2TZytK3vsRSVsl/ULJe3+z5PkLn0tG7CLp1uSZHQ9L+tMF/3asbtT1SGqzWdwM/E5EvBUg+Yf+aES8UlIz8M+SJmfufDlweUQ8laz/u4h4XtISYIukL0TEzZJuiogrynzWzwNXUHy+wrLkmG8n264ELqM4jfQ/A6+R9Cjwc8BLIiIkdc3vr272Ap9BmJ3ZTwO/nEw/8n3gQmB9su0HJeEA8H5JDwHfozgR5HpO78eBv46I8YjYB3wLeGXJe/dHxATwIMVLX0eBIeAzkn4eOHGev5vZrBwQZmcm4Dcj4orkta5k7v/jUztJr6c4y+erI+JlFOfnOZ/HUA6XLI8DDRExRvFBV/cAbwW+ch7vb3ZaDgizUw0CHSXr9wG/kUyFjqQNszwsJwccjogTkl5C8bGtk0Ynj5/hO8AvJP0c3RSf2vaD2QpLntWRSyZQ/G2Kl6bMKsJ9EGanehgYTy4V/SXFZzGvBX6YdBQfAN5W5rivAO9N+gkeo3iZadLtwMOSfhgR7ypp/1vg1cBDQAC/GxF7k4AppwP4f5JaKJ7ZfPCcfkOzOfBsrmZmVpYvMZmZWVkOCDMzK8sBYWZmZTkgzMysLAeEmZmV5YAwM7OyHBBmZlbW/wc3oVuWUfor/QAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ + "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "loss_per_epoch = [np.mean(loss_per_epoch) for loss_per_epoch in running_loss]\n", @@ -399,25 +478,69 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + 
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAo3UlEQVR4nO3deXxddZ3/8dcnW5OmbbokXUiXtHRnLYRSBCq0LBWQVaEgiz8XRkeY0XEZdNRxYFTGUdFRRgcRF0CQRZjKIFvCJhZoS8vSJt2XJKVJuiZps+fz++Oe0tv2Jr1tc3PuTd7Px+M+mrPdfO5Fzzvn+z3n+zV3R0RE5EBpYRcgIiLJSQEhIiIxKSBERCQmBYSIiMSkgBARkZgUECIiEpMCQkREYlJASMozs4aoV4eZNUYtf+II3u8lM/tMImoVSSUZYRcgcrTcfcDen81sA/AZd38hvIoSy8wy3L0t7Dqk99MVhPRaZpZmZreZ2Voz22Zmj5jZ0GBbtpk9EKzfaWaLzGyEmX0XOBv4eXAF8vNO3vtRM9tiZrvM7BUzOy5qW46Z/cjMNgbb/2pmOcG2s8zsb8HvrDCzTwbr97tqMbNPmtlfo5bdzL5gZquB1cG6nwbvUWdmS8zs7Kj9083sG8Fnrw+2jzGzu83sRwd8lgVm9qWj/8alt1FASG92K3A58GHgGGAHcHew7SYgDxgDDAM+BzS6+78ArwK3uPsAd7+lk/f+CzAJGA68BTwYte2HwKnAh4ChwNeADjMbFxz3M6AAOBlYdhif53LgdGB6sLwoeI+hwB+AR80sO9j2T8C1wEXAIOBTwB7gd8C1ZpYGYGb5wHnB8SL7UROT9GafI3KirwQws+8Am8zsBqCVSDBMdPd3gCWH88buft/en4P33WFmeUA9kZPxLHevCnb5W7DfdcAL7v5QsH5b8IrX9919e1QND0Rt+5GZfROYArwNfAb4mruvDLa/vfd3mtkuYC7wPDAfeMndqw+jDukjdAUhvdk44ImgOWcnUAa0AyOA+4FngYfNbLOZ/cDMMuN506D55s6g+aYO2BBsyg9e2cDaGIeO6WR9vCoOqOMrZlYWNGPtJHJFlB/H7/odcH3w8/VEvguRgyggpDerAD7i7oOjXtnuXuXure7+b+4+nUhT0CXAjcFxhxri+DrgMiJNM3lAUbDegK1AE3BsJ/XEWg+wG+gftTwyxj4f1BX0N3wNuBoY4u6DgV1BDYf6XQ8Al5nZScA04MlO9pM+TgEhvdkvge8Gbf+YWYGZXRb8fK6ZnWBm6UAdkSanjuC4amBCF+87EGgm0jzUH/je3g3u3gHcB/zYzI4JrjbOMLN+RPopzjOzq80sw8yGmdnJwaHLgCvNrL+ZTQQ+fYjPNhBoA2qBDDP7NpG+hr3uBe4ws0kWcaKZDQtqrCTSf3E/8Li7Nx7id0kfpYCQ3uynwALgOTOrB14n0skLkb/QHyMSDmXAy+xravkp8DEz22Fm/xXjfX8PbASqgBXB+0b7CvAukZPwduA/gDR330Sk0/jLwfplwEnBMXcBLUTC6Xfs3+kdy7PAM8CqoJYm9m+C+jHwCPBc8Bl/DeREbf8dcAJqXpIumCYMEul7zGw2kaamca6TgHRCVxAifUzQGf+PwL0KB+mKAkKkDzGzacBOYBTwk1CLkaSnJiYREYlJVxAiIhJTr3mSOj8/34uKisIuQ0QkpSxZsmSruxfE2tZrAqKoqIjFixeHXYaISEoxs42dbVMTk4iIxKSAEBGRmBQQIiISU0IDwszmmdlKM1tjZrfF2D7WzF40s6Vm9o6ZXRS17evBcSvN7MJE1ikiIgdLWCd1MAja3cD5QCWwyMwWuPuKqN2+CTzi7r8ws+nA00BR8PN84DgiE728YGaT3b09UfWKiMj+EnkFMRNY4+7r3L0FeJjIEMnRnH0jUOYBm4OfLwMedvdmd18PrAneT0REekgiA6KQ/UeXrAzWRfsOcL2ZVRK5erj1MI4VEZEECvs5iGuB37r7j8zsDOB+Mzs+3oPN7GbgZoCxY8cmqEQRkeTS1t5B5Y5G1m/dzbqtu8nJTOe607v/HJjIgKgiMu3hXqODddE+DcwDcPeFwYTr+XEei7vfA9wDUFxcrEGlRKTXcHdqG5pZX7v7gyBYV7ub9Vsb2LR9D63t+055p4wdnHIBsQiYZGbjiZzc5xOZqjHaJiKTp/82GGUym8gMWQuAP5jZj4l0Uk8C3kxgrSIioWhobmPDBwHQwPqtkUBYX7ub+ua2D/bLykijaFh/Jg4fwAXHjWR8fi4T8nOZUDCAIf3jmk79sCUsINy9zcxuITLzVTpwn7svN7PbgcXuvoDIzFq/MrMvEemw/mQwPv1yM3uEyGxdbcAXdAeTiKSq1vYOKrbviVwJ1EbCYP3WSBhU1zV/sJ8ZHJOXw4SCXK48pZDx+bmMLxjAhPxcjhmcQ3qadfFbul+vGe67uLjYNRaTiITF3ampbw4CoGG/pqFN2/fQ3rHvXDs0Nyty8g9exxbkMj5/AOOG9Sc7M71H6zazJe5eHGtb2J3UIiIpoaWtg227m6mt3/favKspuCqIXA3sadnX0JGdmUbRsFymjRrIxSeMCq4GIs1Cg/tnhfhJ4qeAEJE+q73D2bGnZb+Tfm1DM1uDf6PX7dzTetDxaQajh/RnfH4uM8cPZUJ+5EpgfEEuowZlk9bDTULdTQEhIr2Ku1PX1Lbfyb22vpmtDc0HBcG2hmY6YrSy52SmM3xQP/IH9OPYggHMmjCMgoH9Iq8B/ciP+jkro/cOaaeAEJGU0tDcxmtrtlJT10RtQ0vMv/xb2joOOi4z3cgfEDmxj8rL5sTReR+c9PeuLwj+ze2nUyMoIEQkRayqruf+hRv501uV7A7a+s1gWG7WByf4Ywty9zvRf/DvwH7k5WRiltpNPj1NASEiSau1vYPnlldz/+sbeH3ddrLS07jkxFFcfdoYJuTnMjQ3i4z03tvEEzYFhIgknZq6Jv7w5iYeenMT1XXNFA7O4Z/nTeXq4tEMG9Av7PL6DAWEiCQFd+eN9du5f+FGnl2+hbYO58OTC/ju5eM4d+rwHn9ITBQQIhKyhuY2nnirkvtf38iq6gYGZWfwyQ8Vcf2scRTl54ZdXp+mgBCRUKyuruf+1zfyp7eqaGhu4/jCQfzgqhP56EnHkJPVs08TS2wKCBHpMa3tHTy/oprfL9y/0/n6M8YxY8xg3WWUZBQQIpJwNXVNPPRmBX94c+MHnc5fmzeFa4rHqNM5iSkgRCQh3J0312/n969v5Nn3Ip3Os9XpnFIUECLSrRqa23hiaRUPLNzIyup6BmVncFPQ6Txenc4pRQEhIt1iTU3kSefHg07n444ZxH9cdQKXnlSoTucUpYAQSXItbR0sXLeN5tZ2BuVkMig7k0E5GeTlZJKblRHqiKFtH3Q6b2Thum1kpadx8YmjuH7WOE4Zq07nVKeAEElCHR3Okk07eGJpFU+/+37MoaYhMtz0wKjAGJQdeeXlRNZFwqST5exMsjPTjugkvrfT+aE3N7GlronCwTl89
cIpXHPaGPLV6dxrKCBEksiamnqeXLqZJ5dVUbmjkZzMdC48bgSXnVxIwcB+1DW1UtfYSl1jG3VNrexqDJab2qhrjCyv29pAXWMbuxpbaWzteqbezHT7IFAG5mQyKDtjvwDZGyqRgMmko8N5/K1Kngk6nc+elM8dlx/PHHU690oKCJGQ1dQ1seDtSCi8V1VHmsHZkwr4ygVTOH/6iKMaerqlrYP6vUESFSKRoDk4ZHY1tlK1o/GD9a3tB0+WsLfT+ROnj2VCwYCj+eiS5BQQIiFoaG7jueVbeGJpFa+t2UqHw4mj8/j2JdO55KRRDB+Y3S2/JysjjWED+h3RswbuTlNrx34h0tjazqnjhtA/S6eOvkD/lUV6SGt7B39dvZUnllbx3IotNLV2MGZoDrecO5HLZhRybJL9NW5m5GSlk5OVzohB3RNYkloUECIJ5O4sq9jJk0ureOqd99m2u4XB/TP52KmjuWJGIaeMHaI7fSRpKSBEEmDD1t08uayKJ5dWsWHbHvplpHHe9BFccXIhsycX9Op5jKX3UECIdJNtDc089c77PLG0imUVOzGDMyYM4+/Pnci840cyKDsz7BJFDosCQuQoNLa083xZNU8ureLlVbW0dzjTRg3iGxdN5dKTChmZp7Z7SV0KCJHD1N7h/G1tpLP52fe2sLulnVF52Xz27AlcPuMYpo4cFHaJIt1CASESB3dn+eY6nlhaxZ/f3kxNfTMDszP46EnHcPmMQmYWDQ11yAuRRFBAiHTh/V2N/OmtKp5YWsWamgYy041zpwznihmFnDt1ONmZGoROei8FhEgMDc1t/PeLa7j3r+tpaetgZtFQvnfFCVx0wkgG988KuzyRHqGAEInS3uE8tqSC/3x2FVsbmrlyRiFfPG8yY4f1D7s0kR6ngBAJLFy7jTueWsGK9+s4ddwQ7r2pmJPHDA67LJHQKCCkz9uwdTffe7qM51ZUUzg4h59dO4NLThylJ5ylz1NASJ9V19TKz0vX8JvX1pOZnsZXL5zCp88ar45nkYACQvqctvYOHl5UwY+fX8WOPS18/NTRfOWCKQzXgHQi+1FASJ/y6upa7nhqBauqGzh9/FC+dcl0ji/MC7sskaSU0IAws3nAT4F04F53v/OA7XcB5waL/YHh7j442NYOvBts2+TulyayVund1tQ08L2nyygtr2Hs0P788vpTuPC4kepnEOlCwgLCzNKBu4HzgUpgkZktcPcVe/dx9y9F7X8rMCPqLRrd/eRE1Sd9w849LfzkhdU88PpGsjPT+fpHpvLJM4vol6F+BpFDSeQVxExgjbuvAzCzh4HLgBWd7H8t8K8JrEf6kNb2Dh58fSN3vbCa+qZW5s8cyz+dP5n8I5hZTaSvSmRAFAIVUcuVwOmxdjSzccB4oDRqdbaZLQbagDvd/ckYx90M3AwwduzY7qlaUpq78+LKGr77f2Wsrd3NWRPz+eYl0zSAnsgRSJZO6vnAY+7eHrVunLtXmdkEoNTM3nX3tdEHufs9wD0AxcXFB8+uLn3Kqup67nhqBa+u3sqE/Fx+fVMxc6YOVz+DyBFKZEBUAWOilkcH62KZD3wheoW7VwX/rjOzl4j0T6w9+FDp67Y1NHPXC6v4wxubGNAvg29dMp0bZo3TrG0iRymRAbEImGRm44kEw3zgugN3MrOpwBBgYdS6IcAed282s3zgTOAHCaxVUlBLWwe/X7iBn5asZk9LOzfMGscXz5vMkFwNpifSHRIWEO7eZma3AM8Suc31Pndfbma3A4vdfUGw63zgYXePbiKaBvyPmXUAaUT6IDrr3JY+xt15bkU133+6jA3b9nDOlAK+efE0Jg4fGHZpIr2K7X9eTl3FxcW+ePHisMuQBFuxuY47nlrBwnXbmDh8AN+8eBrnTBkedlkiKcvMlrh7caxtydJJLdKl2vpmfvTcSv64uILBOZncftlxXDdzLBnp6mcQSRQFhCS1ptZ27nttPf/94lqaWtv59JnjuXXOJPL6Z4Zdmkivp4CQpOTuPP3uFr7/lzIqdzRy3rQRfOOiqUwoGBB2aSJ9hgJCksr23S08ubSKRxZXUL6lnqkjB/LgZ07nzIn5YZcm0ucoICR07R3Oq6treXRxJc+vqKalvYMTR+fxg6tO5KpTR5OepgfdRMKggJDQbNq2h0eXVPDYkkre39XEkP6ZfGLWWK4uHsO0URoaQyRsCgjpUU2t7Tzz3hb+uKiCheu2YQazJxXwzYunc9704RplVSSJKCAk4dydd6t28cdFFSx4ezP1TW2MGZrDl8+fzFWnjuaYwTlhlygiMSggJGEO7HDul5HGRSeM4uPFo5k1fhhp6lsQSWoKCOlWezucH1lcwfMrqmltd04ance/X348Hz3pGPJy9PyCSKpQQEi3iNXhfMOsIq4+bbTmYhBJUQoIOWKNLe08s/x9/riogtfXbSfNYPbkAr59yXTmThuh4bZFUpwCQg6Lu/NO5S4eWVzBgmWbqW9uY+zQ/nzlgkiH86g8dTiL9BYKCInL9t0tPLG0ikeDDufszDQuOn4UHy8ew+njh6rDWaQXUkBIp9o7nFdW1/JodIfzmMF894pIh/OgbHU4i/RmCgg5yMZtu3l0cSWPLalkS10TQ3OzuPGMIq4uHsOUkZqUR6SvUEDIfp55730+98BbpBl8eHIB37l0OnOmqsNZpC9SQMh+HllcSeHgHB7//IcYmZcddjkiEiL9WSgfaGxp57U1W7nguBEKBxFRQMg+f1u7lea2DuZOHRF2KSKSBDptYjKzK+M4vsndn+7GeiREJeU15GalM3P80LBLEZEk0FUfxK+A/wW6usF9NqCA6AXcndKyGmZPLlCHtIgAXQfEX9z9U10dbGYPdHM9EpLlm+vYUtfEnKnDwy5FRJJEp38quvv1hzo4nn0kNZSW12AG50xRQIhIRNxtCWY20cweMLPHzeyMRBYlPa+kvIaTRg+mYGC/sEsRkSTRaUCY2YH3Od4BfB34IvCLBNYkPay2vpm3K3Zy3jRdPYjIPl1dQfzZzG6MWm4FioBxQHsii5Ke9eLKGgDm6PZWEYnSVUDMAwaZ2TNmNhv4CnAhcAXwiZ4oTnpGSVk1o/KymTZK4yyJyD6d3sXk7u3Az83sfuBbwOeBb7r72p4qThKvua2dV1dv5YoZhZhpyG4R2aerB+VOB74KtADfAxqB75pZFXCHu+/skQolod5Yt509Le3MVf+DiBygq+cg/ge4CBgA/MbdzwTmm9mHgT8SaW6SFFdaXkN2ZhofOjY/7FJEJMl0FRBtRDqlc4lcRQDg7i8DLye2LOkJ7k5JeTVnHptPdmZ62OWISJLpqpP6OuAqYA5wYxf7SYpaU9NAxfZG5k7T3UsicrCuOqlXAV/uwVqkh5WU7729Vf0PInKwrh6Ue+pQBx9qHzObZ2YrzWyNmd0WY/tdZrYseK0ys51R224ys9XB66ZD1SKHr6SsmuOOGaS5H0Qkpq76IM4yswVdbDdgeqcbzdKBu4HzgUpgkZktcPcVe/dx9y9F7X8rMCP4eSjwr0Ax4MCS4Ngdh/5IEo8du1tY
snEHt5w7MexSRCRJdRUQl8VxfEsX22YCa9x9HYCZPRy854pO9r+WSChA5A6p5919e3Ds80Qe3HsojpokDi+vqqXDYY76H0SkE131QRztnUqFQEXUciVweqwdzWwcMB4o7eLYwhjH3QzcDDB27NijLLdvKSmvIX9AFicW5oVdiogkqWSZGWY+8Fjw9Hbc3P0edy929+KCgoIEldb7tLZ38PLKGs6dMpy0ND09LSKxJTIgqoAxUcujg3WxzGf/5qPDOVYO05KNO6hratPT0yLSpUMGhJl91MyOJEgWAZPMbLyZZREJgYM6vc1sKjAEWBi1+lngAjMbYmZDgAuCddINSstryEpP46xJuuoSkc7Fc+K/BlhtZj8ITuZxcfc24BYiJ/Yy4BF3X25mt5vZpVG7zgcednePOnY7kfknFgWv2/d2WMvRe6GsmtMnDGVAv67uURCRvu6QZwh3v97MBhG5y+i3ZubAb4CH3L3+EMc+DTx9wLpvH7D8nU6OvQ+471D1yeFZv3U362p3c+OscWGXIiJJLq6mI3evAx4DHgZGEZkT4q3g2QVJIaXlmhxIROITTx/EpWb2BPASkAnMdPePACehoThSTml5NZOGD2DssP5hlyIiSS6eRuirgLvc/ZXole6+x8w+nZiyJBHqm1p5Y912Pn32+LBLEZEUEE9AfAd4f++CmeUAI9x9g7uXJKow6X6vrt5KW4czV81LIhKHePogHgU6opbbg3WSYkrKahjcP5NTxg4OuxQRSQHxBESGu0dPGNQCZCWuJEmE9g7nxZU1nDO5gIz0ZHmAXkSSWTxnitro5xbM7DJga+JKkkRYVrGT7btbNDifiMQtnj6IzwEPmtnPiQzxXYFmmEs5peXVpKcZH9bT0yISp3gelFsLzDKzAcFyQ8Krkm5XUlZD8bgh5PXPDLsUEUkRcY21YGYXA8cB2WaR0T/d/fYE1iXdqGpnI+Vb6vnGRXGPlCIiEteDcr8kMh7TrUSamD4OaJyGFKKnp0XkSMTTSf0hd78R2OHu/wacAUxObFnSnUrKqika1p9jC3LDLkVEUkg8AdEU/LvHzI4BWomMxyQpYE9LG39bu405U0ewt3lQRCQe8fRB/NnMBgP/CbwFOPCrRBYl3ee1NdtoaevQ5EAicti6DIhgoqASd98JPG5mTwHZ7r6rJ4qTo1daXs2AfhmcVjQ07FJEJMV02cTk7h3A3VHLzQqH1OHulJTVMHtyPlkZenpaRA5PPGeNEjO7ytSAnXKWb66jpr5Zdy+JyBGJJyD+jsjgfM1mVmdm9WZWl+C6pBuUlNVgBudM0dPTInL44nmSemBPFCLdr6S8mhljBpM/oF/YpYhICjpkQJjZ7FjrD5xASJJLTV0T71Tu4qsXTgm7FBFJUfHc5vrVqJ+zgZnAEmBOQiqSbvHiyr1PT+v2VhE5MvE0MX00etnMxgA/SVRB0j1Kymo4Ji+bqSPVQigiR+ZI7n2sBKZ1dyHSfZpa2/nrmq3MmTZcT0+LyBGLpw/iZ0SenoZIoJxM5IlqSVJvrN/OnpZ2zT0tIkclnj6IxVE/twEPuftrCapHukFpWTXZmWmcceywsEsRkRQWT0A8BjS5ezuAmaWbWX9335PY0uRIuDsvlNVw1sQCsjPTwy5HRFJYXE9SAzlRyznAC4kpR47WquoGqnY2anA+ETlq8QREdvQ0o8HP/RNXkhyNkvJqAM6dooAQkaMTT0DsNrNT9i6Y2alAY+JKkqNRWlbD8YWDGJmXHXYpIpLi4umD+CLwqJltJjLl6EgiU5BKktm+u4W3Nu3gljmTwi5FRHqBeB6UW2RmU4G9YzasdPfWxJYlR+LlVTV0OMzV09Mi0g0O2cRkZl8Act39PXd/DxhgZn+f+NLkcL1QVkPBwH6cUJgXdiki0gvE0wfx2WBGOQDcfQfw2YRVJEektb2DV1bWMmfKcNLS9PS0iBy9eAIiPXqyIDNLB7ISV5IciUUbtlPf3MYc3d4qIt0knoB4Bvijmc01s7nAQ8G6QzKzeWa20szWmNltnexztZmtMLPlZvaHqPXtZrYseC2I5/f1ZaVlNWSlp3HWxPywSxGRXiKeu5j+GbgZ+Hyw/Dzwq0MdFFxp3A2cT2SAv0VmtsDdV0TtMwn4OnCmu+8ws+g/fxvd/eS4PoVQWl7DrGOHkdsvnv+kIiKHdsgrCHfvcPdfuvvH3P1jwArgZ3G890xgjbuvc/cW4GHgsgP2+Sxwd9CvgbvXHF75ArCutoF1W3fr7iUR6VZxDfdtZjPM7AdmtgG4HSiP47BCoCJquTJYF20yMNnMXjOz181sXtS2bDNbHKy/vJO6bg72WVxbWxvPR+mVSss1OZCIdL9O2yPMbDJwbfDaCvwRMHc/t5t//yTgHGA08IqZnRDcNTXO3avMbAJQambvuvva6IPd/R7gHoDi4mKnjyopq2HyiAGMGaoRUESk+3R1BVFOZFrRS9z9LHf/GdB+GO9dBYyJWh4drItWCSxw91Z3Xw+sIhIYuHtV8O864CVgxmH87j5jV2MrizZsZ+40zf0gIt2rq4C4EngfeNHMfhXcwXQ4N9gvAiaZ2XgzywLmAwfejfQkkasHzCyfSJPTOjMbYmb9otafSaTvQw7w6upa2jpc/Q8i0u06DQh3f9Ld5wNTgReJjMk03Mx+YWYXHOqN3b0NuAV4FigDHnH35WZ2u5ldGuz2LLDNzFYEv+Or7r6NyJSmi83s7WD9ndF3P8k+pWU1DO6fyYyxQ8IuRUR6GXOPv+nezIYAHweucfe5CavqCBQXF/vixYsPvWMv0t7hFP/785wzZTh3XXNy2OWISAoysyXuXhxrW1x3Me3l7jvc/Z5kC4e+alnFDnbsadXdSyKSEIcVEJJcSspqSE8zZk8uCLsUEemFFBAprKSshtOKhpCXkxl2KSLSCykgUlTF9j2srK5n7lTd3ioiiaGASFEvrow8PT1Xo7eKSIIoIFJUSVkN4/NzmVAwIOxSRKSXUkCkoN3NbSxcu013L4lIQikgUtBra7bS0t6hp6dFJKEUECmotLyGgf0yKC4aGnYpItKLKSBSTEeHU1Jew+zJBWRl6D+fiCSOzjAp5r3Nu6itb1b/g4gknAIixZSU1WAG5yogRCTBFBApprS8hlPGDmFoblbYpYhIL6eASCHVdU28W7VLzUsi0iMUECnkxXI9PS0iPUcBkUJeKKuhcHAOU0YMDLsUEekDFBApoqm1ndfWbGXO1OGYHc7MryIiR0YBkSIWrttGY2s7c9S8JCI9RAGRIkrLasjJTOeMCcPCLkVE+ggFRApwd0rLazhrUj7ZmelhlyMifYQCIgWsrK6namejBucTkR6lgEgBJWWR21v19LSI9CQFRAooKavmhMI8RgzKDrsUEelDFBBJbltDM0srdurpaRHpcQqIJPfSylrc9fS0iPQ8BUSSKy2vYfjAfhx/TF7YpYhIH6OASGItbR28sqqWOVOHk5amp6dFpGcpIJLY4g3bqW9uU/+DiIRCAZHESspryMpI48yJ+WGXIiJ9kAIiSbk7JWXVnDFhGLn9MsIuR0T
6IAVEklq3dTcbtu3R3UsiEhoFRJIqDZ6eVv+DiIRFAZGkSsqrmTpyIKOH9A+7FBHpoxQQSWhXYyuLNuzQ1YOIhCqhAWFm88xspZmtMbPbOtnnajNbYWbLzewPUetvMrPVweumRNaZbF5ZVUt7h6v/QURClbDbY8wsHbgbOB+oBBaZ2QJ3XxG1zyTg68CZ7r7DzIYH64cC/woUAw4sCY7dkah6k0lpeQ1D+mdy8pghYZciIn1YIq8gZgJr3H2du7cADwOXHbDPZ4G795743b0mWH8h8Ly7bw+2PQ/MS2CtSaOtvYMXV9Zw7pThpOvpaREJUSIDohCoiFquDNZFmwxMNrPXzOx1M5t3GMdiZjeb2WIzW1xbW9uNpYdnacVOdu5p1dzTIhK6sDupM4BJwDnAtcCvzGxwvAe7+z3uXuzuxQUFBYmpsIeVlNWQkWbMntw7Po+IpK5EBkQVMCZqeXSwLlolsMDdW919PbCKSGDEc2yvVFpezczxQxmUnRl2KSLSxyUyIBYBk8xsvJllAfOBBQfs8ySRqwfMLJ9Ik9M64FngAjMbYmZDgAuCdb1axfY9rKpu0O2tIpIUEnYXk7u3mdktRE7s6cB97r7czG4HFrv7AvYFwQqgHfiqu28DMLM7iIQMwO3uvj1RtSaL0vJIH/3caSNCrkREBMzdw66hWxQXF/vixYvDLuOo3PDrN6ja0UjpV84JuxQR6SPMbIm7F8faFnYntQQamtt4Y912NS+JSNLQONJJ4KWVNdzx1Apa2juYd/zIsMsREQEUEKFaW9vAd/+vjNLyGoqG9efeG4spLhoadlkiIoACIhS7Glv5r5LV/O5vG8jOTOcbF03lpg8V0S8jPezSREQ+oIDoQe0dzsOLNvGj51axY08L1xSP4csXTKFgYL+wSxMROYgCoocsXLuN259aQdn7dcwsGsq3Pzqd4wvzwi5LRKRTCogE27RtD997uoxnlm+hcHAOd193ChedMBIzDcQnIslNAZEgDc1t/PeLa7j3r+tJN+PL50/ms7MnkJ2pfgYRSQ0KiG7W0eH8aWkVP3imnJr6Zq6YUcg/z5vKyLzssEsTETksCohutGTjDm7/83LertzFSWMG88sbTuWUsZr0R0RSkwKiG7y/q5E7/1LO/y7bzIhB/fjx1Sdx+cmFpGnCHxFJYQqIo9DY0s49r6zjFy+vocPhlnMn8vlzjiW3n75WEUl9OpMdAXfnqXfe5/tPl7F5VxMXnzCK2z4ylTFD+4ddmohIt1FAHKZ3K3dx+1PLWbRhB9NHDeLH15zMrAnDwi5LRKTbKSDiVFPfxA+fXcmjSyoZ2j+L7195AlcXjyFd/Qwi0kspIA6hua2d37y2gZ+XrqG5rZ3Pnj2BW+ZM1JSgItLrKSA64e48t6Ka7z1dxsZtezhv2nD+5eLpjM/PDbs0EZEeoYCIYeWWem5/ajmvrdnGpOED+P2nZjJ7ckHYZYmI9CgFRJTtu1u46/lVPPjGRgZmZ/Jvlx7HJ04fS0a6Jt4Tkb5HAQG0tndw/8KN/OSFVexuaeeGWeP44nmTGZKbFXZpIiKh6fMBUbF9D5/8zZusrd3N2ZPy+dYl05k8YmDYZYmIhK7PB8SIQdmMG5bL1z8yjbnThmsYbhGRQJ8PiKyMNO775GlhlyEiknTU+yoiIjEpIEREJCYFhIiIxKSAEBGRmBQQIiISkwJCRERiUkCIiEhMCggREYnJ3D3sGrqFmdUCG4/iLfKBrd1UTqrTd7E/fR/70/exT2/4Lsa5e8zhqntNQBwtM1vs7sVh15EM9F3sT9/H/vR97NPbvws1MYmISEwKCBERiUkBsc89YReQRPRd7E/fx/70fezTq78L9UGIiEhMuoIQEZGYFBAiIhJTnw8IM5tnZivNbI2Z3RZ2PWEyszFm9qKZrTCz5Wb2j2HXFDYzSzezpWb2VNi1hM3MBpvZY2ZWbmZlZnZG2DWFycy+FPz/5D0ze8jMssOuqbv16YAws3TgbuAjwHTgWjObHm5VoWoDvuzu04FZwBf6+PcB8I9AWdhFJImfAs+4+1TgJPrw92JmhcA/AMXufjyQDswPt6ru16cDApgJrHH3de7eAjwMXBZyTaFx9/fd/a3g53oiJ4DCcKsKj5mNBi4G7g27lrCZWR4wG/g1gLu3uPvOUIsKXwaQY2YZQH9gc8j1dLu+HhCFQEXUciV9+IQYzcyKgBnAGyGXEqafAF8DOkKuIxmMB2qB3wRNbveaWW7YRYXF3auAHwKbgPeBXe7+XLhVdb++HhASg5kNAB4HvujudWHXEwYzuwSocfclYdeSJDKAU4BfuPsMYDfQZ/vszGwIkdaG8cAxQK6ZXR9uVd2vrwdEFTAmanl0sK7PMrNMIuHwoLv/Kex6QnQmcKmZbSDS9DjHzB4It6RQVQKV7r73ivIxIoHRV50HrHf3WndvBf4EfCjkmrpdXw+IRcAkMxtvZllEOpkWhFxTaMzMiLQxl7n7j8OuJ0zu/nV3H+3uRUT+d1Hq7r3uL8R4ufsWoMLMpgSr5gIrQiwpbJuAWWbWP/j/zVx6Yad9RtgFhMnd28zsFuBZInch3Ofuy0MuK0xnAjcA75rZsmDdN9z96fBKkiRyK/Bg8MfUOuD/hVxPaNz9DTN7DHiLyN1/S+mFw25oqA0REYmprzcxiYhIJxQQIiISkwJCRERiUkCIiEhMCggREYlJASESMLOG4N8iM7uum9/7Gwcs/607318kERQQIgcrAg4rIIIB27qyX0C4e6976lZ6HwWEyMHuBM42s2XBmP/pZvafZrbIzN4xs78DMLNzzOxVM1tA8FSxmT1pZkuCeQJuDtbdSWTUz2Vm9mCwbu/VigXv/Z6ZvWtm10S990tR8y88GDyxi5ndGczZ8Y6Z/bDHvx3pM/r0k9QinbgN+Iq7XwIQnOh3uftpZtYPeM3M9o7ceQpwvLuvD5Y/5e7bzSwHWGRmj7v7bWZ2i7ufHON3XQmcTGR+hfzgmFeCbTOA44gMI/0acKaZlQFXAFPd3c1scPd+dJF9dAUhcmgXADcGw4+8AQwDJgXb3owKB4B/MLO3gdeJDAQ5ia6dBTzk7u3uXg28DJwW9d6V7t4BLCPS9LULaAJ+bWZXAnuO8rOJdEoBIXJoBtzq7icHr/FRY//v/mAns3OIjPJ5hrufRGR8nqOZhrI56ud2IMPd24hMdPUYcAnwzFG8v0iXFBAiB6sHBkYtPwt8PhgKHTOb3MlkOXnADnffY2ZTiUzbulfr3uMP8CpwTdDPUUBk1rY3OyssmKsjLxhA8UtEmqZEEkJ9ECIHewdoD5qKfktkLuYi4K2go7gWuDzGcc8Anwv6CVYSaWba6x7gHTN7y90/EbX+CeAM4G3Aga+5+5YgYGIZCPyvmWUTubL5pyP6hCJx0GiuIiISk5qYREQkJgWEiIjEpIAQEZGYFBAiIhKTAkJERGJSQIiISEwKCBERien/A0GEoRH/AAAdAAAAAElFTkSuQmCC\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" 
+ ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "acc_per_epoch = [np.mean(acc_per_epoch) for acc_per_epoch in running_test_acc]\n", + "display_loss_plot(acc_per_epoch, title=\"Test accuracy\", ylabel=\"Accuracy [%]\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7979886313948404" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "test(model, test_quantized_loader)" ] }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the Brevitas model to disk\n", + "torch.save(model.state_dict(), \"state_dict_self-trained.pth\")" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## (Option 2) Load Pre-Trained Parameters <a id=\"load_pretrained\"></a>\n", + "## (Option 2, faster) Load Pre-Trained Parameters <a id=\"load_pretrained\"></a>\n", "\n", "Instead of training from scratch, you can also use pre-trained parameters we provide here. These parameters should achieve ~91.9% test accuracy." ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -426,7 +549,7 @@ "IncompatibleKeys(missing_keys=[], unexpected_keys=[])" ] }, - "execution_count": 36, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -441,7 +564,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 18, "metadata": { "scrolled": true }, @@ -452,7 +575,7 @@ "0.9188772287810328" ] }, - "execution_count": 37, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -465,7 +588,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Network Surgery Before Export <a id=\"network_surgery\"></a>\n", + "**Why do these parameters give better accuracy vs training from scratch?** Even with the topology and quantization fixed, achieving good accuracy on a given dataset requires [*hyperparameter tuning*](https://towardsdatascience.com/hyperparameters-optimization-526348bb8e2d) and potentially running training for a long time. The \"training from scratch\" example above is only intended as a quick example, whereas the pretrained parameters are obtained from a longer training run using the [determined.ai](https://determined.ai/) platform for hyperparameter tuning." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Network Surgery Before Export <a id=\"network_surgery\"></a>\n", "\n", "Sometimes, it's desirable to make some changes to our trained network prior to export (this is known in general as \"network surgery\"). This depends on the model and is not generally necessary, but in this case we want to make a couple of changes to get better results with FINN." 
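The `(64, 593)` to `(64, 600)` weight shapes in the cells that follow come from zero-padding the first layer's input dimension from 593 to 600, a friendlier number for FINN's folding. The padding cell itself is unchanged by this patch and so does not appear in the hunks; a sketch of what it does, assuming the `model` defined earlier in this notebook, looks like this:

```python
import copy

import numpy as np
import torch

modified_model = copy.deepcopy(model)  # leave the trained model untouched
W_orig = modified_model[0].weight.data.detach().numpy()  # shape (64, 593)
W_new = np.zeros((64, 600), dtype=np.float32)
W_new[:, :593] = W_orig  # the 7 padded inputs get all-zero weights, so outputs are unchanged
modified_model[0].weight.data = torch.from_numpy(W_new)  # shape (64, 600)
```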
]
@@ -479,7 +609,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 24,
+ "execution_count": 19,
 "metadata": {},
 "outputs": [
 {
@@ -488,7 +618,7 @@
 "(64, 593)"
 ]
 },
- "execution_count": 24,
+ "execution_count": 19,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -504,7 +634,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 25,
+ "execution_count": 20,
 "metadata": {},
 "outputs": [
 {
@@ -513,7 +643,7 @@
 "(64, 600)"
 ]
 },
- "execution_count": 25,
+ "execution_count": 20,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -528,7 +658,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 26,
+ "execution_count": 21,
 "metadata": {},
 "outputs": [
 {
@@ -537,7 +667,7 @@
 "torch.Size([64, 600])"
 ]
 },
- "execution_count": 26,
+ "execution_count": 21,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -553,14 +683,14 @@
 "source": [
 "Next, we'll modify the expected input/output ranges. In FINN, we prefer to work with bipolar {-1, +1} instead of binary {0, 1} values. To achieve this, we'll create a \"wrapper\" model that handles the pre/postprocessing as follows:\n",
 "\n",
- "* on the input side, we'll pre-process by (x + 1) / 2 in order to map incoming {-1, +1} inputs to {0, 1} ones which the trained network is used to. Since we're just multiplying/adding a scalar, these operations can be *streamlined* in FINN and implemented with no extra cost.\n",
+ "* on the input side, we'll pre-process by (x + 1) / 2 in order to map incoming {-1, +1} inputs to {0, 1} ones which the trained network is used to. Since we're just multiplying/adding a scalar, these operations can be [*streamlined*](https://finn.readthedocs.io/en/latest/nw_prep.html#streamlining-transformations) by FINN and implemented with no extra cost.\n",
 "\n",
 "* on the output side, we'll add a binary quantizer which maps everything below 0 to -1 and everything above 0 to +1. This is essentially the same behavior as the sigmoid we used earlier, except the outputs are bipolar instead of binary."
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 27,
+ "execution_count": 22,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -588,7 +718,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 28,
+ "execution_count": 23,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -618,7 +748,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 29,
+ "execution_count": 24,
 "metadata": {},
 "outputs": [
 {
@@ -627,7 +757,7 @@
 "0.9188772287810328"
 ]
 },
- "execution_count": 29,
+ "execution_count": 24,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -640,14 +770,15 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "## Export to FINN-ONNX <a id=\"export_finn_onnx\" ></a>\n",
+ "# Export to FINN-ONNX <a id=\"export_finn_onnx\" ></a>\n",
+ "\n",
 "\n",
- "FINN expects an ONNX model as input. We'll now export our network into ONNX to be imported and used in FINN for the next notebooks. Note that the particular ONNX representation used for FINN differs from standard ONNX, you can read more about this [here](https://finn.readthedocs.io/en/latest/internals.html#intermediate-representation-finn-onnx)."
+ "[ONNX](https://onnx.ai/) is an open format built to represent machine learning models, and the FINN compiler expects an ONNX model as input. We'll now export our network into ONNX to be imported and used in FINN for the next notebooks. Note that the particular ONNX representation used for FINN differs from standard ONNX; you can read more about this [here](https://finn.readthedocs.io/en/latest/internals.html#intermediate-representation-finn-onnx)."
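The export cell renumbered in the hunk above is unchanged by this patch; for context, the Brevitas export in these notebooks is invoked roughly as follows. This is a sketch: `model_for_export` is assumed to be the wrapper instance from the network-surgery step, and the `(module, input_shape, export_path)` call mirrors the `brevitas.onnx` usage shown in the basics notebook.

```python
import brevitas.onnx as bo

export_onnx_path = "cybsec-mlp.onnx"
input_shape = (1, 600)  # batch of 1, 593 features padded to 600
# Trace the wrapped model and write it out in FINN-ONNX format
bo.export_finn_onnx(model_for_export, input_shape, export_onnx_path)
```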
] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 25, "metadata": { "scrolled": true }, @@ -682,27 +813,71 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## View the Exported ONNX in Netron <a id=\"view_in_netron\" ></a>\n", + "## One final fix: input datatype\n", + "\n", + "There's one more thing we'll do: we will mark the input tensor datatype as `DataType.BIPOLAR`, which will be used by the compiler later on. To do this, we'll utilize the `ModelWrapper` component from FINN, which lets us examine and manipulate the ONNX graph in an easier way.\n", + "\n", + "*In the near future it will be possible to add this information to the model [while exporting](https://github.com/Xilinx/brevitas/issues/232), instead of having to add it manually.*" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input tensor name: 0\n", + "Input tensor shape: [1, 600]\n", + "Input tensor datatype: DataType.BIPOLAR\n" + ] + } + ], + "source": [ + "from finn.core.modelwrapper import ModelWrapper\n", + "from finn.core.datatype import DataType\n", + "\n", + "finn_model = ModelWrapper(export_onnx_path)\n", + "\n", + "finnonnx_in_tensor_name = finn_model.graph.input[0].name\n", + "finnonnx_model_in_shape = finn_model.get_tensor_shape(finnonnx_in_tensor_name)\n", + "finn_model.set_tensor_datatype(finnonnx_in_tensor_name, DataType.BIPOLAR)\n", + "print(\"Input tensor name: %s\" % finnonnx_in_tensor_name)\n", + "print(\"Input tensor shape: %s\" % str(finnonnx_model_in_shape))\n", + "print(\"Input tensor datatype: %s\" % str(finn_model.get_tensor_datatype(finnonnx_in_tensor_name)))\n", + "\n", + "ready_model_filename = \"cybsec-mlp-ready.onnx\"\n", + "finn_model.save(ready_model_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View the Exported ONNX in Netron\n", "\n", - "Let's examine the exported ONNX model with Netron. Particular things of note:\n", + "Let's examine the exported ONNX model with [Netron](https://github.com/lutzroeder/netron), which is a visualizer for neural networks and allows interactive investigation of network properties. For example, you can click on the individual nodes and view the properties. Particular things of note:\n", "\n", - "* The input preprocessing (x + 1) / 2 is exported as part of the network (initial Add and Div layers)\n", - "* We've exported the padded version; shape of the first MatMul node's weight parameter is 600x64\n", - "* The weight parameters (second inputs) for MatMul nodes are annotated with `quantization: finn_datatype:INT2`\n", + "* The input tensor \"0\" is annotated with `quantization: finn_datatype: BIPOLAR`\n", + "* The input preprocessing (x + 1) / 2 is exported as part of the network (initial `Add` and `Div` layers)\n", + "* Brevitas `QuantLinear` layers are exported to ONNX as `MatMul`. 
We've exported the padded version; shape of the first MatMul node's weight parameter is 600x64\n",
+ "* The weight parameters (second inputs) for MatMul nodes are annotated with `quantization: finn_datatype: INT2`\n",
 "* The quantized activations are exported as `MultiThreshold` nodes with `domain=finn.custom_op.general`\n",
 "* There's a final `MultiThreshold` node with threshold=0 to produce the final bipolar output (this is the `qnt_output` from `CybSecMLPForExport`)"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 32,
+ "execution_count": 27,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
- "Serving 'cybsec-mlp.onnx' at http://0.0.0.0:8081\n"
+ "Serving 'cybsec-mlp-ready.onnx' at http://0.0.0.0:8081\n"
 ]
 },
 {
 "data": {
 "text/html": [
 "\n",
 " <iframe\n",
 " width=\"100%\"\n",
 " height=\"400\"\n",
- " src=\"http://0.0.0.0:8081/\"\n",
+ " src=\"http://localhost:8081/\"\n",
 " frameborder=\"0\"\n",
 " allowfullscreen\n",
 " ></iframe>\n",
 " "
 ],
 "text/plain": [
- "<IPython.lib.display.IFrame at 0x7f4045ac19e8>"
+ "<IPython.lib.display.IFrame at 0x7f77214fa630>"
 ]
 },
- "execution_count": 32,
+ "execution_count": 27,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -730,7 +905,7 @@
 "source": [
 "from finn.util.visualization import showInNetron\n",
 "\n",
- "showInNetron(export_onnx_path)"
+ "showInNetron(ready_model_filename)"
 ]
 },
 {
diff --git a/notebooks/end2end_example/cybersecurity/2-export-to-finn-and-verify.ipynb b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
similarity index 59%
rename from notebooks/end2end_example/cybersecurity/2-export-to-finn-and-verify.ipynb
rename to notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
index f48cada0dd25f08f1659a778d04785bda27f443e..6ac4e52072d71f527e4ec5d923a76851b77dc247 100644
--- a/notebooks/end2end_example/cybersecurity/2-export-to-finn-and-verify.ipynb
+++ b/notebooks/end2end_example/cybersecurity/2-import-into-finn-and-verify.ipynb
@@ -6,10 +6,15 @@
 "source": [
 "# Verify Exported ONNX Model in FINN\n",
 "\n",
- "**Important: This notebook depends on the 1-train-mlp-with-brevitas notebook, because we are using the ONNX model that was exported there. So please make sure the needed .onnx file is generated before you run this notebook. Also remember to 'close and halt' any other FINN notebooks, since Netron visualizations use the same port.**\n",
+ "<font color=\"red\">**Live FINN tutorial:** We recommend clicking **Cell -> Run All** when you start reading this notebook for \"latency hiding\".</font>\n",
+ "\n",
+ "**Important: This notebook depends on the 1-train-mlp-with-brevitas notebook, because we are using the ONNX model that was exported there. So please make sure the needed .onnx file is generated before you run this notebook.**\n",
+ "\n",
+ "**Also remember to 'close and halt' any other FINN notebooks, since Netron visualizations use the same port.**\n",
 "\n",
 "In this notebook we will show how to import the network we trained in Brevitas and verify it in the FINN compiler. \n",
 "This verification process can actually be done at various stages in the compiler [as explained in this notebook](../bnn-pynq/tfc_end2end_verification.ipynb) but for this example we'll only consider the first step: verifying the exported high-level FINN-ONNX model.\n",
+ "Another goal of this notebook is to introduce you to the concept of *graph transformations* -- we'll be applying some transformations to the graph to make it executable for verification. 
\n",
 "Once this model is successfully verified, we'll generate an FPGA accelerator from it in the next notebook."
 ]
 },
@@ -36,8 +41,8 @@
 "source": [
 "## Outline\n",
 "-------------\n",
- "1. [Import model and visualize in Netron](#brevitas_import_visualization)\n",
- "2. [Network preperations: Tidy up transformations](#network_preparations)\n",
+ "1. [Import model into FINN with ModelWrapper](#brevitas_import_visualization)\n",
+ "2. [Network preparations: Tidy-up transformations](#network_preparations)\n",
 "3. [Load the dataset and Brevitas model](#load_dataset) \n",
 "4. [Compare FINN and Brevitas execution](#compare_brevitas)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "# 1. Import model and visualize in Netron <a id=\"brevitas_import_visualization\"></a>\n",
+ "# 1. Import model into FINN with ModelWrapper <a id=\"brevitas_import_visualization\"></a>\n",
 "\n",
 "Now that we have the model in .onnx format, we can work with it using FINN. To import it into FINN, we'll use the [`ModelWrapper`](https://finn.readthedocs.io/en/latest/source_code/finn.core.html#finn.core.modelwrapper.ModelWrapper). It is a wrapper around the ONNX model which provides several helper functions to make it easier to work with the model."
 ]
 },
@@ -59,46 +64,91 @@
 "source": [
 "from finn.core.modelwrapper import ModelWrapper\n",
 "\n",
- "model_file_path = \"cybsec-mlp.onnx\"\n",
- "model_for_sim = ModelWrapper(model_file_path)"
+ "ready_model_filename = \"cybsec-mlp-ready.onnx\"\n",
+ "model_for_sim = ModelWrapper(ready_model_filename)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "To visualize the exported model, Netron can be used. Netron is a visualizer for neural networks and allows interactive investigation of network properties. For example, you can click on the individual nodes and view the properties."
+ "Let's have a look at some of the member functions exposed by `ModelWrapper` to see what kind of information we can extract from it."
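Beyond listing the members as done below, the graph-navigation helpers in that listing (`find_consumer`, `find_direct_successors`, and friends) can be chained to walk the model. A small sketch, runnable once `model_for_sim` has been loaded as above:

```python
# Start at the global input and step to the node that consumes it
in_name = model_for_sim.graph.input[0].name
first_node = model_for_sim.find_consumer(in_name)
print("First node op type:", first_node.op_type)  # the Add from the (x + 1) / 2 preprocessing
next_nodes = model_for_sim.find_direct_successors(first_node)
print("Next op types:", [n.op_type for n in next_nodes])
```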
] }, { "cell_type": "code", "execution_count": 3, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Serving 'cybsec-mlp.onnx' at http://0.0.0.0:8081\n" - ] - }, { "data": { - "text/html": [ - "\n", - " <iframe\n", - " width=\"100%\"\n", - " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", - " frameborder=\"0\"\n", - " allowfullscreen\n", - " ></iframe>\n", - " " - ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fc1fc950748>" + "['__class__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__getattribute__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__',\n", + " '__weakref__',\n", + " '_model_proto',\n", + " 'analysis',\n", + " 'check_all_tensor_shapes_specified',\n", + " 'check_compatibility',\n", + " 'cleanup',\n", + " 'find_consumer',\n", + " 'find_consumers',\n", + " 'find_direct_predecessors',\n", + " 'find_direct_successors',\n", + " 'find_producer',\n", + " 'find_upstream',\n", + " 'get_all_tensor_names',\n", + " 'get_finn_nodes',\n", + " 'get_initializer',\n", + " 'get_metadata_prop',\n", + " 'get_node_index',\n", + " 'get_nodes_by_op_type',\n", + " 'get_non_finn_nodes',\n", + " 'get_tensor_datatype',\n", + " 'get_tensor_fanout',\n", + " 'get_tensor_layout',\n", + " 'get_tensor_shape',\n", + " 'get_tensor_sparsity',\n", + " 'get_tensor_valueinfo',\n", + " 'graph',\n", + " 'is_fork_node',\n", + " 'is_join_node',\n", + " 'make_empty_exec_context',\n", + " 'make_new_valueinfo_name',\n", + " 'model',\n", + " 'rename_tensor',\n", + " 'save',\n", + " 'set_initializer',\n", + " 'set_metadata_prop',\n", + " 'set_tensor_datatype',\n", + " 'set_tensor_layout',\n", + " 'set_tensor_shape',\n", + " 'set_tensor_sparsity',\n", + " 'temporary_fix_oldstyle_domain',\n", + " 'transform']" ] }, "execution_count": 3, @@ -107,60 +157,33 @@ } ], "source": [ - "from finn.util.visualization import showInNetron\n", - "showInNetron(model_file_path)" + "dir(model_for_sim)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# 2. Network preperation: Tidy up transformations <a id=\"network_preparations\"></a>\n", - "\n", - "Before running the verification, we need to prepare our FINN-ONNX model. In particular, all the intermediate tensors need to have statically defined shapes. To do this, we apply some transformations to the model like a kind of \"tidy-up\" to make it easier to process. You can read more about these transformations in [this notebook](../bnn-pynq/tfc_end2end_example.ipynb).\n" + "Many of these helper functions relate to extracting information about the structure and properties of the ONNX model. You can find out more about examining and manipulating ONNX models programmatically in [this tutorial](../../basics/0_how_to_work_with_onnx.ipynb), but we'll show a few basic functions here. For instance, we can extract the shape and datatype annotation for various tensors in the graph, as well as information related to the operation types associated with each node." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], - "source": [ - "from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n", - "from finn.transformation.infer_shapes import InferShapes\n", - "from finn.transformation.infer_datatypes import InferDataTypes\n", - "from finn.transformation.fold_constants import FoldConstants\n", - "\n", - "model_for_sim = model_for_sim.transform(InferShapes())\n", - "model_for_sim = model_for_sim.transform(FoldConstants())\n", - "model_for_sim = model_for_sim.transform(GiveUniqueNodeNames())\n", - "model_for_sim = model_for_sim.transform(GiveReadableTensorNames())\n", - "model_for_sim = model_for_sim.transform(InferDataTypes())\n", - "model_for_sim = model_for_sim.transform(RemoveStaticGraphInputs())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There's one more thing we'll do: we will mark the input tensor datatype as bipolar, which will be used by the compiler later on. \n", - "\n", - "*In the near future it will be possible to add this information to the model while exporting, instead of having to add it manually.*" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Input tensor name: global_in\n", - "Output tensor name: global_out\n", + "Input tensor name: 0\n", + "Output tensor name: 78\n", "Input tensor shape: [1, 600]\n", - "Input tensor datatype: DataType.BIPOLAR\n" + "Output tensor shape: [1, 1]\n", + "Input tensor datatype: DataType.BIPOLAR\n", + "Output tensor datatype: DataType.FLOAT32\n", + "List of node operator types in the graph: \n", + "['Add', 'Div', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'Unsqueeze', 'BatchNormalization', 'Squeeze', 'MultiThreshold', 'Mul', 'MatMul', 'Add', 'Mul', 'MultiThreshold']\n" ] } ], @@ -172,22 +195,69 @@ "print(\"Input tensor name: %s\" % finnonnx_in_tensor_name)\n", "print(\"Output tensor name: %s\" % finnonnx_out_tensor_name)\n", "finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)\n", + "finnonnx_model_out_shape = model_for_sim.get_tensor_shape(finnonnx_out_tensor_name)\n", "print(\"Input tensor shape: %s\" % str(finnonnx_model_in_shape))\n", - "model_for_sim.set_tensor_datatype(finnonnx_in_tensor_name, DataType.BIPOLAR)\n", + "print(\"Output tensor shape: %s\" % str(finnonnx_model_out_shape))\n", + "finnonnx_model_in_dt = model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)\n", + "finnonnx_model_out_dt = model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)\n", "print(\"Input tensor datatype: %s\" % str(model_for_sim.get_tensor_datatype(finnonnx_in_tensor_name)))\n", - "\n", - "verified_model_filename = \"cybsec-mlp-verified.onnx\"\n", - "model_for_sim.save(verified_model_filename)" + "print(\"Output tensor datatype: %s\" % str(model_for_sim.get_tensor_datatype(finnonnx_out_tensor_name)))\n", + "print(\"List of node operator types in the graph: \")\n", + "print([x.op_type for x in model_for_sim.graph.node])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's view our ready-to-go model. Some changes to note:\n", + "Note that the output tensor is (as of yet) marked as a float32 value, even though we know the output is binary. 
This will be automatically inferred by the compiler in the next step when we run the `InferDataTypes` transformation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Network preparation: Tidy-up transformations <a id=\"network_preparations\"></a>\n", + "\n", + "Before running the verification, we need to prepare our FINN-ONNX model. In particular, all the intermediate tensors need to have statically defined shapes. To do this, we apply some graph transformations to the model like a kind of \"tidy-up\" to make it easier to process. \n", + "\n", + "**Graph transformations in FINN.** The whole FINN compiler is built around the idea of transformations, which gradually transform the model into a synthesizable hardware description. Although FINN offers functionality that automatically calls a standard sequence of transformations (covered in the next notebook), you can also manually call individual transformations (like we do here), as well as adding your own transformations, to create custom flows. You can read more about these transformations in [this notebook](../bnn-pynq/tfc_end2end_example.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames, RemoveStaticGraphInputs\n", + "from finn.transformation.infer_shapes import InferShapes\n", + "from finn.transformation.infer_datatypes import InferDataTypes\n", + "from finn.transformation.fold_constants import FoldConstants\n", + "\n", + "model_for_sim = model_for_sim.transform(InferShapes())\n", + "model_for_sim = model_for_sim.transform(FoldConstants())\n", + "model_for_sim = model_for_sim.transform(GiveUniqueNodeNames())\n", + "model_for_sim = model_for_sim.transform(GiveReadableTensorNames())\n", + "model_for_sim = model_for_sim.transform(InferDataTypes())\n", + "model_for_sim = model_for_sim.transform(RemoveStaticGraphInputs())\n", "\n", - "* all intermediate tensors now have their shapes specified (indicated by numbers next to the arrows going between layers)\n", - "* the datatype on the input tensor is set to DataType.BIPOLAR (click on the `global_in` node to view properties)" + "verif_model_filename = \"cybsec-mlp-verification.onnx\"\n", + "model_for_sim.save(verif_model_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Would the FINN compiler still work if we didn't do this?** The compilation step in the next notebook applies these transformations internally and would work fine, but we're going to use FINN's verification capabilities below and these require the tidy-up transformations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's view our ready-to-go model after the transformations. Note that all intermediate tensors now have their shapes specified (indicated by numbers next to the arrows going between layers). Additionally, the datatype inference step has propagated quantization annotations to the outputs of `MultiThreshold` layers (expand by clicking the + next to the name of the tensor to see the quantization annotation) and the final output tensor." 
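Before viewing the model, a brief aside on the custom flows mentioned above: a FINN transformation is just a class with an `apply(model)` method that returns the (possibly modified) model plus a flag indicating whether the graph changed. The following is a minimal sketch of a toy pass of our own, not a FINN-provided one, and assumes the `Transformation` base class is importable from `finn.transformation.base` (as in recent finn-base versions):

```python
from finn.transformation.base import Transformation

class RenameTopLevelIO(Transformation):
    """Toy transformation: give the top-level input/output fixed names."""

    def apply(self, model):
        graph_modified = False
        if model.graph.input[0].name != "my_input":
            model.rename_tensor(model.graph.input[0].name, "my_input")
            graph_modified = True
        if model.graph.output[0].name != "my_output":
            model.rename_tensor(model.graph.output[0].name, "my_output")
            graph_modified = True
        # returning True as the second element makes .transform() re-apply
        # this pass until the graph stops changing (fixpoint iteration),
        # so custom passes should be written to be idempotent
        return (model, graph_modified)

# usage would be the same as for the built-in passes:
# model_for_sim = model_for_sim.transform(RenameTopLevelIO())
```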
] }, { @@ -199,9 +269,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Stopping http://0.0.0.0:8081\n", - "Serving 'cybsec-mlp-verified.onnx' at http://0.0.0.0:8081\n" + "Serving 'cybsec-mlp-verification.onnx' at http://0.0.0.0:8081\n" ] }, { @@ -211,14 +279,14 @@ " <iframe\n", " width=\"100%\"\n", " height=\"400\"\n", - " src=\"http://0.0.0.0:8081/\"\n", + " src=\"http://localhost:8081/\"\n", " frameborder=\"0\"\n", " allowfullscreen\n", " ></iframe>\n", " " ], "text/plain": [ - "<IPython.lib.display.IFrame at 0x7fc280154278>" + "<IPython.lib.display.IFrame at 0x7f388298b470>" ] }, "execution_count": 6, @@ -227,7 +295,9 @@ } ], "source": [ - "showInNetron(verified_model_filename)" + "from finn.util.visualization import showInNetron\n", + "\n", + "showInNetron(verif_model_filename)" ] }, { @@ -236,9 +306,7 @@ "source": [ "# 3. Load the Dataset and the Brevitas Model <a id=\"load_dataset\"></a>\n", "\n", - "We'll use some example data from the quantized UNSW-NB15 dataset (from the previous notebook) to use as inputs for the verification. \n", - "\n", - "Recall that the quantized values from the dataset are 593-bit binary {0, 1} vectors whereas our exported model takes 600-bit bipolar {-1, +1} vectors, so we'll have to preprocess it a bit before we can use it for verifying the ONNX model." + "We'll use some example data from the quantized UNSW-NB15 dataset (from the previous notebook) to use as inputs for the verification. " ] }, { @@ -258,16 +326,24 @@ } ], "source": [ - "from torch.utils.data import DataLoader, Dataset\n", - "from dataloader_quantized import UNSW_NB15_quantized\n", + "import numpy as np\n", + "from torch.utils.data import TensorDataset\n", "\n", - "test_quantized_dataset = UNSW_NB15_quantized(file_path_train='UNSW_NB15_training-set.csv', \\\n", - " file_path_test = \"UNSW_NB15_testing-set.csv\", \\\n", - " train=False)\n", + "def get_preqnt_dataset(data_dir: str, train: bool):\n", + " unsw_nb15_data = np.load(data_dir + \"/unsw_nb15_binarized.npz\")\n", + " if train:\n", + " partition = \"train\"\n", + " else:\n", + " partition = \"test\"\n", + " part_data = unsw_nb15_data[partition].astype(np.float32)\n", + " part_data = torch.from_numpy(part_data)\n", + " part_data_in = part_data[:, :-1]\n", + " part_data_out = part_data[:, -1]\n", + " return TensorDataset(part_data_in, part_data_out)\n", "\n", "n_verification_inputs = 100\n", - "# last column is the label, exclude it\n", - "input_tensor = test_quantized_dataset.data[:n_verification_inputs,:-1]\n", + "test_quantized_dataset = get_preqnt_dataset(\".\", False)\n", + "input_tensor = test_quantized_dataset.tensors[0][:n_verification_inputs]\n", "input_tensor.shape" ] }, @@ -325,6 +401,9 @@ "# replace this with your trained network checkpoint if you're not\n", "# using the pretrained weights\n", "trained_state_dict = torch.load(\"state_dict.pth\")[\"models_state_dict\"][0]\n", + "# Uncomment the following line if you previously chose to train the network yourself\n", + "#trained_state_dict = torch.load(\"state_dict_self-trained.pth\")\n", + "\n", "brevitas_model.load_state_dict(trained_state_dict, strict=False)" ] }, @@ -355,7 +434,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's make helper functions to execute the same input with Brevitas and FINN. For FINN, we'll use the [`finn.core.onnx_exec`](https://finn.readthedocs.io/en/latest/source_code/finn.core.html#finn.core.onnx_exec.execute_onnx) function to execute the exported FINN-ONNX on the inputs." 
+ "Let's make helper functions to execute the same input with Brevitas and FINN. For FINN, we'll use the [`finn.core.onnx_exec`](https://finn.readthedocs.io/en/latest/source_code/finn.core.html#finn.core.onnx_exec.execute_onnx) function to execute the exported FINN-ONNX on the inputs. Note that this ONNX execution is for verification only; not for accelerated execution.\n", + "\n", + "Recall that the quantized values from the dataset are 593-bit binary {0, 1} vectors whereas our exported model takes 600-bit bipolar {-1, +1} vectors, so we'll have to preprocess it a bit before we can use it for verifying the ONNX model." ] }, { @@ -364,7 +445,12 @@ "metadata": {}, "outputs": [], "source": [ + "import finn.core.onnx_exec as oxe\n", + "\n", "def inference_with_finn_onnx(current_inp):\n", + " finnonnx_in_tensor_name = model_for_sim.graph.input[0].name\n", + " finnonnx_model_in_shape = model_for_sim.get_tensor_shape(finnonnx_in_tensor_name)\n", + " finnonnx_out_tensor_name = model_for_sim.graph.output[0].name\n", " # convert input to numpy for FINN\n", " current_inp = current_inp.detach().numpy()\n", " # add padding and re-scale to bipolar\n", @@ -397,12 +483,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "ok 100 nok 0: 100%|██████████| 100/100 [00:48<00:00, 2.09it/s]\n" + "ok 100 nok 0: 100%|██████████| 100/100 [00:47<00:00, 2.09it/s]\n" ] } ], "source": [ - "import finn.core.onnx_exec as oxe\n", "import numpy as np\n", "from tqdm import trange\n", "\n", @@ -421,12 +506,12 @@ " ok += 1 if finn_output == brevitas_output else 0\n", " nok += 1 if finn_output != brevitas_output else 0\n", " verify_range.set_description(\"ok %d nok %d\" % (ok, nok))\n", - " verify_range.refresh() # to show immediately the update" + " verify_range.refresh()" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { diff --git a/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb b/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb index 1ee1cefbe17d96ffd7a2e6384e037e1d9fbdd989..551c321534cfefa13b8d34b7f1e7685000702ec0 100644 --- a/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb +++ b/notebooks/end2end_example/cybersecurity/3-build-accelerator-with-finn.ipynb @@ -6,7 +6,9 @@ "source": [ "# Building the Streaming Dataflow Accelerator\n", "\n", - "**Important: This notebook depends on the 2-cybersecurity-finn-verification notebook because we are using models that were created by these notebooks. So please make sure the needed .onnx files are generated prior to running this notebook.**\n", + "<font color=\"red\">**Live FINN tutorial:** We recommend clicking **Cell -> Run All** when you start reading this notebook for \"latency hiding\".</font>\n", + "\n", + "**Important: This notebook depends on the 1-train-mlp-with-brevitas notebook because we are using models that were created by that notebook. So please make sure the needed .onnx files are generated prior to running this notebook.**\n", "\n", "<img align=\"left\" src=\"finn-example.png\" alt=\"drawing\" style=\"margin-right: 20px\" width=\"250\"/>\n", "\n", @@ -29,7 +31,8 @@ " 2.3 [Configuring the Performance](#config_perf) \n", "4. [Launch a Build: Only Estimate Reports](#build_estimate_report)\n", "5. [Launch a Build: Stitched IP, out-of-context synth and rtlsim Performance](#build_ip_synth_rtlsim)\n", - "6. [Launch a Build: PYNQ Bitfile and Driver](#build_bitfile_driver)" + "6. 
[(Optional) Launch a Build: PYNQ Bitfile and Driver](#build_bitfile_driver)\n", + "7. [(Optional) Run on PYNQ board](#run_on_pynq)" ] }, { @@ -38,10 +41,10 @@ "source": [ "## Introduction to `build_dataflow` Tool <a id=\"intro_build_dataflow\"></a>\n", "\n", - "Since version 0.5b, the FINN compiler has a `build_dataflow` tool. Compared to previous versions which required setting up all the needed transformations in a Python script, it makes experimenting with dataflow architecture generation easier. The core idea is to specify the relevant build info as a configuration `dict`, which invokes all the necessary steps to make the dataflow build happen. It can be invoked either from the [command line](https://finn-dev.readthedocs.io/en/latest/command_line.html) or with a single Python function call\n", + "Since version 0.5b, the FINN compiler has a `build_dataflow` tool. Compared to previous versions which required setting up all the needed transformations in a Python script, it makes experimenting with dataflow architecture generation easier. The core idea is to specify the relevant build info as a configuration `dict`, which invokes all the necessary steps to make the dataflow build happen. It can be invoked either from the [command line](https://finn-dev.readthedocs.io/en/latest/command_line.html) or with a single Python function call.\n", "\n", "\n", - "In this notebook, we'll use the Python function call to invoke the builds to stay inside the Jupyter notebook, but feel free to experiment with reproducing what we do here with the `./run-docker.sh build_dataflow` and `./run-docker.sh build_custom` command-line entry points too, as documented [here]((https://finn-dev.readthedocs.io/en/latest/command_line.html))." + "In this notebook, we'll use the Python function call to invoke the builds to stay inside the Jupyter notebook, but feel free to experiment with reproducing what we do here with the `./run-docker.sh build_dataflow` and `./run-docker.sh build_custom` command-line entry points too. " ] }, { @@ -69,8 +72,8 @@ " - `BITFILE` : integrate the accelerator into a shell to produce a standalone bitfile\n", " - `PYNQ_DRIVER` : generate a PYNQ Python driver that can be used to launch the accelerator\n", " - `DEPLOYMENT_PACKAGE` : create a folder with the `BITFILE` and `PYNQ_DRIVER` outputs, ready to be copied to the target FPGA platform.\n", - "* `output_dir`: the directory where the all the generated build outputs above will be written into.\n", - "* `steps`: list of predefined (or custom) build steps FINN will go through. Use `build_dataflow_config.estimate_only_dataflow_steps` to execute only the steps needed for estimation (without any synthesis), and the `build_dataflow_config.default_build_dataflow_steps` otherwise (which is the default value).\n", + "* `output_dir`: the directory where all the generated build outputs above will be written into.\n", + "* `steps`: list of predefined (or custom) build steps FINN will go through. Use `build_dataflow_config.estimate_only_dataflow_steps` to execute only the steps needed for estimation (without any synthesis), and the `build_dataflow_config.default_build_dataflow_steps` otherwise (which is the default value). 
You can find the list of default steps [here](https://finn.readthedocs.io/en/latest/source_code/finn.builder.html#finn.builder.build_dataflow_config.default_build_dataflow_steps) in the documentation.\n", "\n", "### Configuring the Board and FPGA Part <a id=\"config_fpga\"></a>\n", "\n", @@ -80,7 +83,7 @@ "\n", "### Configuring the Performance <a id=\"config_perf\"></a>\n", "\n", - "You can configure the performance (and correspondingly, the FPGA resource footprint) of the generated in two ways:\n", + "You can configure the performance (and correspondingly, the FPGA resource footprint) of the generated dataflow accelerator in two ways:\n", "\n", "1) (basic) Set a target performance and let the compiler figure out the per-node parallelization settings.\n", "\n", @@ -88,7 +91,7 @@ "\n", "This notebook only deals with the basic approach, for which you need to set up:\n", "\n", - "* `target_fps`: target inference performance in frames per second. Note that target may not be achievable due to specific layer constraints, or due to resource limitations of the FPGA.\n", + "* `target_fps`: target inference performance in frames per second. Note that target may not be achievable due to specific layer constraints, or due to resource limitations of the FPGA. \n", "* `synth_clk_period_ns`: target clock frequency (in nanoseconds) for Vivado synthesis. e.g. `synth_clk_period_ns=5.0` will target a 200 MHz clock. Note that the target clock period may not be achievable depending on the FPGA part and design complexity." ] }, @@ -103,15 +106,57 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Previous run results deleted!\n" + ] + } + ], + "source": [ + "import finn.builder.build_dataflow as build\n", + "import finn.builder.build_dataflow_config as build_cfg\n", + "import os\n", + "import shutil\n", + "\n", + "model_file = \"cybsec-mlp-ready.onnx\"\n", + "\n", + "estimates_output_dir = \"output_estimates_only\"\n", + "\n", + "#Delete previous run results if exist\n", + "if os.path.exists(estimates_output_dir):\n", + " shutil.rmtree(estimates_output_dir)\n", + " print(\"Previous run results deleted!\")\n", + "\n", + "\n", + "cfg_estimates = build.DataflowBuildConfig(\n", + " output_dir = estimates_output_dir,\n", + " mvau_wwidth_max = 80,\n", + " target_fps = 1000000,\n", + " synth_clk_period_ns = 10.0,\n", + " fpga_part = \"xc7z020clg400-1\",\n", + " steps = build_cfg.estimate_only_dataflow_steps,\n", + " generate_outputs=[\n", + " build_cfg.DataflowOutputType.ESTIMATE_REPORTS,\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Building dataflow accelerator from cybsec-mlp-verified.onnx\n", - "Intermediate outputs will be generated in /tmp/finn_dev_osboxes\n", + "Building dataflow accelerator from cybsec-mlp-ready.onnx\n", + "Intermediate outputs will be generated in /tmp/finn_dev_ubuntu\n", "Final outputs will be generated in output_estimates_only\n", "Build log is at output_estimates_only/build_dataflow.log\n", "Running step: step_tidy_up [1/7]\n", @@ -121,7 +166,9 @@ "Running step: step_target_fps_parallelization [5/7]\n", "Running step: step_apply_folding_config [6/7]\n", "Running step: step_generate_estimate_reports [7/7]\n", - "Completed successfully\n" + "Completed successfully\n", + "CPU times: user 1.84 s, sys: 599 ms, total: 2.44 s\n", + "Wall time: 1.77 s\n" ] 
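As a side note, the step lists referenced by `steps` are plain Python lists of step names, so you can inspect (or customize) them directly. A small sketch, reusing the `build_cfg` import from the configuration cell above:

```python
# the estimate-only flow stops after report generation, before any
# HLS code generation or synthesis steps (the 7 steps printed above)
print(build_cfg.estimate_only_dataflow_steps)

# the default flow (used when `steps` is left unset) is a superset of the above
print(build_cfg.default_build_dataflow_steps)
```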
}, { @@ -130,31 +177,14 @@ "0" ] }, - "execution_count": 1, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import finn.builder.build_dataflow as build\n", - "import finn.builder.build_dataflow_config as build_cfg\n", - "\n", - "model_file = \"cybsec-mlp-verified.onnx\"\n", - "\n", - "estimates_output_dir = \"output_estimates_only\"\n", - "\n", - "cfg = build.DataflowBuildConfig(\n", - " output_dir = estimates_output_dir,\n", - " target_fps = 1000000,\n", - " synth_clk_period_ns = 10.0,\n", - " fpga_part = \"xc7z020clg400-1\",\n", - " steps = build_cfg.estimate_only_dataflow_steps,\n", - " generate_outputs=[\n", - " build_cfg.DataflowOutputType.ESTIMATE_REPORTS,\n", - " ]\n", - ")\n", - "\n", - "build.build_dataflow_cfg(model_file, cfg)" + "%%time\n", + "build.build_dataflow_cfg(model_file, cfg_estimates)" ] }, { @@ -166,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -183,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -209,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -217,11 +247,11 @@ "output_type": "stream", "text": [ "{\r\n", - " \"critical_path_cycles\": 272,\r\n", - " \"max_cycles\": 80,\r\n", - " \"max_cycles_node_name\": \"StreamingFCLayer_Batch_0\",\r\n", - " \"estimated_throughput_fps\": 1250000.0,\r\n", - " \"estimated_latency_ns\": 2720.0\r\n", + " \"critical_path_cycles\": 252,\r\n", + " \"max_cycles\": 64,\r\n", + " \"max_cycles_node_name\": \"StreamingFCLayer_Batch_1\",\r\n", + " \"estimated_throughput_fps\": 1562500.0,\r\n", + " \"estimated_latency_ns\": 2520.0\r\n", "}" ] } @@ -234,12 +264,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Since all of these reports are .json files, we can easily load them into Python for further processing. Let's define a helper function and look at the `estimate_layer_cycles.json` report." + "Since all of these reports are .json files, we can easily load them into Python for further processing. This can be useful if you are building your own design automation tools on top of FINN. Let's define a helper function and look at the `estimate_layer_cycles.json` report." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -252,19 +282,19 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'StreamingFCLayer_Batch_0': 80,\n", + "{'StreamingFCLayer_Batch_0': 60,\n", " 'StreamingFCLayer_Batch_1': 64,\n", " 'StreamingFCLayer_Batch_2': 64,\n", " 'StreamingFCLayer_Batch_3': 64}" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -277,34 +307,34 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here, we can see the estimated number of clock cycles each layer will take. Recall that all of these layers will be running in parallel, and the slowest layer will determine the overall throughput of the entire neural network. FINN attempts to parallelize each layer such that they all take a similar number of cycles, and less than the corresponding number of cycles that would be required to meet `target_fps`.\n", + "Here, we can see the estimated number of clock cycles each layer will take. 
Recall that all of these layers will be running in parallel, and the slowest layer will determine the overall throughput of the entire neural network. FINN attempts to parallelize each layer such that they all take a similar number of cycles, and less than the corresponding number of cycles that would be required to meet `target_fps`. Additionally, by summing up all layer cycle estimates, one can obtain an estimate for the overall latency of the whole network. \n", "\n", "Finally, we can see the layer-by-layer resource estimates in the `estimate_layer_resources.json` report:" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'StreamingFCLayer_Batch_0': {'BRAM_18K': 27,\n", - " 'BRAM_efficiency': 0.15432098765432098,\n", - " 'LUT': 8149,\n", + "{'StreamingFCLayer_Batch_0': {'BRAM_18K': 36,\n", + " 'BRAM_efficiency': 0.11574074074074074,\n", + " 'LUT': 8184,\n", " 'URAM': 0,\n", " 'URAM_efficiency': 1,\n", " 'DSP': 0},\n", " 'StreamingFCLayer_Batch_1': {'BRAM_18K': 4,\n", " 'BRAM_efficiency': 0.1111111111111111,\n", - " 'LUT': 1435,\n", + " 'LUT': 1217,\n", " 'URAM': 0,\n", " 'URAM_efficiency': 1,\n", " 'DSP': 0},\n", " 'StreamingFCLayer_Batch_2': {'BRAM_18K': 4,\n", " 'BRAM_efficiency': 0.1111111111111111,\n", - " 'LUT': 1435,\n", + " 'LUT': 1217,\n", " 'URAM': 0,\n", " 'URAM_efficiency': 1,\n", " 'DSP': 0},\n", @@ -314,10 +344,10 @@ " 'URAM': 0,\n", " 'URAM_efficiency': 1,\n", " 'DSP': 0},\n", - " 'total': {'BRAM_18K': 36.0, 'LUT': 11360.0, 'URAM': 0.0, 'DSP': 0.0}}" + " 'total': {'BRAM_18K': 45.0, 'LUT': 10959.0, 'URAM': 0.0, 'DSP': 0.0}}" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -332,7 +362,7 @@ "source": [ "This particular report is useful to determine whether the current configuration will fit into a particular FPGA. If you see that the resource requirements are too high for the FPGA you had in mind, you should consider lowering the `target_fps`.\n", "\n", - "*Note that the analytical models tend to over-estimate how much resources are needed, since they can't capture the effects of various synthesis optimizations.*" + "**Note that the analytical models tend to over-estimate how many resources are needed, since they can't capture the effects of various synthesis optimizations.**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Launch a Build: Stitched IP, out-of-context synth and rtlsim Performance <a id=\"build_ip_synth_rtlsim\"></a>\n", "\n", - "Once we have a configuration that gives satisfactory estimates, we can move on to generating the accelerator. We can do this in different ways depending on how we want to integrate the accelerator into a larger system. For instance, if we have a larger streaming system built in Vivado or if we'd like to re-use this generated accelerator as an IP component in other projects, the `STITCHED_IP` output product is a good choice." + "Once we have a configuration that gives satisfactory estimates, we can move on to generating the accelerator. We can do this in different ways depending on how we want to integrate the accelerator into a larger system. For instance, if we have a larger streaming system built in Vivado or if we'd like to re-use this generated accelerator as an IP component in other projects, the `STITCHED_IP` output product is a good choice. 
We can also use the `OOC_SYNTH` output product to get post-synthesis resource and clock frequency numbers for our accelerator.\n", + "\n", + "<font color=\"red\">**Live FINN tutorial:** These next builds will take about 10 minutes to complete since multiple calls to Vivado and a call to RTL simulation are involved. While this is running, you can examine the generated files with noVNC -- it is running on **(your AWS URL):6080/vnc.html**\n", + "\n", + "* Once the `step_hls_codegen [8/16]` below is completed, you can view the generated HLS code under its own folder for each layer: `/tmp/finn_dev_ubuntu/code_gen_ipgen_StreamingFCLayer_Batch_XXXXXX`\n", + " \n", + "* Once the `step_create_stitched_ip [11/16]` below is completed, you can view the generated stitched IP in Vivado under `/home/ubuntu/finn/notebooks/end2end_example/cybersecurity/output_ipstitch_ooc_rtlsim/stitched_ip`\n", + "</font> " ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Building dataflow accelerator from cybsec-mlp-verified.onnx\n", - "Intermediate outputs will be generated in /tmp/finn_dev_osboxes\n", - "Final outputs will be generated in output_ipstitch_ooc_rtlsim\n", - "Build log is at output_ipstitch_ooc_rtlsim/build_dataflow.log\n", - "Running step: step_tidy_up [1/15]\n", - "Running step: step_streamline [2/15]\n", - "Running step: step_convert_to_hls [3/15]\n", - "Running step: step_create_dataflow_partition [4/15]\n", - "Running step: step_target_fps_parallelization [5/15]\n", - "Running step: step_apply_folding_config [6/15]\n", - "Running step: step_generate_estimate_reports [7/15]\n", - "Running step: step_hls_ipgen [8/15]\n", - "Running step: step_set_fifo_depths [9/15]\n", - "Running step: step_create_stitched_ip [10/15]\n", - "Running step: step_measure_rtlsim_performance [11/15]\n", - "Running step: step_make_pynq_driver [12/15]\n", - "Running step: step_out_of_context_synthesis [13/15]\n", - "Running step: step_synthesize_bitfile [14/15]\n", - "Running step: step_deployment_package [15/15]\n", - "Completed successfully\n" + "Previous run results deleted!\n" ] - }, - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "import finn.builder.build_dataflow as build\n", "import finn.builder.build_dataflow_config as build_cfg\n", + "import os\n", + "import shutil\n", "\n", - "model_file = \"cybsec-mlp-verified.onnx\"\n", + "model_file = \"cybsec-mlp-ready.onnx\"\n", "\n", "rtlsim_output_dir = \"output_ipstitch_ooc_rtlsim\"\n", "\n", - "cfg = build.DataflowBuildConfig(\n", + "#Delete previous run results if exist\n", + "if os.path.exists(rtlsim_output_dir):\n", + " shutil.rmtree(rtlsim_output_dir)\n", + " print(\"Previous run results deleted!\")\n", + "\n", + "cfg_stitched_ip = build.DataflowBuildConfig(\n", " output_dir = rtlsim_output_dir,\n", + " mvau_wwidth_max = 80,\n", " target_fps = 1000000,\n", " synth_clk_period_ns = 10.0,\n", " fpga_part = \"xc7z020clg400-1\",\n", @@ -404,21 +420,76 @@ " build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,\n", " build_cfg.DataflowOutputType.OOC_SYNTH,\n", " ]\n", - ")\n", - "\n", - "build.build_dataflow_cfg(model_file, cfg)" + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building dataflow accelerator from cybsec-mlp-ready.onnx\n", + "Intermediate outputs 
will be generated in /tmp/finn_dev_ubuntu\n", + "Final outputs will be generated in output_ipstitch_ooc_rtlsim\n", + "Build log is at output_ipstitch_ooc_rtlsim/build_dataflow.log\n", + "Running step: step_tidy_up [1/16]\n", + "Running step: step_streamline [2/16]\n", + "Running step: step_convert_to_hls [3/16]\n", + "Running step: step_create_dataflow_partition [4/16]\n", + "Running step: step_target_fps_parallelization [5/16]\n", + "Running step: step_apply_folding_config [6/16]\n", + "Running step: step_generate_estimate_reports [7/16]\n", + "Running step: step_hls_codegen [8/16]\n", + "Running step: step_hls_ipgen [9/16]\n", + "Running step: step_set_fifo_depths [10/16]\n", + "Running step: step_create_stitched_ip [11/16]\n", + "Running step: step_measure_rtlsim_performance [12/16]\n", + "Running step: step_make_pynq_driver [13/16]\n", + "Running step: step_out_of_context_synthesis [14/16]\n", + "Running step: step_synthesize_bitfile [15/16]\n", + "Running step: step_deployment_package [16/16]\n", + "Completed successfully\n", + "CPU times: user 4.76 s, sys: 710 ms, total: 5.47 s\n", + "Wall time: 8min 5s\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "build.build_dataflow_cfg(model_file, cfg_stitched_ip)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Why is e.g. `step_synthesize_bitfile` listed above even though we didn't ask for a bitfile in the output products? This is because we're using the default set of build steps, which includes `step_synthesize_bitfile`. Since its output product is not selected, this step will do nothing." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Among the output products, we will find the accelerator exported as IP:" + "Among the output products, we will find the accelerator exported as a stitched IP block design:" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -427,9 +498,9 @@ "text": [ "all_verilog_srcs.txt\t\t finn_vivado_stitch_proj.xpr\r\n", "finn_vivado_stitch_proj.cache\t ip\r\n", - "finn_vivado_stitch_proj.hbs\t make_project.sh\r\n", - "finn_vivado_stitch_proj.hw\t make_project.tcl\r\n", - "finn_vivado_stitch_proj.ip_user_files vivado.jou\r\n", + "finn_vivado_stitch_proj.hw\t make_project.sh\r\n", + "finn_vivado_stitch_proj.ip_user_files make_project.tcl\r\n", + "finn_vivado_stitch_proj.sim\t vivado.jou\r\n", "finn_vivado_stitch_proj.srcs\t vivado.log\r\n" ] } @@ -447,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -472,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -480,15 +551,15 @@ "output_type": "stream", "text": [ "{\r\n", - " \"vivado_proj_folder\": \"/tmp/finn_dev_osboxes/synth_out_of_context_wy3b6qf4/results_finn_design_wrapper\",\r\n", - " \"LUT\": 7073.0,\r\n", - " \"FF\": 7534.0,\r\n", + " \"vivado_proj_folder\": \"/tmp/finn_dev_ubuntu/synth_out_of_context_iut077er/results_finn_design_wrapper\",\r\n", + " \"LUT\": 8667.0,\r\n", + " \"FF\": 9063.0,\r\n", " \"DSP\": 0.0,\r\n", - " \"BRAM\": 18.0,\r\n", - " \"WNS\": 0.632,\r\n", + " \"BRAM\": 22.0,\r\n", + " \"WNS\": 0.946,\r\n", " \"\": 0,\r\n", - " \"fmax_mhz\": 106.7463706233988,\r\n", - " \"estimated_throughput_fps\": 1334329.6327924852\r\n", + " \"fmax_mhz\": 110.44842058758559,\r\n", + " 
\"estimated_throughput_fps\": 1725756.5716810247\r\n", "}" ] } @@ -501,12 +572,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In `rtlsim_performance.json` we can find the steady-state throughput and latency for the accelerator, as obtained by rtlsim. If the DRAM bandwidth numbers reported here are below what the hardware platform is capable of (i.e. the accelerator is not memory-bound), you can expect the same steady-state throughput in real hardware." + "In `rtlsim_performance.json` we can find the steady-state throughput and latency for the accelerator, as obtained by rtlsim. If the DRAM bandwidth numbers reported here are below what the hardware platform is capable of (i.e. the accelerator is not memory-bound), you can expect the same steady-state throughput (excluding any software/driver overheads) in real hardware." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -514,14 +585,14 @@ "output_type": "stream", "text": [ "{\r\n", - " \"cycles\": 838,\r\n", - " \"runtime[ms]\": 0.00838,\r\n", - " \"throughput[images/s]\": 954653.9379474939,\r\n", - " \"DRAM_in_bandwidth[Mb/s]\": 71.59904534606204,\r\n", - " \"DRAM_out_bandwidth[Mb/s]\": 0.11933174224343673,\r\n", + " \"cycles\": 643,\r\n", + " \"runtime[ms]\": 0.00643,\r\n", + " \"throughput[images/s]\": 1088646.967340591,\r\n", + " \"DRAM_in_bandwidth[Mb/s]\": 81.64852255054431,\r\n", + " \"DRAM_out_bandwidth[Mb/s]\": 0.13608087091757387,\r\n", " \"fclk[mhz]\": 100.0,\r\n", - " \"N\": 8,\r\n", - " \"latency_cycles\": 229\r\n", + " \"N\": 7,\r\n", + " \"latency_cycles\": 211\r\n", "}" ] } @@ -539,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -554,8 +625,8 @@ " \"impl_style\": \"rtl\"\r\n", " },\r\n", " \"StreamingFCLayer_Batch_0\": {\r\n", - " \"PE\": 32,\r\n", - " \"SIMD\": 15,\r\n", + " \"PE\": 16,\r\n", + " \"SIMD\": 40,\r\n", " \"ram_style\": \"auto\",\r\n", " \"resType\": \"lut\",\r\n", " \"mem_mode\": \"decoupled\",\r\n", @@ -565,8 +636,8 @@ " \"impl_style\": \"hls\"\r\n", " },\r\n", " \"StreamingFCLayer_Batch_1\": {\r\n", - " \"PE\": 4,\r\n", - " \"SIMD\": 16,\r\n", + " \"PE\": 1,\r\n", + " \"SIMD\": 64,\r\n", " \"ram_style\": \"auto\",\r\n", " \"resType\": \"lut\",\r\n", " \"mem_mode\": \"decoupled\",\r\n", @@ -576,16 +647,13 @@ " \"impl_style\": \"hls\"\r\n", " },\r\n", " \"StreamingFCLayer_Batch_2\": {\r\n", - " \"PE\": 4,\r\n", - " \"SIMD\": 16,\r\n", + " \"PE\": 1,\r\n", + " \"SIMD\": 64,\r\n", " \"ram_style\": \"auto\",\r\n", " \"resType\": \"lut\",\r\n", " \"mem_mode\": \"decoupled\",\r\n", " \"runtime_writeable_weights\": 0\r\n", " },\r\n", - " \"StreamingDataWidthConverter_Batch_2\": {\r\n", - " \"impl_style\": \"hls\"\r\n", - " },\r\n", " \"StreamingFCLayer_Batch_3\": {\r\n", " \"PE\": 1,\r\n", " \"SIMD\": 1,\r\n", @@ -606,38 +674,78 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Launch a Build: PYNQ Bitfile and Driver <a id=\"build_bitfile_driver\"></a>" + "## (Optional) Launch a Build: PYNQ Bitfile and Driver <a id=\"build_bitfile_driver\"></a>\n", + "\n", + "<font color=\"red\">**Live FINN tutorial:** This section is not included in the hands-on tutorial due to the bitfile synthesis time (15-20 min). 
If you own a PYNQ board, we encourage you to uncomment the cells below to try it out on your own after the tutorial.</font>" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import finn.builder.build_dataflow as build\n", + "import finn.builder.build_dataflow_config as build_cfg\n", + "import os\n", + "import shutil\n", + "\n", + "model_file = \"cybsec-mlp-ready.onnx\"\n", + "\n", + "final_output_dir = \"output_final\"\n", + "\n", + "#Delete previous run results if exist\n", + "if os.path.exists(final_output_dir):\n", + " shutil.rmtree(final_output_dir)\n", + " print(\"Previous run results deleted!\")\n", + "\n", + "cfg = build.DataflowBuildConfig(\n", + " output_dir = final_output_dir,\n", + " mvau_wwidth_max = 80,\n", + " target_fps = 1000000,\n", + " synth_clk_period_ns = 10.0,\n", + " board = \"Pynq-Z1\",\n", + " shell_flow_type = build_cfg.ShellFlowType.VIVADO_ZYNQ,\n", + " generate_outputs=[\n", + " build_cfg.DataflowOutputType.BITFILE,\n", + " build_cfg.DataflowOutputType.PYNQ_DRIVER,\n", + " build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Building dataflow accelerator from cybsec-mlp-verified.onnx\n", - "Intermediate outputs will be generated in /tmp/finn_dev_osboxes\n", + "Building dataflow accelerator from cybsec-mlp-ready.onnx\n", + "Intermediate outputs will be generated in /tmp/finn_dev_ubuntu\n", "Final outputs will be generated in output_final\n", "Build log is at output_final/build_dataflow.log\n", - "Running step: step_tidy_up [1/15]\n", - "Running step: step_streamline [2/15]\n", - "Running step: step_convert_to_hls [3/15]\n", - "Running step: step_create_dataflow_partition [4/15]\n", - "Running step: step_target_fps_parallelization [5/15]\n", - "Running step: step_apply_folding_config [6/15]\n", - "Running step: step_generate_estimate_reports [7/15]\n", - "Running step: step_hls_ipgen [8/15]\n", - "Running step: step_set_fifo_depths [9/15]\n", - "Running step: step_create_stitched_ip [10/15]\n", - "Running step: step_measure_rtlsim_performance [11/15]\n", - "Running step: step_make_pynq_driver [12/15]\n", - "Running step: step_out_of_context_synthesis [13/15]\n", - "Running step: step_synthesize_bitfile [14/15]\n", - "Running step: step_deployment_package [15/15]\n", - "Completed successfully\n" + "Running step: step_tidy_up [1/16]\n", + "Running step: step_streamline [2/16]\n", + "Running step: step_convert_to_hls [3/16]\n", + "Running step: step_create_dataflow_partition [4/16]\n", + "Running step: step_target_fps_parallelization [5/16]\n", + "Running step: step_apply_folding_config [6/16]\n", + "Running step: step_generate_estimate_reports [7/16]\n", + "Running step: step_hls_codegen [8/16]\n", + "Running step: step_hls_ipgen [9/16]\n", + "Running step: step_set_fifo_depths [10/16]\n", + "Running step: step_create_stitched_ip [11/16]\n", + "Running step: step_measure_rtlsim_performance [12/16]\n", + "Running step: step_make_pynq_driver [13/16]\n", + "Running step: step_out_of_context_synthesis [14/16]\n", + "Running step: step_synthesize_bitfile [15/16]\n", + "Running step: step_deployment_package [16/16]\n", + "Completed successfully\n", + "CPU times: user 4.47 s, sys: 766 ms, total: 5.24 s\n", + "Wall time: 22min 13s\n" ] }, { @@ -646,33 +754,14 @@ "0" ] }, - "execution_count": 8, + "execution_count": 18, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ - "import finn.builder.build_dataflow as build\n", - "import finn.builder.build_dataflow_config as build_cfg\n", - "\n", - "model_file = \"cybsec-mlp-verified.onnx\"\n", - "\n", - "final_output_dir = \"output_final\"\n", - "\n", - "cfg = build.DataflowBuildConfig(\n", - " output_dir = final_output_dir,\n", - " target_fps = 1000000,\n", - " synth_clk_period_ns = 10.0,\n", - " board = \"Pynq-Z1\",\n", - " shell_flow_type = build_cfg.ShellFlowType.VIVADO_ZYNQ,\n", - " generate_outputs=[\n", - " build_cfg.DataflowOutputType.BITFILE,\n", - " build_cfg.DataflowOutputType.PYNQ_DRIVER,\n", - " build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,\n", - " ]\n", - ")\n", - "\n", - "build.build_dataflow_cfg(model_file, cfg)" + "#%%time\n", + "#build.build_dataflow_cfg(model_file, cfg)" ] }, { @@ -684,7 +773,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -696,7 +785,7 @@ } ], "source": [ - "! ls {final_output_dir}/bitfile" + "#! ls {final_output_dir}/bitfile" ] }, { @@ -708,7 +797,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -720,7 +809,7 @@ } ], "source": [ - "! ls {final_output_dir}/driver" + "#! ls {final_output_dir}/driver" ] }, { @@ -732,7 +821,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -745,7 +834,7 @@ } ], "source": [ - "! ls {final_output_dir}/report" + "#! ls {final_output_dir}/report" ] }, { @@ -757,7 +846,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -769,7 +858,138 @@ } ], "source": [ - "! ls {final_output_dir}/deploy" + "#! ls {final_output_dir}/deploy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## (Optional) Run on PYNQ board <a id=\"run_on_pynq\"></a>\n", + "\n", + "<font color=\"red\">**Live FINN tutorial:** This section is not included in the hands-on tutorial due to the bitfile synthesis time (15-20 min) of the previous section. If you own a PYNQ board, we encourage you to uncomment the cells below to try it out on your own after the tutorial.</font>\n", + "\n", + "To test the accelerator on the board, we'll put a copy of the dataset and a premade Python script that validates the accuracy into the `driver` folder, then make a zip archive of the whole deployment folder." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "#! cp unsw_nb15_binarized.npz {final_output_dir}/deploy/driver" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "#! cp validate-unsw-nb15.py {final_output_dir}/deploy/driver" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "driver.py\tfinn\t\t unsw_nb15_binarized.npz validate.py\r\n", + "driver_base.py\truntime_weights validate-unsw-nb15.py\r\n" + ] + } + ], + "source": [ + "#! 
ls {final_output_dir}/deploy/driver" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/workspace/finn/notebooks/end2end_example/cybersecurity/deploy-on-pynq.zip'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#from shutil import make_archive\n", + "#make_archive('deploy-on-pynq', 'zip', final_output_dir+\"/deploy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now download the created zipfile (**File -> Open**, mark the checkbox next to the `deploy-on-pynq.zip` and select Download from the toolbar), then copy it to your PYNQ board (for instance via `scp` or `rsync`). Then, run the following commands **on the PYNQ board** to extract the archive and run the validation:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```shell\n", + "unzip deploy-on-pynq.zip -d finn-cybsec-mlp-demo\n", + "cd finn-cybsec-mlp-demo/driver\n", + "sudo python3.6 -m pip install bitstring\n", + "sudo python3.6 validate-unsw-nb15.py --batchsize 1000\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should see `Final accuracy: 91.868293` at the end. You may have noticed that the validation doesn't *quite* run at 1M inferences per second. This is because of the Python packing/unpacking and data movement overheads. To see this in more detail, the generated driver includes a benchmarking mode that shows the runtime breakdown:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```shell\n", + "sudo python3.6 driver.py --exec_mode throughput_test --bitfile ../bitfile/finn-accel.bit --batchsize 1000\n", + "cat nw_metrics.txt\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```{'runtime[ms]': 1.0602474212646484,\n", + " 'throughput[images/s]': 943176.0737575893,\n", + " 'DRAM_in_bandwidth[Mb/s]': 70.7382055318192,\n", + " 'DRAM_out_bandwidth[Mb/s]': 0.9431760737575894,\n", + " 'fclk[mhz]': 100.0,\n", + " 'batch_size': 1000,\n", + " 'fold_input[ms]': 9.679794311523438e-05,\n", + " 'pack_input[ms]': 0.060115814208984375,\n", + " 'copy_input_data_to_device[ms]': 0.002428770065307617,\n", + " 'copy_output_data_from_device[ms]': 0.0005249977111816406,\n", + " 'unpack_output[ms]': 0.3773000240325928,\n", + " 'unfold_output[ms]': 6.818771362304688e-05}```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, the various `pack_input/unpack_output` calls show the overhead of packing/unpacking the inputs/outputs to convert from numpy arrays to the bit-contiguous data representation our accelerator expects. The `copy_input_data_to_device` and `copy_output_data_from_device` indicate the cost of moving the data between the CPU and accelerator memories. These overheads can dominate the execution time when running with small batch sizes.\n", + "\n", + "Finally, we can see that `throughput[images/s]`, which is the pure hardware throughput without any software and data movement overheads, is close to 1M inferences per second." 
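To put that runtime breakdown in perspective, here is a small back-of-the-envelope calculation using the example numbers from the metrics shown above (your own measurements will differ):

```python
# example values copied from the nw_metrics output above
runtime_ms = 1.0602474212646484   # pure hardware execution time
overhead_ms = (
    9.679794311523438e-05         # fold_input
    + 0.060115814208984375        # pack_input
    + 0.002428770065307617        # copy_input_data_to_device
    + 0.0005249977111816406       # copy_output_data_from_device
    + 0.3773000240325928          # unpack_output
    + 6.818771362304688e-05       # unfold_output
)
batch_size = 1000

hw_fps = batch_size / (runtime_ms * 1e-3)
end_to_end_fps = batch_size / ((runtime_ms + overhead_ms) * 1e-3)
print("hardware-only throughput: %.0f images/s" % hw_fps)       # ~943k
print("incl. software overheads: %.0f images/s" % end_to_end_fps)  # ~666k
```

With these example numbers, the packing/unpacking and copy overheads add roughly 0.44 ms per 1000-sample batch on top of the ~1.06 ms of hardware runtime, which is why the end-to-end validation run lands well below the pure hardware figure.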
] }, { diff --git a/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py b/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py new file mode 100644 index 0000000000000000000000000000000000000000..622c69c8d0abdf8025b0486c63bf336e4f8675f5 --- /dev/null +++ b/notebooks/end2end_example/cybersecurity/validate-unsw-nb15.py @@ -0,0 +1,103 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +from driver import io_shape_dict +from driver_base import FINNExampleOverlay +import numpy as np + + +def make_unsw_nb15_test_batches(bsize, dataset_root): + unsw_nb15_data = np.load(dataset_root + "/unsw_nb15_binarized.npz")["test"][:82000] + test_imgs = unsw_nb15_data[:, :-1] + test_labels = unsw_nb15_data[:, -1] + n_batches = int(test_imgs.shape[0] / bsize) + test_imgs = test_imgs.reshape(n_batches, bsize, -1) + test_labels = test_labels.reshape(n_batches, bsize) + return (test_imgs, test_labels) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Validate top-1 accuracy for FINN-generated accelerator" + ) + parser.add_argument( + "--batchsize", help="number of samples for inference", type=int, default=1000 + ) + parser.add_argument( + "--platform", help="Target platform: zynq-iodma alveo", default="zynq-iodma" + ) + parser.add_argument( + "--bitfile", + help='name of bitfile (i.e. "resizer.bit")', + default="../bitfile/finn-accel.bit", + ) + parser.add_argument( + "--dataset_root", help="dataset root dir for download/reuse", default="." 
+ ) + # parse arguments + args = parser.parse_args() + bsize = args.batchsize + bitfile = args.bitfile + platform = args.platform + dataset_root = args.dataset_root + + print("Loading dataset...") + (test_imgs, test_labels) = make_unsw_nb15_test_batches(bsize, dataset_root) + + ok = 0 + nok = 0 + n_batches = test_imgs.shape[0] + total = n_batches * bsize + + print("Initializing driver, flashing bitfile...") + + driver = FINNExampleOverlay( + bitfile_name=bitfile, + platform=platform, + io_shape_dict=io_shape_dict, + batch_size=bsize, + ) + + n_batches = int(total / bsize) + + print("Starting...") + + for i in range(n_batches): + inp = np.pad(test_imgs[i].astype(np.float32), [(0, 0), (0, 7)], mode="constant") + exp = test_labels[i].astype(np.float32) + inp = 2 * inp - 1 + exp = 2 * exp - 1 + out = driver.execute(inp) + matches = np.count_nonzero(out.flatten() == exp.flatten()) + nok += bsize - matches + ok += matches + print("batch %d / %d : total OK %d NOK %d" % (i + 1, n_batches, ok, nok)) + + acc = 100.0 * ok / (total) + print("Final accuracy: %f" % acc) diff --git a/run-docker.sh b/run-docker.sh index 012dce9b9aa25bfcd4499c067eb9e6370835b22c..19c9f7bd3ce18819bca7bd2f24732b1fa18841cc 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -89,7 +89,9 @@ SCRIPTPATH=$(dirname "$SCRIPT") # the settings below will be taken from environment variables if available, # otherwise the defaults below will be used : ${JUPYTER_PORT=8888} +: ${JUPYTER_PASSWD_HASH=""} : ${NETRON_PORT=8081} +: ${LOCALHOST_URL="localhost"} : ${PYNQ_USERNAME="xilinx"} : ${PYNQ_PASSWORD="xilinx"} : ${PYNQ_BOARD="Pynq-Z1"} @@ -116,7 +118,12 @@ elif [ "$1" = "quicktest" ]; then DOCKER_CMD="quicktest.sh" elif [ "$1" = "notebook" ]; then gecho "Running Jupyter notebook server" - DOCKER_CMD="jupyter notebook --ip=0.0.0.0 --port $JUPYTER_PORT notebooks" + if [ -z "$JUPYTER_PASSWD_HASH" ]; then + JUPYTER_PASSWD_ARG="" + else + JUPYTER_PASSWD_ARG="--NotebookApp.password='$JUPYTER_PASSWD_HASH'" + fi + DOCKER_CMD="jupyter notebook --no-browser --ip=0.0.0.0 --port $JUPYTER_PORT $JUPYTER_PASSWD_ARG notebooks" DOCKER_EXTRA+="-e JUPYTER_PORT=$JUPYTER_PORT " DOCKER_EXTRA+="-e NETRON_PORT=$NETRON_PORT " DOCKER_EXTRA+="-p $JUPYTER_PORT:$JUPYTER_PORT " @@ -185,6 +192,7 @@ DOCKER_EXEC+="-v $FINN_HOST_BUILD_DIR:$FINN_HOST_BUILD_DIR " DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:/home/$DOCKER_UNAME/.ssh " DOCKER_EXEC+="-e FINN_BUILD_DIR=$FINN_HOST_BUILD_DIR " DOCKER_EXEC+="-e FINN_ROOT="/workspace/finn" " +DOCKER_EXEC+="-e LOCALHOST_URL=$LOCALHOST_URL " DOCKER_EXEC+="-e VIVADO_IP_CACHE=$VIVADO_IP_CACHE " DOCKER_EXEC+="-e PYNQ_BOARD=$PYNQ_BOARD " DOCKER_EXEC+="-e PYNQ_IP=$PYNQ_IP " diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index b206e00a2eb6da1d76ccf57c078b16f61868a98c..bd938f17411ee42e94e95e02776ad8e973ea10fa 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -114,9 +114,9 @@ default_build_dataflow_steps = [ "step_set_fifo_depths", "step_create_stitched_ip", "step_measure_rtlsim_performance", - "step_make_pynq_driver", "step_out_of_context_synthesis", "step_synthesize_bitfile", + "step_make_pynq_driver", "step_deployment_package", ] diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py index a4d47fee4a3c046e7a0149b652e3a2f016cbc1a4..fd764212b865d778993e69ec673b0a46180b301a 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py +++ 
b/src/finn/custom_op/fpgadataflow/addstreams_batch.py @@ -63,7 +63,7 @@ class AddStreams_Batch(HLSCustomOp): ishape = tuple(vecs + [ich]) return ishape - def get_folded_input_shape(self): + def get_folded_input_shape(self, ind=0): ich = self.get_nodeattr("NumChannels") pe = self.get_nodeattr("PE") assert ich % pe == 0, "PE must divide NumChannels" @@ -363,5 +363,6 @@ class AddStreams_Batch(HLSCustomOp): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() sname = self.hls_sname() - intf_names["s_axis"] = [x + "_" + sname for x in ["in0", "in1"]] + swidth = self.get_instream_width_padded() + intf_names["s_axis"] = [(x + "_" + sname, swidth) for x in ["in0", "in1"]] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py index 517e708ef3feb5d071bc5e8e1e736da38d8742fb..764dee98e278a1d5088419fdb3fedd64a55e17ee 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py @@ -312,7 +312,7 @@ class DuplicateStreams_Batch(HLSCustomOp): def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [ """DuplicateStreams_Batch<{}, {}> (in0, out0, out1, 1);""".format( - self.get_outstream_width(), self.get_number_output_values() // 2, + self.get_outstream_width(), self.get_number_output_values() // 2 ) ] @@ -379,6 +379,6 @@ class DuplicateStreams_Batch(HLSCustomOp): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() sname = self.hls_sname() - intf_names["m_axis"] = [x + "_" + sname for x in ["out0", "out1"]] - + swidth = self.get_outstream_width_padded() + intf_names["m_axis"] = [(x + "_" + sname, swidth) for x in ["out0", "out1"]] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py index 3b8054a767c81cac68f9c4fe9cddffa19ff0de4e..2af08b920804881211dae81c61f88a913c315e77 100644 --- a/src/finn/custom_op/fpgadataflow/hlscustomop.py +++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py @@ -132,16 +132,17 @@ class HLSCustomOp(CustomOp): """Return a dict of names of input and output interfaces. The keys reflect the protocols each interface implements: 'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'. - Values are lists of names: - 's_axis' names correspond to the list of node inputs in order, - 'm_axis' names correspond to the list of node outputs in order' + Values are lists of tuples (axis, aximm) or names (axilite): + 'axis' tuples correspond to the list of node inputs in order, + each tuple is (interface_name, interface_width_bits). + axilite always assumed to be 32 bits and is not tuple (name only). 
Each block must have at most one aximm and one axilite.""" intf_names = {} intf_names["clk"] = ["ap_clk"] intf_names["rst"] = ["ap_rst_n"] sname = self.hls_sname() - intf_names["s_axis"] = ["in0_" + sname] - intf_names["m_axis"] = ["out_" + sname] + intf_names["s_axis"] = [("in0_" + sname, self.get_instream_width_padded())] + intf_names["m_axis"] = [("out_" + sname, self.get_outstream_width_padded())] intf_names["aximm"] = [] intf_names["axilite"] = [] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py index 7b47cd657d60aaa27c91a88b84727b35ac267ee4..d812d018fbeb062da8414cceb9f3dc13ddf8f52d 100644 --- a/src/finn/custom_op/fpgadataflow/iodma.py +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -399,13 +399,10 @@ class IODMA(HLSCustomOp): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() - sname = self.hls_sname() if self.get_nodeattr("direction") == "out": - intf_names["s_axis"] = ["in0_" + sname] intf_names["m_axis"] = [] else: intf_names["s_axis"] = [] - intf_names["m_axis"] = ["out_" + sname] intf_names["axilite"] = ["s_axi_control"] - intf_names["aximm"] = ["m_axi_gmem"] + intf_names["aximm"] = [("m_axi_gmem", self.get_nodeattr("intfWidth"))] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py index 27238fc2a8764ea1fe357ffae4e884429af3e13e..b276d33876b488073f94a9e58030e4a6ce0a12e3 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -389,8 +389,8 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): # create a hierarchy for this layer, with the same port names clk_name = self.get_verilog_top_module_intf_names()["clk"][0] rst_name = self.get_verilog_top_module_intf_names()["rst"][0] - dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] - din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0] cmd.append("create_bd_cell -type hier %s" % node_name) cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index b986a60717f9cf1708fbe7cb0b6f858bb5259ba9..bdcbf995280c3fb641e91d303c714982a98094f5 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -444,12 +444,24 @@ class StreamingFCLayer_Batch(HLSCustomOp): single_pe_w = simd * weight_bits return max([weightstream, max_of_io, single_pe_w]) - def get_folded_input_shape(self): + def get_folded_input_shape(self, ind=0): mw = self.get_nodeattr("MW") + mh = self.get_nodeattr("MH") simd = self.get_nodeattr("SIMD") + pe = self.get_nodeattr("PE") sf = mw // simd + nf = mh // pe vecs = list(self.get_nodeattr("numInputVectors")) - folded_input_shape = tuple(vecs + [sf, simd]) + + if ind == 0: + # calculate shape of input 0 + folded_input_shape = tuple(vecs + [sf, simd]) + elif ind == 1 and self.get_nodeattr("mem_mode") == "external": + # calculate shape of input 1 (weights) + folded_input_shape = tuple(vecs + [sf * nf, simd * pe]) + else: + raise 
Exception("Undefined input shape for requested input") + return folded_input_shape def get_folded_output_shape(self): @@ -1254,8 +1266,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): # create a hierarchy for this layer, with the same port names clk_name = self.get_verilog_top_module_intf_names()["clk"][0] rst_name = self.get_verilog_top_module_intf_names()["rst"][0] - dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] - din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0] cmd.append("create_bd_cell -type hier %s" % node_name) cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) @@ -1349,8 +1361,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): # TODO calculate and pass in segment size here cmd.append("assign_bd_address") cmd.append("save_bd_design") - elif mem_mode == "const": - # base class impl sufficient for const mode + elif mem_mode == "const" or mem_mode == "external": + # base class impl sufficient for const/external modes return super().code_generation_ipi() else: raise Exception("Unrecognized mem_mode for StreamingFCLayer") @@ -1361,7 +1373,9 @@ class StreamingFCLayer_Batch(HLSCustomOp): mem_mode = self.get_nodeattr("mem_mode") sname = self.hls_sname() if mem_mode == "external": - intf_names["s_axis"] = ["in0_" + sname, "weights_" + sname] + intf_names["s_axis"].append( + ("weights_" + sname, self.get_weightstream_width_padded()) + ) if mem_mode == "decoupled": # only expose axilite interface if attribute is set runtime_writable = self.get_nodeattr("runtime_writeable_weights") == 1 diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 728d076353bdc3c112b272f2691c22029f1789e8..3a2aa5016519b4334ca7d233edb90c8a71826458 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -345,8 +345,8 @@ class StreamingFIFO(HLSCustomOp): # create a hierarchy for this layer, with the same port names clk_name = self.get_verilog_top_module_intf_names()["clk"][0] rst_name = self.get_verilog_top_module_intf_names()["rst"][0] - dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] - din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0] cmd.append("create_bd_cell -type hier %s" % node_name) cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py index dbdfb994dbd7ac41b57c766e7e4c30fc6b874b0c..eb09d4c529e81908a389b5ec4bc3dabb3dcb95ef 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py @@ -604,7 +604,7 @@ class Thresholding_Batch(HLSCustomOp): numReps = numInputVectors[0] self.code_gen_dict["$DEFINES$"] = [ """#define NumChannels1 {}\n #define PE1 {}\n #define numReps {}""".format( - self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps, + self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps ) ] if self.get_nodeattr("mem_mode") == "decoupled": 
@@ -686,7 +686,7 @@ class Thresholding_Batch(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<{}, NumChannels1, PE1, {}, {}> (in0, out, threshs, numReps);""".format( - node.op_type, imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"], + node.op_type, imgdim, tmpl_args["TSrcI"], tmpl_args["TDstI"] ) ] elif mem_mode == "decoupled": @@ -826,8 +826,8 @@ class Thresholding_Batch(HLSCustomOp): # create a hierarchy for this layer, with the same port names clk_name = self.get_verilog_top_module_intf_names()["clk"][0] rst_name = self.get_verilog_top_module_intf_names()["rst"][0] - dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] - din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0] cmd.append("create_bd_cell -type hier %s" % node_name) cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) diff --git a/src/finn/qnn-data/cybsec-mlp/state_dict.pth b/src/finn/qnn-data/cybsec-mlp/state_dict.pth new file mode 100644 index 0000000000000000000000000000000000000000..53c002e3fa6f2ae3e7c8f0abb71fa446d80a8f09 Binary files /dev/null and b/src/finn/qnn-data/cybsec-mlp/state_dict.pth differ diff --git a/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py b/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py new file mode 100644 index 0000000000000000000000000000000000000000..2fabc716a66a3cc24697e49aa26ec3bbbb231b43 --- /dev/null +++ b/src/finn/qnn-data/cybsec-mlp/validate-unsw-nb15.py @@ -0,0 +1,109 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
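+
+# Example invocation (paths are illustrative; the defaults match the argparse
+# setup below):
+#   python validate-unsw-nb15.py --batchsize 1000 --platform zynq-iodma \
+#       --bitfile ../bitfile/finn-accel.bit --dataset_root . --limit_batches -1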
+ +import argparse +from driver import io_shape_dict +from driver_base import FINNExampleOverlay +import numpy as np + + +def make_unsw_nb15_test_batches(bsize, dataset_root, limit_batches): + unsw_nb15_data = np.load(dataset_root + "/unsw_nb15_binarized.npz")["test"][:82000] + test_imgs = unsw_nb15_data[:, :-1] + test_labels = unsw_nb15_data[:, -1] + n_batches = int(test_imgs.shape[0] / bsize) + if limit_batches == -1: + limit_batches = n_batches + test_imgs = test_imgs.reshape(n_batches, bsize, -1)[:limit_batches] + test_labels = test_labels.reshape(n_batches, bsize)[:limit_batches] + return (test_imgs, test_labels) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Validate top-1 accuracy for FINN-generated accelerator" + ) + parser.add_argument("--batchsize", help="samples per batch", type=int, default=1000) + parser.add_argument( + "--platform", help="Target platform: zynq-iodma alveo", default="zynq-iodma" + ) + parser.add_argument( + "--bitfile", + help='name of bitfile (i.e. "resizer.bit")', + default="../bitfile/finn-accel.bit", + ) + parser.add_argument( + "--dataset_root", help="dataset root dir for download/reuse", default="." + ) + parser.add_argument( + "--limit_batches", help="number of batches, -1 for max", type=int, default=-1 + ) + # parse arguments + args = parser.parse_args() + bsize = args.batchsize + bitfile = args.bitfile + platform = args.platform + dataset_root = args.dataset_root + limit_batches = args.limit_batches + + print("Loading dataset...") + (test_imgs, test_labels) = make_unsw_nb15_test_batches( + bsize, dataset_root, limit_batches + ) + + ok = 0 + nok = 0 + n_batches = test_imgs.shape[0] + total = n_batches * bsize + + print("Initializing driver, flashing bitfile...") + + driver = FINNExampleOverlay( + bitfile_name=bitfile, + platform=platform, + io_shape_dict=io_shape_dict, + batch_size=bsize, + ) + + n_batches = int(total / bsize) + + print("Starting...") + + for i in range(n_batches): + inp = np.pad(test_imgs[i].astype(np.float32), [(0, 0), (0, 7)], mode="constant") + exp = test_labels[i].astype(np.float32) + inp = 2 * inp - 1 + exp = 2 * exp - 1 + out = driver.execute(inp) + matches = np.count_nonzero(out.flatten() == exp.flatten()) + nok += bsize - matches + ok += matches + print("batch %d / %d : total OK %d NOK %d" % (i + 1, n_batches, ok, nok)) + + acc = 100.0 * ok / (total) + print("Final accuracy: %f" % acc) diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py index ef16a537ce18c52ea42ce9178a7178e8f8b667dd..df3c9881372659a4d8f6fceb8a385e6055c161e1 100644 --- a/src/finn/qnn-data/templates/driver/driver_base.py +++ b/src/finn/qnn-data/templates/driver/driver_base.py @@ -37,6 +37,9 @@ from finn.util.data_packing import ( packed_bytearray_to_finnpy, ) +from finn.util.basic import gen_finn_dt_tensor +from finn.core.datatype import DataType + # Driver base class for FINN-generated dataflow accelerators. # The particulars of the generated accelerator are specified via the # io_shape_dict (generated by the MakePYNQDriver transformation). 
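+# An illustrative io_shape_dict for a single-input, single-output accelerator
+# (key names as used by this driver and template_driver.py; the values here
+# are hypothetical):
+# io_shape_dict = {
+#     "idt": DataType.UINT8, "odt": DataType.UINT8,
+#     "ishape_normal": (1, 600), "oshape_normal": (1, 1),
+#     "ishape_folded": (1, 15, 40), "oshape_folded": (1, 1, 1),
+#     "ishape_packed": (1, 600), "oshape_packed": (1, 1),
+#     "input_dma_name": "idma0", "number_of_external_weights": 0,
+# }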
@@ -84,25 +87,78 @@ class FINNExampleOverlay(Overlay): self.batch_size = batch_size self.fclk_mhz = fclk_mhz if self.platform == "alveo": - self.idma = self.idma0 + if "input_dma_name" in io_shape_dict.keys(): + self.idma = getattr(self, io_shape_dict["input_dma_name"]) + else: + self.idma = self.idma0 self.odma = self.odma0 self.odma_handle = None elif self.platform == "zynq-iodma": - self.idma = self.idma0 + if "input_dma_name" in io_shape_dict.keys(): + self.idma = getattr(self, io_shape_dict["input_dma_name"]) + else: + self.idma = self.idma0 self.odma = self.odma0 # set the clock frequency as specified by user during transformations if self.fclk_mhz > 0: Clocks.fclk0_mhz = self.fclk_mhz else: raise ValueError("Supported platforms are zynq-iodma alveo") - # load any runtime weights + # load any external + runtime weights + self.load_external_weights() self.load_runtime_weights() + def load_external_weights(self): + """Load any existing external (DRAM) weights from the specified dir into the + appropriate layer of the accelerator. Note that this must be enabled + during the accelerator build process. The weights directory + is specified as the class member ``runtime_weight_dir``. External (DRAM) + weights are one .npy file per layer. + """ + + self.external_weights = [] + w_filenames = [] + if not os.path.isdir(self.runtime_weight_dir): + return + for (dirpath, dirnames, filenames) in os.walk(self.runtime_weight_dir): + w_filenames.extend(filenames) + + tmp_weight_dict = {} + + for w_filename in w_filenames: + if w_filename.endswith(".npy"): + weight_tensor = np.load(self.runtime_weight_dir + "/" + w_filename) + else: + continue + + idma_name = w_filename.split(".")[0] + tmp_weight_dict[idma_name] = weight_tensor + + for idma_name in tmp_weight_dict.keys(): + if idma_name in self.ip_dict.keys(): + iwdma = getattr(self, idma_name) + weight_tensor = tmp_weight_dict[idma_name] + weight_buf = allocate(weight_tensor.shape, dtype=np.uint8) + weight_buf[:] = weight_tensor + # weight_buf.sync_to_device() + weight_buf.flush() + + self.external_weights += [(iwdma, weight_buf, idma_name)] + + if "number_of_external_weights" in self._io_shape_dict: + hw_ext_weights = self._io_shape_dict["number_of_external_weights"] + assert len(self.external_weights) == hw_ext_weights, ( + "Number of hardware external weights and number of external " + + "weight tensors available do not match. \n" + + "Is runtime_weight_dir pointing to the correct folder?" + ) + def load_runtime_weights(self, flush_accel=True, verify=True): - """Load any existing runtime weights from the specified dir into the + """Load any existing runtime-writable weights from the specified dir into the appropriate layer of the accelerator. Note that this must be enabled during the accelerator build process. The runtime weights directory - is specified as the class member ``runtime_weight_dir``. + is specified as the class member ``runtime_weight_dir``. Runtime-writable + weights are provided as one .dat file per layer. 
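+        File names follow the <sdp_index>_<layer_index>_<node_name>.dat pattern
+        produced by MakePYNQDriver, e.g. ``0_1_StreamingFCLayer_Batch_1.dat``
+        (example name for illustration only).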
Parameters ---------- @@ -122,18 +178,25 @@ class FINNExampleOverlay(Overlay): if w_filename.endswith(".dat"): with open(self.runtime_weight_dir + "/" + w_filename, "r") as f: dat = f.read() + else: + continue layer_w = np.fromiter( [int(x, 16) for x in dat.strip().split()], dtype=np.uint32 ) - layer_ind = int(w_filename.split("_")[0]) - rt_weight_dict[layer_ind] = layer_w - for layer_ind in rt_weight_dict.keys(): - cand_if_name = "StreamingDataflowPartition_1/s_axilite_%d" % layer_ind + sdp_ind = int(w_filename.split("_")[0]) + layer_ind = int(w_filename.split("_")[1]) + rt_weight_dict[(sdp_ind, layer_ind)] = layer_w + for sdp_ind, layer_ind in rt_weight_dict.keys(): + cand_if_name = "StreamingDataflowPartition_%d/s_axilite_%d" % ( + sdp_ind, + layer_ind, + ) if cand_if_name in self.ip_dict.keys(): layer_mmio = getattr( - self.StreamingDataflowPartition_1, "s_axilite_%d" % layer_ind + getattr(self, "StreamingDataflowPartition_%d" % sdp_ind), + "s_axilite_%d" % layer_ind, ).mmio - layer_w = rt_weight_dict[layer_ind] + layer_w = rt_weight_dict[(sdp_ind, layer_ind)] layer_mmio.write_mm(0, layer_w.tobytes()) if verify: new_w = np.copy(layer_mmio.array[: layer_w.shape[0]]) @@ -278,6 +341,10 @@ class FINNExampleOverlay(Overlay): if self.platform == "zynq-iodma": assert self.odma.read(0x00) & 0x4 != 0, "Output DMA is not idle" # manually launch IODMAs since signatures are missing + for iwdma, iwbuf, iwdma_name in self.external_weights: + iwdma.write(0x10, iwbuf.device_address) + iwdma.write(0x1C, batch_size) + iwdma.write(0x00, 1) self.idma.write(0x10, self.ibuf_packed_device.device_address) self.idma.write(0x1C, batch_size) self.odma.write(0x10, self.obuf_packed_device.device_address) @@ -287,6 +354,8 @@ class FINNExampleOverlay(Overlay): elif self.platform == "alveo": assert self.odma_handle is None, "Output DMA is already running" self.idma.start(self.ibuf_packed_device, batch_size) + for iwdma, iwbuf, iwdma_name in self.external_weights: + iwdma.start(iwbuf, batch_size) self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size) else: raise Exception("Unrecognized platform: %s" % self.platform) @@ -338,46 +407,55 @@ class FINNExampleOverlay(Overlay): res["DRAM_out_bandwidth[Mb/s]"] = ( np.prod(self.oshape_packed) * 0.000001 / runtime ) + for iwdma, iwbuf, iwdma_name in self.external_weights: + res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = ( + self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime + ) if self.platform == "zynq-iodma": res["fclk[mhz]"] = Clocks.fclk0_mhz elif self.platform == "alveo": res["fclk[mhz]"] = self.clock_dict["clock0"]["frequency"] res["batch_size"] = self.batch_size # also benchmark driver-related overheads - input_npy = np.zeros(self.ishape_normal, dtype=self.idt.to_numpy_dt()) + input_npy = gen_finn_dt_tensor(self.idt, self.ishape_normal) + # provide as int8/uint8 to support fast packing path where possible + if self.idt == DataType.UINT8: + input_npy = input_npy.astype(np.uint8) + elif self.idt == DataType.INT8: + input_npy = input_npy.astype(np.int8) start = time.time() ibuf_folded = self.fold_input(input_npy) end = time.time() runtime = end - start - res["fold_input[ms]"] = runtime + res["fold_input[ms]"] = runtime * 1000 start = time.time() ibuf_packed = self.pack_input(ibuf_folded) end = time.time() runtime = end - start - res["pack_input[ms]"] = runtime + res["pack_input[ms]"] = runtime * 1000 start = time.time() self.copy_input_data_to_device(ibuf_packed) end = time.time() runtime = end - start - res["copy_input_data_to_device[ms]"] = 
runtime + res["copy_input_data_to_device[ms]"] = runtime * 1000 start = time.time() self.copy_output_data_from_device(self.obuf_packed) end = time.time() runtime = end - start - res["copy_output_data_from_device[ms]"] = runtime + res["copy_output_data_from_device[ms]"] = runtime * 1000 start = time.time() obuf_folded = self.unpack_output(self.obuf_packed) end = time.time() runtime = end - start - res["unpack_output[ms]"] = runtime + res["unpack_output[ms]"] = runtime * 1000 start = time.time() self.unfold_output(obuf_folded) end = time.time() runtime = end - start - res["unfold_output[ms]"] = runtime + res["unfold_output[ms]"] = runtime * 1000 return res diff --git a/src/finn/qnn-data/test_ext_weights/tfc-w1a1-extw.json b/src/finn/qnn-data/test_ext_weights/tfc-w1a1-extw.json new file mode 100644 index 0000000000000000000000000000000000000000..299a8be815aeaba70c0f41e4b1b3252b77c6f042 --- /dev/null +++ b/src/finn/qnn-data/test_ext_weights/tfc-w1a1-extw.json @@ -0,0 +1,30 @@ +{ + "Defaults": {}, + "Thresholding_Batch_0": { + "PE": 49, + "ram_style": "distributed" + }, + "StreamingFCLayer_Batch_0": { + "PE": 16, + "SIMD": 49, + "ram_style": "block" + }, + "StreamingFCLayer_Batch_1": { + "PE": 8, + "SIMD": 8, + "mem_mode": "external" + }, + "StreamingFCLayer_Batch_2": { + "PE": 8, + "SIMD": 8, + "mem_mode": "external" + }, + "StreamingFCLayer_Batch_3": { + "PE": 10, + "SIMD": 8, + "ram_style": "distributed" + }, + "LabelSelect_Batch_0": { + "PE": 1 + } + } diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index dc9825fe16439dfb1d75fd95e9cb05232f237a75..92f5f22201396eb0df15e2411b88642568f491c7 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -29,17 +29,43 @@ import os import warnings import subprocess +import json from finn.transformation.base import Transformation -from finn.util.basic import get_by_name, make_build_dir, is_finn_op +from finn.util.basic import make_build_dir, get_num_default_workers +from finn.util.fpgadataflow import is_fpgadataflow_node from finn.custom_op.registry import getCustomOp -from finn.util.basic import get_num_default_workers import multiprocessing as mp from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( ReplaceVerilogRelPaths, ) +def is_external_input(model, node, i): + # indicate whether input i of node should be made external + # True only if input is unconnected and has no initializer + # Only esception is second input of FC layers when mem_mode is external + node_inst = getCustomOp(node) + producer = model.find_producer(node.input[i]) + if producer is None: + if model.get_initializer(node.input[i]) is None: + return True + else: + if node.op_type == "StreamingFCLayer_Batch": + if node_inst.get_nodeattr("mem_mode") == "external": + return True + return False + + +def is_external_output(model, node, i): + # indicate whether output i of node should be made external + # True only if output is unconnected + consumers = model.find_consumers(node.output[i]) + if consumers is None: + return True + return False + + class CreateStitchedIP(Transformation): """Create a Vivado IP Block Design project from all the generated IPs of a graph. 
All nodes in the graph must have the fpgadataflow backend attribute, @@ -134,21 +160,24 @@ class CreateStitchedIP(Transformation): if len(aximm_intf_name) != 0: self.connect_cmds.append( "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" - % (inst_name, aximm_intf_name[0]) + % (inst_name, aximm_intf_name[0][0]) ) self.connect_cmds.append( "set_property name m_axi_gmem0 [get_bd_intf_ports m_axi_gmem_0]" ) - self.intf_names["aximm"] = ["m_axi_gmem0"] + self.intf_names["aximm"] = [("m_axi_gmem0", aximm_intf_name[0][1])] assert self.has_aximm is False, "Currently limited to one AXI-MM interface" self.has_aximm = True - def connect_m_axis_external(self, node): + def connect_m_axis_external(self, node, idx=None): inst_name = node.name node_inst = getCustomOp(node) output_intf_names = node_inst.get_verilog_top_module_intf_names()["m_axis"] # make output axis external - for output_intf_name in output_intf_names: + for i in range(len(output_intf_names)): + if idx is not None and idx != i: + continue + output_intf_name = output_intf_names[i][0] self.connect_cmds.append( "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" % (inst_name, output_intf_name) @@ -158,15 +187,20 @@ class CreateStitchedIP(Transformation): % (self.m_axis_idx, output_intf_name) ) self.has_m_axis = True - self.intf_names["m_axis"].append("m_axis_%d" % self.m_axis_idx) + self.intf_names["m_axis"].append( + ("m_axis_%d" % self.m_axis_idx, output_intf_names[i][1]) + ) self.m_axis_idx += 1 - def connect_s_axis_external(self, node): + def connect_s_axis_external(self, node, idx=None): inst_name = node.name node_inst = getCustomOp(node) input_intf_names = node_inst.get_verilog_top_module_intf_names()["s_axis"] # make input axis external - for input_intf_name in input_intf_names: + for i in range(len(input_intf_names)): + if idx is not None and idx != i: + continue + input_intf_name = input_intf_names[i][0] self.connect_cmds.append( "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" % (inst_name, input_intf_name) @@ -176,7 +210,9 @@ class CreateStitchedIP(Transformation): % (self.s_axis_idx, input_intf_name) ) self.has_s_axis = True - self.intf_names["s_axis"].append("s_axis_%d" % self.s_axis_idx) + self.intf_names["s_axis"].append( + ("s_axis_%d" % self.s_axis_idx, input_intf_names[i][1]) + ) self.s_axis_idx += 1 def apply(self, model): @@ -187,70 +223,38 @@ class CreateStitchedIP(Transformation): ip_dirs.append("/workspace/finn/finn-rtllib/memstream") # ensure that all nodes are fpgadataflow, and that IPs are generated for node in model.graph.node: - assert is_finn_op(node.domain), "Found non-FINN node" - backend_attribute = get_by_name(node.attribute, "backend") - assert backend_attribute is not None, "Backend node attribute is not set." - backend_value = backend_attribute.s.decode("UTF-8") - assert ( - backend_value == "fpgadataflow" - ), """Backend node attribute is not - set to "fpgadataflow".""" + assert is_fpgadataflow_node( + node + ), "All nodes must be FINN fpgadataflow nodes." node_inst = getCustomOp(node) ip_dir_value = node_inst.get_nodeattr("ip_path") assert os.path.isdir(ip_dir_value), "IP generation directory doesn't exist." 
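+            # collect this node's generated IP dir and IPI commands for stitching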
ip_dirs += [ip_dir_value] self.create_cmds += node_inst.code_generation_ipi() - my_producer = model.find_producer(node.input[0]) self.connect_clk_rst(node) self.connect_axi(node) - if my_producer is None: - # first node in graph - self.connect_s_axis_external(node) - if node.op_type == "TLastMarker": - assert ( - node_inst.get_nodeattr("Direction") == "in" - ), """Output TLastMarker incorrect direction""" - elif node.op_type == "IODMA" and len(model.graph.node) != 1: - # don't apply this check for a 1-node partition - assert ( - node_inst.get_nodeattr("direction") == "in" - ), """Input DMA incorrect direction""" - else: - # intermediate node - # wire up input(s) to previous node output(s) - # foreach input - # find producer - # find index of producer output connected to our target input - # get names of hdl interfaces for input and producer output - # issue a TCL directive to connect input to output - # if FC layer with mode "decoupled", add a streamer on input 1 - for i in range(len(node.input)): + for i in range(len(node.input)): + if is_external_input(model, node, i): + self.connect_s_axis_external(node, idx=i) + else: producer = model.find_producer(node.input[i]) if producer is None: continue j = list(producer.output).index(node.input[i]) src_intf_name = getCustomOp( producer - ).get_verilog_top_module_intf_names()["m_axis"][j] + ).get_verilog_top_module_intf_names()["m_axis"][j][0] dst_intf_name = node_inst.get_verilog_top_module_intf_names()[ "s_axis" - ][i] + ][i][0] self.connect_cmds.append( "connect_bd_intf_net [get_bd_intf_pins %s/%s] " "[get_bd_intf_pins %s/%s]" % (producer.name, src_intf_name, node.name, dst_intf_name) ) - if model.find_consumers(node.output[0]) is None: - # last node in graph - self.connect_m_axis_external(node) - if node.op_type == "TLastMarker": - assert ( - node_inst.get_nodeattr("Direction") == "out" - ), """Output TLastMarker incorrect direction""" - elif node.op_type == "IODMA" and len(model.graph.node) != 1: - assert ( - node_inst.get_nodeattr("direction") == "out" - ), """Output DMA incorrect direction""" + for i in range(len(node.output)): + if is_external_output(model, node, i): + self.connect_m_axis_external(node, idx=i) # create a temporary folder for the project prjname = "finn_vivado_stitch_proj" @@ -316,7 +320,7 @@ class CreateStitchedIP(Transformation): block_library = "finn" block_vlnv = "%s:%s:%s:1.0" % (block_vendor, block_library, block_name) model.set_metadata_prop("vivado_stitch_vlnv", block_vlnv) - model.set_metadata_prop("vivado_stitch_ifnames", str(self.intf_names)) + model.set_metadata_prop("vivado_stitch_ifnames", json.dumps(self.intf_names)) tcl.append( ( "ipx::package_project -root_dir %s/ip -vendor %s " diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py index 0f2b8ef6a4c0858cd98218538930c97c6df2ad9d..c8df80659d30e1855fc658bad83c3fe9bccb9bf9 100644 --- a/src/finn/transformation/fpgadataflow/insert_dwc.py +++ b/src/finn/transformation/fpgadataflow/insert_dwc.py @@ -44,8 +44,8 @@ class InsertDWC(Transformation): for n in graph.node: node_ind += 1 if _suitable_node(n): - for n_output in n.output: - consumers = model.find_consumers(n_output) + for output_name in n.output: + consumers = model.find_consumers(output_name) if consumers is None: continue if len(consumers) > 1: @@ -59,7 +59,22 @@ class InsertDWC(Transformation): n0 = getCustomOp(n) n1 = getCustomOp(consumer) n0_out_shape = n0.get_folded_output_shape() - n1_in_shape = n1.get_folded_input_shape() + + # If FC and 
external mem, it could be connected to input 1
+                    if (
+                        consumer.op_type == "StreamingFCLayer_Batch"
+                        and n1.get_nodeattr("mem_mode") == "external"
+                    ):
+                        # get input idx
+                        in_idx = None
+                        for idx, n_input in enumerate(consumer.input):
+                            if output_name == n_input:
+                                in_idx = idx
+                        assert in_idx is not None, "Malformed model"
+                        n1_in_shape = n1.get_folded_input_shape(in_idx)
+                    else:
+                        n1_in_shape = n1.get_folded_input_shape()
+
                     if n0_out_shape[-1] != n1_in_shape[-1]:
                         graph_modified = True
                         # determine dwc inwidth
@@ -82,7 +97,7 @@ class InsertDWC(Transformation):
                         dwc_node = oh.make_node(
                             "StreamingDataWidthConverter_Batch",
-                            [n_output],
+                            [output_name],
                             [dwc_output_tensor.name],
                             domain="finn.custom_op.fpgadataflow",
                             backend="fpgadataflow",
@@ -96,7 +111,7 @@ class InsertDWC(Transformation):
                         # set dwc output tensor as new input tensor of second node
                         for idx, inp in enumerate(consumer.input):
-                            if inp == n_output:
+                            if inp == output_name:
                                 consumer.input[idx] = dwc_output_tensor.name

         return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py
index 67143547557a9b24b311e69cff6f885f8745cd3c..27055a4fd29dba3849c0e4a889f27802f8c36081 100644
--- a/src/finn/transformation/fpgadataflow/insert_iodma.py
+++ b/src/finn/transformation/fpgadataflow/insert_iodma.py
@@ -33,7 +33,6 @@
 from finn.util.basic import get_by_name
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.base import Transformation
 from finn.transformation.general import SortGraph
-import finn.core.data_layout as DataLayout
 import math
 import numpy as np
@@ -48,6 +47,45 @@ class InsertIODMA(Transformation):
         ), "max_intfwidth must be a power of 2"
         self.max_intfwidth = max_intfwidth

+    def get_mem_init(self, weights, pe, simd):
+        """
+        Returns a matrix ready for pack_innermost_dim_as_hex_string with
+        reverse=False (finn.util.data_packing), i.e. the memory init data
+        packed little-endian.
+        That is, for elements elem(pe, simd), get_mem_init returns:
+        addr = 0: [(pe-1,simd-1),(pe-1,simd-2),...(0,1),(0,0)]
+        addr = 1: [(pe-1,simd*2-1),.......(0,simd+1),(0,simd)]
+        ...
+        """
+
+        # TODO: refactor this into streamingfclayer_batch.py, could go into
+        # make_weight_file except it doesn't write a file but returns a npy
+        # array instead
+        w_shape = weights.shape
+        assert len(w_shape) == 2, "weights with incorrect number of dims"
+        inp_w, out_w = w_shape
+
+        assert out_w % pe == 0, "Malformed weight matrix"
+        assert inp_w % simd == 0, "Malformed weight matrix"
+        reshaped_w = np.zeros(inp_w * out_w).reshape(-1, pe * simd)
+
+        addr = 0
+        for fr in range(out_w // pe):
+            for fc in range(inp_w // simd):
+                w0_lower = fc * simd
+                w0_upper = (fc + 1) * simd
+                w1_lower = fr * pe
+                w1_upper = (fr + 1) * pe
+                tile = weights[w0_lower:w0_upper, w1_lower:w1_upper]
+                for p in range(pe):
+                    rw0_lower = p * simd
+                    rw0_upper = (p + 1) * simd
+                    reshaped_w[addr, rw0_lower:rw0_upper] = tile[:, p].transpose()
+                addr += 1
+        reshaped_w = np.flip(reshaped_w, axis=-1)
+        return reshaped_w
+
     def apply(self, model):
         # only makes sense for a pure fpgadataflow graph -- so we check!
all_nodes = list(model.graph.node) @@ -59,8 +97,7 @@ class InsertIODMA(Transformation): fc_extw_nodes = list( filter( lambda x: x.op_type == "StreamingFCLayer_Batch" - and get_by_name(x.attribute, "mem_mode") is not None - and get_by_name(x.attribute, "mem_mode").s.decode("UTF-8") == "external" + and getCustomOp(x).get_nodeattr("mem_mode") == "external" and model.find_producer(x.input[1]) is None, all_nodes, ) @@ -78,11 +115,6 @@ class InsertIODMA(Transformation): return (model, False) else: if final_node.op_type != "IODMA": - # check if tensor is NHWC - assert ( - model.get_tensor_layout(graph_out_name) == DataLayout.NHWC - or model.get_tensor_layout(graph_out_name) == DataLayout.NC - ), "Data layout of output tensor must be NHWC or NC" out_shape = model.get_tensor_shape(graph_out_name) out_dtype = model.get_tensor_datatype(graph_out_name) final_node_inst = getCustomOp(final_node) @@ -123,11 +155,6 @@ class InsertIODMA(Transformation): ) model.graph.node.append(dma_node) if first_node.op_type != "IODMA": - # check if tensor is NHWC - assert ( - model.get_tensor_layout(graph_in_name) == DataLayout.NHWC - or model.get_tensor_layout(graph_in_name) == DataLayout.NC - ), "Data layout of input tensor must be NHWC or NC" in_shape = model.get_tensor_shape(graph_in_name) in_dtype = model.get_tensor_datatype(graph_in_name) first_node_inst = getCustomOp(first_node) @@ -168,11 +195,7 @@ class InsertIODMA(Transformation): ) model.graph.node.insert(0, dma_node) for fc_node in fc_extw_nodes: - # check if tensor is NHWC - assert ( - model.get_tensor_layout(fc_node.input[1]) == DataLayout.NHWC - or model.get_tensor_layout(graph_in_name) == DataLayout.NC - ), "Data layout of tensors must be NHWC or NC" + fc_inst = getCustomOp(fc_node) fc_w_name = fc_node.input[1] w_shape = model.get_tensor_shape(fc_w_name) w_dtype = model.get_tensor_datatype(fc_w_name) @@ -185,21 +208,24 @@ class InsertIODMA(Transformation): # calculate width of stream output from DMA pe = get_by_name(fc_node.attribute, "PE").i simd = get_by_name(fc_node.attribute, "SIMD").i - assert pe * simd == w_shape[0], "Malformed weight matrix" - streamWidth = simd * pe * w_dtype.bitwidth() + streamWidth = fc_inst.get_weightstream_width_padded() # make new buffer + W = model.get_initializer(fc_w_name) + iodma_mem = self.get_mem_init(W, pe, simd) + model.set_initializer(fc_w_name, iodma_mem) + fc_node_in = oh.make_tensor_value_info( - model.make_new_valueinfo_name(), TensorProto.FLOAT, w_shape + model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape ) model.graph.value_info.append(fc_node_in) model.set_tensor_datatype(fc_node_in.name, w_dtype) - model.set_initializer(fc_node_in.name, model.get_initializer(fc_w_name)) + model.set_initializer(fc_node_in.name, W) dma_node = oh.make_node( "IODMA", [fc_w_name], [fc_node_in.name], - numInputVectors=[w_shape[1]], - NumChannels=w_shape[0], + numInputVectors=[iodma_mem.shape[0]], + NumChannels=pe * simd, dataType=str(w_dtype.name), intfWidth=intfwidth, streamWidth=streamWidth, diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index 84dc01e536e96298ecb57e133610d800fcd2eb5c..6ab12548abbcbe00496101bd146b2c9b873204c8 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -37,12 +37,39 @@ import os import warnings import pkg_resources as pk from . 
import template_driver +from finn.core.modelwrapper import ModelWrapper +import numpy as np + +from finn.util.data_packing import ( + pack_innermost_dim_as_hex_string, + hexstring2npbytearray, +) +from finn.util.basic import roundup_to_integer_multiple + + +def to_external_tensor(init, w_dtype): + """Return an appropriately formatted and packed numpy byte array for given + external parameter tensor.""" + + weight_width = init.shape[1] * w_dtype.bitwidth() + weight_width_padded = roundup_to_integer_multiple(weight_width, 4) + hex_init = pack_innermost_dim_as_hex_string( + init, w_dtype, weight_width_padded, prefix="0x" + ) + ext_weight = np.array([], dtype=np.uint8) + for line in hex_init: + array_line = [ + x for x in reversed(hexstring2npbytearray(line, remove_prefix="0x")) + ] + ext_weight = np.append(ext_weight, array_line) + + return ext_weight class MakePYNQDriver(Transformation): """Create PYNQ Python code to correctly interface the generated accelerator, including data packing/unpacking. Should be called - after conversion to HLS layers and folding, but prior to the creation of + after conversion to HLS layers, folding and the creation of dataflow partitions for correct operation. platform: one of ["zynq-iodma", "alveo"] @@ -77,15 +104,38 @@ class MakePYNQDriver(Transformation): o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name)) i_tensor_dt = model.get_tensor_datatype(i_tensor_name) o_tensor_dt = model.get_tensor_datatype(o_tensor_name) - # folded shapes for i/o simply derived from regular tensor shapes - # this used to be extracted from first/last node folded shapes, but - # can't do this anymore due to IODMAs - i_tensor_shape_folded = list(i_tensor_shape_normal) - i_tensor_shape_folded.insert(-1, 1) - i_tensor_shape_folded = tuple(i_tensor_shape_folded) - o_tensor_shape_folded = list(o_tensor_shape_normal) - o_tensor_shape_folded.insert(-1, 1) - o_tensor_shape_folded = tuple(o_tensor_shape_folded) + + first_node = model.find_consumer(i_tensor_name) + last_node = model.find_producer(o_tensor_name) + if first_node.op_type == "StreamingDataflowPartition": + # IODMAs and dataflow partitions have already been created + # extract folded i/o shapes from IODMA consumer/producer + first_df_model = ModelWrapper(getCustomOp(first_node).get_nodeattr("model")) + assert ( + first_df_model.graph.node[0].op_type == "IODMA" + ), "First partition must hold input IODMA" + successors = model.find_direct_successors(first_node) + successor_sdp = getCustomOp(successors[0]) + successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model")) + first_node = successor_df_model.find_consumer( + successor_df_model.graph.input[0].name + ) + + last_df_model = ModelWrapper(getCustomOp(last_node).get_nodeattr("model")) + assert ( + last_df_model.graph.node[0].op_type == "IODMA" + ), "Last partition must hold output IODMA" + predecessors = model.find_direct_predecessors(last_node) + predecessor_sdp = getCustomOp(predecessors[0]) + predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model")) + last_node = predecessor_df_model.find_producer( + predecessor_df_model.graph.output[0].name + ) + + # else: transformation called before IODMA/SDP creation (legacy flow) + # can access folded i/o shapes directly + i_tensor_shape_folded = tuple(getCustomOp(first_node).get_folded_input_shape()) + o_tensor_shape_folded = tuple(getCustomOp(last_node).get_folded_output_shape()) # generate dummy folded i/o tensors and their packed versions i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, 
i_tensor_shape_folded) @@ -99,6 +149,35 @@ class MakePYNQDriver(Transformation): i_tensor_shape_packed = i_tensor_dummy_packed.shape o_tensor_shape_packed = o_tensor_dummy_packed.shape + # generate external weights npy files + weights_dir = pynq_driver_dir + "/runtime_weights" + + os.makedirs(weights_dir) + idma_idx = 0 + ext_weight_dma_cnt = 0 + + for node in model.graph.node: + assert ( + node.op_type == "StreamingDataflowPartition" + ), "CreateDataflowPartition needs to be applied before driver generation" + + producer = model.find_producer(node.input[0]) + init_tensor = model.get_initializer(node.input[0]) + + if producer is None: # input dma? + idma_name = "idma" + str(idma_idx) + if init_tensor is not None: # input weights dma? + ext_weight_dma_cnt += 1 + w_dtype = model.get_tensor_datatype(node.input[0]) + init_external_tensor = to_external_tensor(init_tensor, w_dtype) + np.save( + weights_dir + "/" + idma_name + ".npy", init_external_tensor + ) + else: + net_input_name = idma_name + + idma_idx += 1 + # fill in the driver template driver_py = pynq_driver_dir + "/driver.py" driver = template_driver.pynq_driver_template @@ -122,6 +201,8 @@ class MakePYNQDriver(Transformation): driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal)) driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded)) driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed)) + driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name) + driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt)) with open(driver_py, "w") as f: f.write(driver) @@ -148,25 +229,35 @@ class MakePYNQDriver(Transformation): shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core") # generate weight files for runtime-writable layers - weights_dir = pynq_driver_dir + "/runtime_weights" - rt_layer_ind = 0 - os.makedirs(weights_dir) - for node in model.graph.node: - if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]: - node_inst = getCustomOp(node) - is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights") - if is_rt_weights == 1: - fcl_w = model.get_initializer(node.input[1]) - w_filename = weights_dir + "/%d_%s.dat" % (rt_layer_ind, node.name) - node_inst.make_weight_file(fcl_w, "decoupled_runtime", w_filename) - rt_layer_ind += 1 - elif node.op_type == "StreamingDataflowPartition": - warnings.warn( - """Please call MakePYNQDriver prior to - CreateDataflowPartition. Can only extract runtime-writable - weights from HLSCustomOp instances and not StreamingDataflowPartition. 
- """ - ) - else: - continue + + for sdp_ind, sdp_node in enumerate(model.graph.node): + assert sdp_node.op_type == "StreamingDataflowPartition" + # get dataflow model + sdp_node = getCustomOp(sdp_node) + dataflow_model_filename = sdp_node.get_nodeattr("model") + dataflow_model = ModelWrapper(dataflow_model_filename) + rt_layer_ind = 0 + for node in dataflow_model.graph.node: + if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]: + node_inst = getCustomOp(node) + is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights") + if is_rt_weights == 1: + fcl_w = dataflow_model.get_initializer(node.input[1]) + w_filename = weights_dir + "/%d_%d_%s.dat" % ( + sdp_ind, + rt_layer_ind, + node.name, + ) + node_inst.make_weight_file( + fcl_w, "decoupled_runtime", w_filename + ) + rt_layer_ind += 1 + elif node.op_type == "StreamingDataflowPartition": + warnings.warn( + """Nested StreamingDataflowPartition are not supported + """ + ) + else: + continue + return (model, False) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py index 82a11af2aea3ae182e79f6526aa650d31770c528..59df07e8e578ad0903b9742a5a9e5ad6ef288f91 100644 --- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -173,6 +173,11 @@ class MakeZYNQProject(Transformation): "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" % (instance_names[node.name], axilite_intf_name, axilite_idx) ) + # assign_bd_address with appropriate range/offset + config.append( + "assign_axi_addr_proc %s/%s" + % (instance_names[node.name], axilite_intf_name) + ) idma_idx += 1 aximm_idx += 1 axilite_idx += 1 @@ -188,6 +193,11 @@ class MakeZYNQProject(Transformation): "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" % (instance_names[node.name], axilite_intf_name, axilite_idx) ) + # assign_bd_address with appropriate range/offset + config.append( + "assign_axi_addr_proc %s/%s" + % (instance_names[node.name], axilite_intf_name) + ) axilite_idx += 1 config.append( @@ -282,7 +292,10 @@ class MakeZYNQProject(Transformation): class ZynqBuild(Transformation): - """Best-effort attempt at building the accelerator for Zynq.""" + """Best-effort attempt at building the accelerator for Zynq. 
+    It assumes the model has only fpgadataflow nodes.
+    """

     def __init__(self, platform, period_ns, enable_debug=False):
         super().__init__()
@@ -296,7 +309,6 @@ class ZynqBuild(Transformation):
         model = model.transform(InferDataLayouts())
         # prepare at global level, then break up into kernels
         prep_transforms = [
-            MakePYNQDriver(platform="zynq-iodma"),
             InsertIODMA(64),
             InsertDWC(),
             Floorplan(),
@@ -331,6 +343,10 @@ class ZynqBuild(Transformation):
         model = model.transform(
             MakeZYNQProject(self.platform, enable_debug=self.enable_debug)
         )
+        # set platform attribute for correct remote execution
         model.set_metadata_prop("platform", "zynq-iodma")
+
+        # create driver
+        model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/template_driver.py b/src/finn/transformation/fpgadataflow/template_driver.py
index b595205714d8cb630816d2b42fe96640e49e506e..5265835dd2530a5c93ceefbef629a43d6f33de52 100644
--- a/src/finn/transformation/fpgadataflow/template_driver.py
+++ b/src/finn/transformation/fpgadataflow/template_driver.py
@@ -77,7 +77,9 @@
     "ishape_folded" : $INPUT_SHAPE_FOLDED$,
     "oshape_folded" : $OUTPUT_SHAPE_FOLDED$,
     "ishape_packed" : $INPUT_SHAPE_PACKED$,
-    "oshape_packed" : $OUTPUT_SHAPE_PACKED$
+    "oshape_packed" : $OUTPUT_SHAPE_PACKED$,
+    "input_dma_name" : $INPUT_DMA_NAME$,
+    "number_of_external_weights": $EXT_WEIGHT_NUM$
 }

 if __name__ == "__main__":
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 73beb62f06a6b625a992bd2a7401a91ed09789f3..9c0169a98f515d0b32e10bdfc834eca5fb681ffd 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -153,6 +153,7 @@ set_property -dict [list CONFIG.NUM_MI $NUM_AXILITE] [get_bd_cells axi_interconnect_0]

 #create reset controller and connect interconnects to PS
 if {$ZYNQ_TYPE == "zynq_us+"} {
+    set axi_peripheral_base 0xA0000000
     connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0_FPD]
     connect_bd_intf_net [get_bd_intf_pins zynq_ps/M_AXI_HPM0_FPD] -boundary_type upper [get_bd_intf_pins axi_interconnect_0/S00_AXI]
     #connect interconnect clocks and resets
@@ -160,6 +161,7 @@ if {$ZYNQ_TYPE == "zynq_us+"} {
     apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/S00_ACLK]
     apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins zynq_ps/saxihp0_fpd_aclk]
 } elseif {$ZYNQ_TYPE == "zynq_7000"} {
+    set axi_peripheral_base 0x40000000
     connect_bd_intf_net -boundary_type upper [get_bd_intf_pins zynq_ps/M_AXI_GP0] [get_bd_intf_pins axi_interconnect_0/S00_AXI]
     connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0]
     apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/ACLK]
@@ -168,6 +170,21 @@ if {$ZYNQ_TYPE == "zynq_us+"} {
 }
 connect_bd_net [get_bd_pins axi_interconnect_0/ARESETN] [get_bd_pins smartconnect_0/aresetn]

+#procedure used by below IP instantiations to map BD address segments based on the axi interface aperture
+proc assign_axi_addr_proc {axi_intf_path} {
+    #global variable holds current base address
+    global axi_peripheral_base
+    #infer range
+    set range [expr
2**[get_property CONFIG.ADDR_WIDTH [get_bd_intf_pins $axi_intf_path]]] + set range [expr $range < 4096 ? 4096 : $range] + #align base address to range + set offset [expr ($axi_peripheral_base + ($range-1)) & ~($range-1)] + #perform assignment + assign_bd_address [get_bd_addr_segs $axi_intf_path/Reg] -offset $offset -range $range + #advance base address + set axi_peripheral_base [expr $offset + $range] +} + #custom IP instantiations/connections start here %s diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py index e52fb14b158a7927311d1b7e90067fea4bde6e27..0fe4276096852c08d0798be8e1ee715cc5769286 100644 --- a/src/finn/transformation/fpgadataflow/vitis_build.py +++ b/src/finn/transformation/fpgadataflow/vitis_build.py @@ -28,6 +28,7 @@ import os import subprocess +import json from finn.core.modelwrapper import ModelWrapper from finn.transformation.base import Transformation @@ -38,14 +39,17 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import ( ) from finn.transformation.fpgadataflow.insert_dwc import InsertDWC from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO -from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.floorplan import Floorplan from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.general import ( + GiveReadableTensorNames, + GiveUniqueNodeNames, + RemoveUnusedTensors, +) from finn.util.basic import make_build_dir from finn.transformation.infer_data_layouts import InferDataLayouts from . 
import templates @@ -89,63 +93,47 @@ class CreateVitisXO(Transformation): _check_vitis_envvars() vivado_proj_dir = model.get_metadata_prop("vivado_stitch_proj") stitched_ip_dir = vivado_proj_dir + "/ip" + interfaces = json.loads(model.get_metadata_prop("vivado_stitch_ifnames")) args_string = [] - m_axis_idx = 0 - s_axis_idx = 0 + arg_id = 0 # NOTE: this assumes the graph is Vitis-compatible: max one axi lite interface # developed from instructions in UG1393 (v2019.2) and package_xo documentation # package_xo is responsible for generating the kernel xml - ifnames = eval(model.get_metadata_prop("vivado_stitch_ifnames")) assert ( - len(ifnames["axilite"]) <= 1 + len(interfaces["axilite"]) <= 1 ), "CreateVitisXO supports max 1 AXI lite interface" - if len(ifnames["axilite"]) == 1: - axilite_intf_name = ifnames["axilite"][0] - else: - axilite_intf_name = None - - for node in model.graph.node: - node_inst = getCustomOp(node) - arg_id = 0 - if node.op_type == "TLastMarker": - stream_width = node_inst.get_nodeattr("StreamWidth") - # add a stream input or output port, based on direction - if node_inst.get_nodeattr("Direction") == "in": - args_string.append( - "{in:4:%s:s_axis_%d:0x0:0x0:ap_uint<%s>:0}" - % (str(arg_id), s_axis_idx, str(stream_width)) - ) - s_axis_idx += 1 - else: - args_string.append( - "{out:4:%s:m_axis_%d:0x0:0x0:ap_uint<%s>:0}" - % (str(arg_id), m_axis_idx, str(stream_width)) + axilite_intf_name = None + if len(interfaces["axilite"]) == 1: + axilite_intf_name = interfaces["axilite"][0] + if len(interfaces["aximm"]) > 0: + args_string.append( + "{addr:1:%s:%s:0x8:0x10:ap_uint<%s>*:0}" + % ( + str(arg_id), + interfaces["aximm"][0][0], + str(interfaces["aximm"][0][1]), ) - m_axis_idx += 1 + ) arg_id += 1 - # add a axilite port if dynamic - # add a count parameter if dynamic - if node_inst.get_nodeattr("DynIters") == 1: - assert axilite_intf_name is not None - args_string.append( - "{numReps:0:%s:%s:0x4:0x10:uint:0}" - % (str(arg_id), axilite_intf_name) - ) - arg_id += 1 - elif node.op_type == "IODMA": - port_width = node_inst.get_nodeattr("intfWidth") - # add an address parameter - # add a count parameter args_string.append( - "{addr:1:%s:m_axi_gmem0:0x8:0x10:ap_uint<%s>*:0}" - % (str(arg_id), str(port_width)) + "{numReps:0:%s:%s:0x4:0x1C:uint:0}" + % (str(arg_id), axilite_intf_name) ) arg_id += 1 + else: args_string.append( - "{numReps:0:%s:%s:0x4:0x1C:uint:0}" + "{numReps:0:%s:%s:0x4:0x10:uint:0}" % (str(arg_id), axilite_intf_name) ) arg_id += 1 + for intf in interfaces["s_axis"] + interfaces["m_axis"]: + stream_width = intf[1] + stream_name = intf[0] + args_string.append( + "{%s:4:%s:%s:0x0:0x0:ap_uint<%s>:0}" + % (stream_name, str(arg_id), stream_name, str(stream_width)) + ) + arg_id += 1 # save kernel xml then run package_xo xo_name = self.ip_name + ".xo" @@ -342,6 +330,7 @@ class VitisLink(Transformation): class VitisBuild(Transformation): """Best-effort attempt at building the accelerator with Vitis. 
+    It assumes the model has only fpgadataflow nodes.

     fpga_part: string identifying the target FPGA
     period_ns: target clock period
@@ -377,7 +366,6 @@ class VitisBuild(Transformation):
         model = model.transform(InferDataLayouts())
         # prepare at global level, then break up into kernels
         prep_transforms = [
-            MakePYNQDriver(platform="alveo"),
             InsertIODMA(512),
             InsertDWC(),
         ]
@@ -399,9 +387,7 @@ class VitisBuild(Transformation):
             dataflow_model_filename = sdp_node.get_nodeattr("model")
             kernel_model = ModelWrapper(dataflow_model_filename)
             kernel_model = kernel_model.transform(InsertFIFO())
-            kernel_model = kernel_model.transform(
-                InsertTLastMarker(both=True, external=False, dynamic=False)
-            )
+            kernel_model = kernel_model.transform(RemoveUnusedTensors())
             kernel_model = kernel_model.transform(GiveUniqueNodeNames())
             kernel_model.save(dataflow_model_filename)
             kernel_model = kernel_model.transform(
@@ -430,4 +416,6 @@ class VitisBuild(Transformation):

         # set platform attribute for correct remote execution
         model.set_metadata_prop("platform", "alveo")
+        # create driver
+        model = model.transform(MakePYNQDriver(platform="alveo"))
         return (model, False)
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index b23f9f14909a5bd93ae24b34ef65304dafc7e0c1..7163a95c4dbbe5c8bcee4ebeea87c5e9611c179e 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -40,6 +40,7 @@
 from finn.core.datatype import DataType
 from finn.core.onnx_exec import execute_node
 from finn.util.basic import get_by_name
 from finn.custom_op.registry import getCustomOp
+from finn.transformation.general import SortGraph


 class MoveAddPastMul(Transformation):
@@ -1039,3 +1040,77 @@ class MoveTransposePastScalarMul(Transformation):
         model = model.transform(InferDataLayouts())
         model = model.transform(InferShapes())
         return (model, graph_modified)
+
+
+class MoveIdenticalOpPastJoinOp(Transformation):
+    """
+    Move identical operations on different branches past the common join node.
+    This transformation assumes that the identical operations only change the
+    data layout. For linear operations, see the transformation
+    MoveLinearPastEltwiseAdd. Specifically, this transformation matches and
+    transforms the following patterns:
+    f(x) + f(y) -> f(x + y)
+    where f(.) currently only supports 'Transpose', and an 'Add' node is
+    the join node.
+    """
+
+    def __init__(self, identical_op_list, join_node_list):
+        super().__init__()
+        self.ops_to_move = identical_op_list
+        self.join_node_op = join_node_list
+
+    def move_node(self, model, n, prod0, prod1):
+        # Found! move one of the identical_ops to output, remove the other one
+        identical_op0_in0 = prod0.input[0]
+        identical_op1_in0 = prod1.input[0]
+        add_in0 = n.input[0]
+        add_out = n.output[0]
+
+        # Rewire
+        n.input[0] = identical_op0_in0
+        n.input[1] = identical_op1_in0
+
+        # Output tensor of the join node must have the same shape as
+        # its input tensor (original shape is preserved)
+        new_shape = model.get_tensor_shape(identical_op0_in0)
+
+        # Set new tensor shape
+        model.set_tensor_shape(tensor_name=add_in0, tensor_shape=new_shape)
+
+        n.output[0] = add_in0
+        prod0.input[0] = add_in0
+        prod0.output[0] = add_out
+
+        model.graph.node.remove(prod1)
+
+    def apply(self, model):
+        graph = model.graph
+        graph_modified = False
+        for n in graph.node:
+            if n.op_type in self.join_node_op and model.is_join_node(n):
+                in0 = n.input[0]
+                in1 = n.input[1]
+                if in0 is None or in1 is None:
+                    continue
+
+                prod0 = model.find_producer(in0)
+                prod1 = model.find_producer(in1)
+                # Check whether the join node is preceded by
+                # two different but identical operations
+                if prod0 == prod1:
+                    continue
+
+                identical_op = prod0.op_type == prod1.op_type
+
+                if identical_op and prod0.op_type in self.ops_to_move:
+                    self.move_node(model, n, prod0, prod1)
+                    graph_modified = True
+
+        if graph_modified:
+            model = model.transform(SortGraph(), make_deepcopy=False, cleanup=False)
+
+        return (model, graph_modified)
+
+
+class MoveTransposePastJoinAdd(MoveIdenticalOpPastJoinOp):
+    def __init__(self):
+        super().__init__(["Transpose"], ["Add"])
diff --git a/src/finn/util/visualization.py b/src/finn/util/visualization.py
index 3eb7e55e307c380ecc6712ff4d0c74577a9e7a43..d8547a32e06aa3b688601aa550abb2c50bcf77d6 100644
--- a/src/finn/util/visualization.py
+++ b/src/finn/util/visualization.py
@@ -27,6 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
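+# showInNetron below derives its base URL from the LOCALHOST_URL environment
+# variable (exported by run-docker.sh), so Netron iframes also work when FINN
+# runs on a remote machine.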
 
 import inspect
+import os
 import netron
 from IPython.display import IFrame
@@ -34,6 +35,8 @@ from IPython.display import IFrame
 
 def showSrc(what):
     print("".join(inspect.getsourcelines(what)[0]))
 
+
 def showInNetron(model_filename):
     netron.start(model_filename, address=("0.0.0.0", 8081))
-    return IFrame(src="http://0.0.0.0:8081/", width="100%", height=400)
\ No newline at end of file
+    localhost_url = os.getenv("LOCALHOST_URL", default="localhost")
+    return IFrame(src="http://%s:8081/" % localhost_url, width="100%", height=400)
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index 5f54eeacf6b68c019e37762bad9677264e6c234d..ddea2dafce02c181a279d9c95759b97dee00a504 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -140,7 +140,6 @@ def fold_tfc(model):
         fcl_inst.set_nodeattr("PE", pe)
         fcl_inst.set_nodeattr("SIMD", simd)
         fcl_inst.set_nodeattr("ram_style", ramstyle)
-        fcl_inst.set_nodeattr("runtime_writeable_weights", 1)
     # set parallelism for input quantizer to be same as first layer's SIMD
     inp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
     inp_qnt = getCustomOp(inp_qnt_node)
@@ -164,6 +163,7 @@ def fold_lfc(model):
         fcl_inst.set_nodeattr("PE", pe)
         fcl_inst.set_nodeattr("SIMD", simd)
         fcl_inst.set_nodeattr("ram_style", ramstyle)
+        fcl_inst.set_nodeattr("runtime_writeable_weights", 1)
     # set parallelism for input quantizer to be same as first layer's SIMD
     inp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
     inp_qnt = getCustomOp(inp_qnt_node)
diff --git a/tests/end2end/test_end2end_cybsec_mlp.py b/tests/end2end/test_end2end_cybsec_mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..eedbf97f389754440a116cf8755c25d597c433ee
--- /dev/null
+++ b/tests/end2end/test_end2end_cybsec_mlp.py
@@ -0,0 +1,246 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
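A brief sketch of how the LOCALHOST_URL lookup in showInNetron above behaves; the hostname is a placeholder, and the variable would normally be set when launching the FINN Docker container on a remote machine:

import os
from finn.util.visualization import showInNetron

# placeholder hostname of the remote machine running the FINN container
os.environ["LOCALHOST_URL"] = "finn-server.example.com"
# Netron still binds to 0.0.0.0:8081 inside the container, but the returned
# IFrame now points at http://finn-server.example.com:8081/ instead of localhost
showInNetron("/tmp/model.onnx")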
+
+import torch
+from brevitas.nn import QuantLinear, QuantReLU
+import torch.nn as nn
+import numpy as np
+from brevitas.core.quant import QuantType
+from brevitas.nn import QuantIdentity
+import brevitas.onnx as bo
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.datatype import DataType
+import finn.builder.build_dataflow as build
+import finn.builder.build_dataflow_config as build_cfg
+import os
+import shutil
+from finn.util.test import get_build_env, load_test_checkpoint_or_skip
+import pytest
+from finn.util.basic import make_build_dir
+import pkg_resources as pk
+import json
+import wget
+import subprocess
+
+target_clk_ns = 10
+build_kind = "zynq"
+build_dir = os.environ["FINN_BUILD_DIR"]
+
+
+def get_checkpoint_name(step):
+    if step == "build":
+        # checkpoint for build step is an entire dir
+        return build_dir + "/end2end_cybsecmlp_build"
+    else:
+        # other checkpoints are onnx files
+        return build_dir + "/end2end_cybsecmlp_%s.onnx" % (step)
+
+
+class CybSecMLPForExport(nn.Module):
+    def __init__(self, my_pretrained_model):
+        super(CybSecMLPForExport, self).__init__()
+        self.pretrained = my_pretrained_model
+        self.qnt_output = QuantIdentity(
+            quant_type=QuantType.BINARY, bit_width=1, min_val=-1.0, max_val=1.0
+        )
+
+    def forward(self, x):
+        # assume x contains bipolar {-1,1} elems
+        # shift from {-1,1} -> {0,1} since that is the
+        # input range for the trained network
+        x = (x + torch.tensor([1.0])) / 2.0
+        out_original = self.pretrained(x)
+        out_final = self.qnt_output(out_original)  # output as {-1,1}
+        return out_final
+
+
+def test_end2end_cybsec_mlp_export():
+    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
+    # load up trained net in Brevitas
+    input_size = 593
+    hidden1 = 64
+    hidden2 = 64
+    hidden3 = 64
+    weight_bit_width = 2
+    act_bit_width = 2
+    num_classes = 1
+    model = nn.Sequential(
+        QuantLinear(input_size, hidden1, bias=True, weight_bit_width=weight_bit_width),
+        nn.BatchNorm1d(hidden1),
+        nn.Dropout(0.5),
+        QuantReLU(bit_width=act_bit_width),
+        QuantLinear(hidden1, hidden2, bias=True, weight_bit_width=weight_bit_width),
+        nn.BatchNorm1d(hidden2),
+        nn.Dropout(0.5),
+        QuantReLU(bit_width=act_bit_width),
+        QuantLinear(hidden2, hidden3, bias=True, weight_bit_width=weight_bit_width),
+        nn.BatchNorm1d(hidden3),
+        nn.Dropout(0.5),
+        QuantReLU(bit_width=act_bit_width),
+        QuantLinear(hidden3, num_classes, bias=True, weight_bit_width=weight_bit_width),
+    )
+    trained_state_dict = torch.load(assets_dir + "/state_dict.pth")[
+        "models_state_dict"
+    ][0]
+    model.load_state_dict(trained_state_dict, strict=False)
+    W_orig = model[0].weight.data.detach().numpy()
+    # pad the second (593-sized) dimension with 7 zeroes at the end
+    W_new = np.pad(W_orig, [(0, 0), (0, 7)])
+    model[0].weight.data = torch.from_numpy(W_new)
+    model_for_export = CybSecMLPForExport(model)
+    export_onnx_path = get_checkpoint_name("export")
+    input_shape = (1, 600)
+    bo.export_finn_onnx(model_for_export, input_shape, export_onnx_path)
+    assert os.path.isfile(export_onnx_path)
+    # fix input datatype
+    finn_model = ModelWrapper(export_onnx_path)
+    finnonnx_in_tensor_name = finn_model.graph.input[0].name
+    finn_model.set_tensor_datatype(finnonnx_in_tensor_name, DataType.BIPOLAR)
+    finn_model.save(export_onnx_path)
+    assert tuple(finn_model.get_tensor_shape(finnonnx_in_tensor_name)) == (1, 600)
+    assert len(finn_model.graph.node) == 30
+    assert finn_model.graph.node[0].op_type == "Add"
+    assert finn_model.graph.node[1].op_type == "Div"
+    assert finn_model.graph.node[2].op_type == "MatMul"
+    assert finn_model.graph.node[-1].op_type == "MultiThreshold"
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_cybsec_mlp_build():
+    model_file = get_checkpoint_name("export")
+    load_test_checkpoint_or_skip(model_file)
+    build_env = get_build_env(build_kind, target_clk_ns)
+    output_dir = make_build_dir("test_end2end_cybsec_mlp_build")
+
+    cfg = build.DataflowBuildConfig(
+        output_dir=output_dir,
+        target_fps=1000000,
+        synth_clk_period_ns=target_clk_ns,
+        board=build_env["board"],
+        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
+        generate_outputs=[
+            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
+            build_cfg.DataflowOutputType.BITFILE,
+            build_cfg.DataflowOutputType.PYNQ_DRIVER,
+            build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
+        ],
+    )
+    build.build_dataflow_cfg(model_file, cfg)
+    # check the generated files
+    assert os.path.isfile(output_dir + "/time_per_step.json")
+    assert os.path.isfile(output_dir + "/final_hw_config.json")
+    assert os.path.isfile(output_dir + "/driver/driver.py")
+    est_cycles_report = output_dir + "/report/estimate_layer_cycles.json"
+    assert os.path.isfile(est_cycles_report)
+    est_res_report = output_dir + "/report/estimate_layer_resources.json"
+    assert os.path.isfile(est_res_report)
+    assert os.path.isfile(output_dir + "/report/estimate_network_performance.json")
+    assert os.path.isfile(output_dir + "/bitfile/finn-accel.bit")
+    assert os.path.isfile(output_dir + "/bitfile/finn-accel.hwh")
+    assert os.path.isfile(output_dir + "/report/post_synth_resources.xml")
+    assert os.path.isfile(output_dir + "/report/post_route_timing.rpt")
+    # examine the report contents
+    with open(est_cycles_report, "r") as f:
+        est_cycles_dict = json.load(f)
+        assert est_cycles_dict["StreamingFCLayer_Batch_0"] == 80
+        assert est_cycles_dict["StreamingFCLayer_Batch_1"] == 64
+    with open(est_res_report, "r") as f:
+        est_res_dict = json.load(f)
+        assert est_res_dict["total"]["LUT"] == 11360.0
+        assert est_res_dict["total"]["BRAM_18K"] == 36.0
+    shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build"))
+
+
+def test_end2end_cybsec_mlp_run_on_hw():
+    build_env = get_build_env(build_kind, target_clk_ns)
+    assets_dir = pk.resource_filename("finn.qnn-data", "cybsec-mlp/")
+    deploy_dir = get_checkpoint_name("build")
+    if not os.path.isdir(deploy_dir):
+        pytest.skip(deploy_dir + " not found from previous test step, skipping")
+    driver_dir = deploy_dir + "/driver"
+    assert os.path.isdir(driver_dir)
+    # put all assets into driver dir
+    shutil.copy(assets_dir + "/validate-unsw-nb15.py", driver_dir)
+    # put a copy of binarized dataset into driver dir
+    dataset_url = (
+        "https://zenodo.org/record/4519767/files/unsw_nb15_binarized.npz?download=1"
+    )
+    dataset_local = driver_dir + "/unsw_nb15_binarized.npz"
+    if not os.path.isfile(dataset_local):
+        wget.download(dataset_url, out=dataset_local)
+    assert os.path.isfile(dataset_local)
+    # create a shell script for running validation: 10 batches x 10 imgs
+    with open(driver_dir + "/validate.sh", "w") as f:
+        f.write(
+            """#!/bin/bash
+cd %s/driver
+echo %s | sudo -S python3.6 validate-unsw-nb15.py --batchsize=10 --limit_batches=10
+            """
+            % (
+                build_env["target_dir"] + "/end2end_cybsecmlp_build",
+                build_env["password"],
+            )
+        )
+    # set up rsync command
+    remote_target = "%s@%s:%s" % (
+        build_env["username"],
+        build_env["ip"],
+        build_env["target_dir"],
+    )
+    rsync_res = subprocess.run(
+        [
+            "sshpass",
+            "-p",
+            build_env["password"],
+            "rsync",
+            "-avz",
+            deploy_dir,
+            remote_target,
+        ]
+    )
+    assert rsync_res.returncode == 0
+    remote_verif_cmd = [
+        "sshpass",
+        "-p",
+        build_env["password"],
+        "ssh",
+        "%s@%s" % (build_env["username"], build_env["ip"]),
+        "sh",
+        build_env["target_dir"] + "/end2end_cybsecmlp_build/driver/validate.sh",
+    ]
+    verif_res = subprocess.run(
+        remote_verif_cmd,
+        stdout=subprocess.PIPE,
+        universal_newlines=True,
+        input=build_env["password"],
+    )
+    assert verif_res.returncode == 0
+    log_output = verif_res.stdout.split("\n")
+    assert log_output[-3] == "batch 10 / 10 : total OK 93 NOK 7"
+    assert log_output[-2] == "Final accuracy: 93.000000"
diff --git a/tests/end2end/test_ext_weights.py b/tests/end2end/test_ext_weights.py
new file mode 100644
index 0000000000000000000000000000000000000000..0407395ed57dc07c6700efcebbb1fc8a767877bb
--- /dev/null
+++ b/tests/end2end/test_ext_weights.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2021, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
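A note on the log_output indexing in the run-on-hardware test above: the captured stdout ends with a newline, so splitting on "\n" leaves a trailing empty string, which is why the accuracy line sits at index -2 and the last batch line at index -3. A standalone sketch (the stdout content is illustrative):

# illustrative stdout from the remote validation run
stdout = "batch 10 / 10 : total OK 93 NOK 7\nFinal accuracy: 93.000000\n"
lines = stdout.split("\n")
# lines[-1] == "" because of the trailing newline
assert lines[-3] == "batch 10 / 10 : total OK 93 NOK 7"
assert lines[-2] == "Final accuracy: 93.000000"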
+
+import finn.builder.build_dataflow as build
+import finn.builder.build_dataflow_config as build_cfg
+import os
+import shutil
+from finn.util.test import get_build_env, load_test_checkpoint_or_skip
+import pytest
+from finn.util.basic import make_build_dir
+import pkg_resources as pk
+import wget
+import subprocess
+
+target_clk_ns = 10
+build_kind = "zynq"
+build_dir = os.environ["FINN_BUILD_DIR"]
+onnx_zip_url = "https://github.com/Xilinx/finn-examples"
+onnx_zip_url += "/releases/download/v0.0.1a/onnx-models-bnn-pynq.zip"
+onnx_zip_local = build_dir + "/onnx-models-bnn-pynq.zip"
+onnx_dir_local = build_dir + "/onnx-models-bnn-pynq"
+
+
+def get_checkpoint_name(step):
+    if step == "build":
+        # checkpoint for build step is an entire dir
+        return build_dir + "/end2end_ext_weights_build"
+    elif step == "download":
+        return onnx_dir_local + "/tfc-w1a1.onnx"
+    else:
+        # other checkpoints are onnx files
+        return build_dir + "/end2end_ext_weights_%s.onnx" % (step)
+
+
+def test_end2end_ext_weights_download():
+    if not os.path.isfile(onnx_zip_local):
+        wget.download(onnx_zip_url, out=onnx_zip_local)
+    assert os.path.isfile(onnx_zip_local)
+    subprocess.check_output(["unzip", "-o", onnx_zip_local, "-d", onnx_dir_local])
+    assert os.path.isfile(get_checkpoint_name("download"))
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_ext_weights_build():
+    model_file = get_checkpoint_name("download")
+    load_test_checkpoint_or_skip(model_file)
+    build_env = get_build_env(build_kind, target_clk_ns)
+    folding_config_file = pk.resource_filename(
+        "finn.qnn-data", "test_ext_weights/tfc-w1a1-extw.json"
+    )
+    output_dir = make_build_dir("test_end2end_ext_weights_build")
+    cfg = build.DataflowBuildConfig(
+        output_dir=output_dir,
+        folding_config_file=folding_config_file,
+        synth_clk_period_ns=target_clk_ns,
+        board=build_env["board"],
+        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
+        generate_outputs=[
+            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
+            build_cfg.DataflowOutputType.BITFILE,
+            build_cfg.DataflowOutputType.PYNQ_DRIVER,
+            build_cfg.DataflowOutputType.DEPLOYMENT_PACKAGE,
+        ],
+    )
+    build.build_dataflow_cfg(model_file, cfg)
+    assert os.path.isfile(output_dir + "/deploy/bitfile/finn-accel.bit")
+    assert os.path.isfile(output_dir + "/deploy/bitfile/finn-accel.hwh")
+    assert os.path.isfile(output_dir + "/deploy/driver/driver.py")
+    assert os.path.isfile(output_dir + "/deploy/driver/runtime_weights/idma0.npy")
+    if os.path.isdir(get_checkpoint_name("build")):
+        shutil.rmtree(get_checkpoint_name("build"))
+    shutil.copytree(output_dir + "/deploy", get_checkpoint_name("build"))
+
+
+def test_end2end_ext_weights_run_on_hw():
+    build_env = get_build_env(build_kind, target_clk_ns)
+    deploy_dir = get_checkpoint_name("build")
+    if not os.path.isdir(deploy_dir):
+        pytest.skip(deploy_dir + " not found from previous test step, skipping")
+    driver_dir = deploy_dir + "/driver"
+    assert os.path.isdir(driver_dir)
+    # create a shell script for running validation on the full MNIST test set
+    with open(driver_dir + "/validate.sh", "w") as f:
+        f.write(
+            """#!/bin/bash
+cd %s/driver
+echo %s | sudo -S python3.6 validate.py --dataset mnist --bitfile %s
+            """
+            % (
+                build_env["target_dir"] + "/end2end_ext_weights_build",
+                build_env["password"],
+                "../bitfile/finn-accel.bit",
+            )
+        )
+    # set up rsync command
+    remote_target = "%s@%s:%s" % (
+        build_env["username"],
+        build_env["ip"],
+        build_env["target_dir"],
+    )
+    rsync_res = subprocess.run(
+        [
+            "sshpass",
+            "-p",
+            build_env["password"],
+            "rsync",
+            "-avz",
+            deploy_dir,
+            remote_target,
+        ]
+    )
+    assert rsync_res.returncode == 0
+    remote_verif_cmd = [
+        "sshpass",
+        "-p",
+        build_env["password"],
+        "ssh",
+        "%s@%s" % (build_env["username"], build_env["ip"]),
+        "sh",
+        build_env["target_dir"] + "/end2end_ext_weights_build/driver/validate.sh",
+    ]
+    verif_res = subprocess.run(
+        remote_verif_cmd,
+        stdout=subprocess.PIPE,
+        universal_newlines=True,
+        input=build_env["password"],
+    )
+    assert verif_res.returncode == 0
+    log_output = verif_res.stdout.split("\n")
+    assert log_output[-3] == "batch 100 / 100 : total OK 9296 NOK 704"
+    assert log_output[-2] == "Final accuracy: 92.960000"
diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_op.py b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..94eb52835b1800a839e5a9792e9cf1d7be1e681d
--- /dev/null
+++ b/tests/transformation/streamline/test_move_identical_op_past_join_op.py
@@ -0,0 +1,94 @@
+import pytest
+
+from onnx import helper as oh
+from onnx import TensorProto
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.streamline.reorder import MoveTransposePastJoinAdd
+from finn.util.basic import gen_finn_dt_tensor
+import finn.core.onnx_exec as oxe
+
+
+def create_model(perm):
+    if perm == [0, 3, 1, 2]:
+        in_shape = [1, 128, 1, 256]
+        out_shape = [1, 256, 128, 1]
+    elif perm == [0, 2, 3, 1]:
+        in_shape = [1, 256, 128, 1]
+        out_shape = [1, 128, 1, 256]
+
+    Transpose1_node = oh.make_node(
+        "Transpose", inputs=["in_transpose1"], outputs=["out_transpose1"], perm=perm
+    )
+
+    Transpose2_node = oh.make_node(
+        "Transpose", inputs=["in_transpose2"], outputs=["out_transpose2"], perm=perm
+    )
+
+    Join1_node = oh.make_node(
+        "Add", inputs=["out_transpose1", "out_transpose2"], outputs=["out_join1"]
+    )
+
+    in_transpose1 = oh.make_tensor_value_info(
+        "in_transpose1", TensorProto.FLOAT, in_shape
+    )
+    in_transpose2 = oh.make_tensor_value_info(
+        "in_transpose2", TensorProto.FLOAT, in_shape
+    )
+    out_transpose1 = oh.make_tensor_value_info(
+        "out_transpose1", TensorProto.FLOAT, out_shape
+    )
+    out_transpose2 = oh.make_tensor_value_info(
+        "out_transpose2", TensorProto.FLOAT, out_shape
+    )
+    out_join1 = oh.make_tensor_value_info("out_join1", TensorProto.FLOAT, out_shape)
+
+    graph = oh.make_graph(
+        nodes=[Transpose1_node, Transpose2_node, Join1_node],
+        name="test_graph",
+        inputs=[in_transpose1, in_transpose2],
+        outputs=[out_join1],
+        value_info=[
+            out_transpose1,
+            out_transpose2,
+        ],
+    )
+
+    onnx_model = oh.make_model(graph, producer_name="test_model")
+    model = ModelWrapper(onnx_model)
+
+    return model
+
+
+# Permutation of transpose node
+@pytest.mark.parametrize("perm", [[0, 3, 1, 2], [0, 2, 3, 1]])
+def test_move_identical_op_past_join_op(perm):
+    model = create_model(perm)
+
+    # Create input data
+    input0_tensor_name = model.graph.input[0].name
+    input1_tensor_name = model.graph.input[1].name
+
+    # Note: it is assumed that both tensors have the same shape and data type
+    input_shape = model.get_tensor_shape(input0_tensor_name)
+    input_dtype = model.get_tensor_datatype(input0_tensor_name)
+    input_val = gen_finn_dt_tensor(input_dtype, input_shape)
+    input_dict = {}
+    input_dict[input0_tensor_name] = input_val
+    input_dict[input1_tensor_name] = input_val
+
+    model_transformed = model.transform(MoveTransposePastJoinAdd())
+
+    assert oxe.compare_execution(model, model_transformed, input_dict)
+
+    # Check if order changed
+    node0_input0_model = model.find_consumers(model.graph.input[0].name)[0].op_type
+    node1_input1_model = model.find_consumers(model.graph.input[1].name)[0].op_type
+    node0_input0_model_transformed = model_transformed.find_consumers(
+        model_transformed.graph.input[0].name
+    )[0].op_type
+    node1_input1_model_transformed = model_transformed.find_consumers(
+        model_transformed.graph.input[1].name
+    )[0].op_type
+    assert node0_input0_model != node0_input0_model_transformed
+    assert node1_input1_model != node1_input1_model_transformed
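The rewrite rule exercised by the test above can also be checked numerically: for any permutation, summing two transposed tensors equals transposing the sum. A self-contained sketch in numpy, using the same permutation/shape pair as the first test case:

import numpy as np

perm = (0, 3, 1, 2)
x = np.random.rand(1, 128, 1, 256)
y = np.random.rand(1, 128, 1, 256)
# Transpose(x) + Transpose(y) == Transpose(x + y), the pattern that
# MoveTransposePastJoinAdd rewrites in the ONNX graph
lhs = np.transpose(x, perm) + np.transpose(y, perm)
rhs = np.transpose(x + y, perm)
assert np.allclose(lhs, rhs)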