diff --git a/.gitignore b/.gitignore
index c121bbd8e09a08d7f963ca4c5b0efd6f8faf1498..6e557f12a1828f7045b0b475db32f4092c89106c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,3 +53,4 @@ brevitas/
 brevitas_cnv_lfc/
 cnpy/
 finn-hlslib/
+pyverilator/
diff --git a/Dockerfile b/Dockerfile
index 3f33ae8d63c2d30f0b1aef2f2e933dcd153e9194..145424bbca9034ebbe798f7dd7fb6dfe2f9f5878 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,6 +11,8 @@ RUN apt update; apt install nano
 RUN pip install jupyter
 RUN pip install netron
 RUN pip install matplotlib
+RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
+RUN apt-get install -y verilator
 
 # Note that we expect the cloned finn directory on the host to be
 # mounted on /workspace/finn -- see run-docker.sh for an example
@@ -20,7 +22,7 @@ RUN pip install matplotlib
 ENV PYTHONPATH "${PYTHONPATH}:/workspace/finn/src"
 ENV PYTHONPATH "${PYTHONPATH}:/workspace/brevitas_cnv_lfc/training_scripts"
 ENV PYTHONPATH "${PYTHONPATH}:/workspace/brevitas"
-
+ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator"
 
 ARG GID
 ARG GNAME
@@ -37,4 +39,6 @@ RUN ln -s /workspace /home/$UNAME
 RUN chown -R $UNAME:$GNAME /home/$UNAME
 USER $UNAME
 
+RUN echo "source \$VIVADO_PATH/settings64.sh" >> /home/$UNAME/.bashrc
+
 WORKDIR /home/$UNAME/finn
diff --git a/notebooks/FINN-VerificationHLSCustomOp.ipynb b/notebooks/FINN-VerificationHLSCustomOp.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..f138ebf7e2727d807fa11153ff0b020267f47fd7
--- /dev/null
+++ b/notebooks/FINN-VerificationHLSCustomOp.ipynb
@@ -0,0 +1,576 @@
+{
+ "cells": [
+  {
+   "cell_type": "raw",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# FINN - Verification of an HLSCustomOp node\n",
+    "-----------------------------------------------------------------\n",
+    "This notebook is about the verification flow and options for FINN custom operation nodes. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Outline\n",
+    "-------------\n",
+    "* Example model (sliding window function)\n",
+    "* c++ high level simulation\n",
+    "* Vivado IP synthesis and pyverilator execution flow"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example model\n",
+    "To show the possibilities of how to verify a FINN HLSCustomOp node, an example model with the [sliding window function](https://finn-hlslib.readthedocs.io/en/latest/library/swg.html) of the finn-hlslib is used. For that a corresponding ONNX node is created. The ONNX node contains all the template parameters of the corresponding finn-hlslib function as attributes. The function is shown below."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![title](im2col_finnhlslib.PNG)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the next step the individual parameters are defined. First, the class `DataType` is imported from FINN to be able to use data types like bipolar. With the member function `bitwidth()` the parameter `Input_precision` can be derived directly from this data type. The other parameters are set to reasonable values. The output dimension can be calculated from the input dimension, the kernel size and the stride."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from finn.core.datatype import DataType\n",
+    "idt = DataType.BIPOLAR          # input data type\n",
+    "ip = idt.bitwidth()             # input precision\n",
+    "k = 2                           # kernel size\n",
+    "ifm_dim = 4                     # input dimension\n",
+    "ifm_ch = 1                      # input channels\n",
+    "stride = 2                      # stride\n",
+    "simd = ifm_ch                   # simd\n",
+    "\n",
+    "# output dimension\n",
+    "ofm_dim = int(((ifm_dim - k) / stride) + 1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "An additional variable is defined to be able to infer the shape of the output tensor. Furthermore the output data type is set to the same value as the input data type."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out_pix = ofm_dim * ofm_dim\n",
+    "odt = idt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To create an ONNX node, first TensorProto and helper are imported from ONNX. These can be used to create tensors, nodes, graphs and models in ONNX. After importing, the input and output tensors can be created."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from onnx import TensorProto, helper\n",
+    "\n",
+    "inp = helper.make_tensor_value_info(\n",
+    "    \"inp\", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]\n",
+    ")\n",
+    "outp = helper.make_tensor_value_info(\n",
+    "    \"outp\", TensorProto.FLOAT, [1, out_pix, k * k * ifm_ch]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now the node can be built. This node is directly integrated into a graph environment and from this the ONNX model is created. For more information about the creation and manipulation of an ONNX model, please refer to jupyter notebook [FINN-HowToWorkWithONNX](FINN-HowToWorkWithONNX.ipynb)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "SlidingWindow_node = helper.make_node(\n",
+    "        \"ConvolutionInputGenerator\",\n",
+    "        [\"inp\"],\n",
+    "        [\"outp\"],\n",
+    "        domain=\"finn\",\n",
+    "        backend=\"fpgadataflow\",\n",
+    "        ConvKernelDim=k,\n",
+    "        IFMChannels=ifm_ch,\n",
+    "        Input_precision=ip,\n",
+    "        IFMDim=ifm_dim,\n",
+    "        OFMDim=ofm_dim,\n",
+    "        SIMD=simd,\n",
+    "        Stride=stride,\n",
+    "        inputDataType=idt.name,\n",
+    "        outputDataType=odt.name,\n",
+    "    )\n",
+    "graph = helper.make_graph(\n",
+    "        nodes=[SlidingWindow_node],\n",
+    "        name=\"slidingwindow_graph\",\n",
+    "        inputs=[inp],\n",
+    "        outputs=[outp],\n",
+    "    )\n",
+    "\n",
+    "model = helper.make_model(graph, producer_name=\"slidingwindow-model\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "FINN provides a thin wrapper around the ONNX model with a lot of helper functions that can be used by importing the class `ModelWrapper`. More information about `ModelWrapper` can be found in Jupyter notebook [FINN-ModelWrapper](FINN-ModelWrapper.ipynb). Here it is used to assign FINN data types to the tensors."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from finn.core.modelwrapper import ModelWrapper\n",
+    "\n",
+    "model = ModelWrapper(model)\n",
+    "\n",
+    "model.set_tensor_datatype(\"inp\", idt)\n",
+    "model.set_tensor_datatype(\"outp\", odt)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What the model looks like can be visualized with netron. Netron is a visualizer for neural network, deep learning and machine learning models. For this the model is first saved."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.save(\"original_model.onnx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Serving 'original_model.onnx' at http://0.0.0.0:8081\n"
+     ]
+    }
+   ],
+   "source": [
+    "import netron\n",
+    "netron.start('original_model.onnx', port=8081, host=\"0.0.0.0\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<iframe src=\"http://0.0.0.0:8081/\" style=\"position: relative; width: 100%;\" height=\"400\"></iframe>\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "%%html\n",
+    "<iframe src=\"http://0.0.0.0:8081/\" style=\"position: relative; width: 100%;\" height=\"400\"></iframe>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now that we have the model, we can use various features of FINN to manipulate it. The basic principle of FINN is that there are transformation and analysis passes that can be applied to a model. A transformation pass changes a given model and returns the changed model. An analysis pass traverses the graph structure and produces information about certain properties. It returns a dictionary of named properties.\n",
+    "\n",
+    "The following section describes the transformation passes that can be used to verify an HLSCustomOp node. Firstly the verification with a c++ high level simulation is shown and afterwards with a Vivado IP synthesis and pyverilator execution flow."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### c++ high level simulation\n",
+    "\n",
+    "First, an additional attribute must be set to specify which of the two verification types should be used when executing the node. This is done with the transformation pass `SetSimMode`, to which the desired mode is passed. After that the transformation pass `CodeGen_npysim` can be applied. With this transformation c++ code is generated and stored in a temporary directory. In addition, a further attribute is set, which contains the path to this directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from finn.transformation.fpgadataflow.set_sim_mode import SetSimMode\n",
+    "from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim\n",
+    "\n",
+    "model = model.transform(SetSimMode(\"npysim\"))\n",
+    "model = model.transform(CodeGen_npysim())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you now save the model again and display it, these changes can be seen."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Stopping http://0.0.0.0:8081\n",
+      "Serving 'modified_model.onnx' at http://0.0.0.0:8081\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.save(\"modified_model.onnx\")\n",
+    "netron.start('modified_model.onnx', port=8081, host=\"0.0.0.0\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<iframe src=\"http://0.0.0.0:8081/\" style=\"position: relative; width: 100%;\" height=\"400\"></iframe>\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "%%html\n",
+    "<iframe src=\"http://0.0.0.0:8081/\" style=\"position: relative; width: 100%;\" height=\"400\"></iframe>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next step is to create the executable from the .cpp file using the `Compile` transformation. The path to the executable is also stored in a new attribute."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from finn.transformation.fpgadataflow.compile import Compile\n",
+    "model = model.transform(Compile())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "All required files are now available and we can execute the node. This is done with the `execute_onnx` function, which gets the model and an input dictionary. That means we have to create an input tensor first. For this we use a numpy array."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[[-1. -1.  1.  1.]\n",
+      "   [-1. -1. -1. -1.]\n",
+      "   [ 1. -1.  1. -1.]\n",
+      "   [ 1.  1.  1. -1.]]]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "x = np.asarray([-1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, -1], dtype=np.float32).reshape(1, ifm_ch, ifm_dim, ifm_dim)\n",
+    "print(x)\n",
+    "input_dict = {\"inp\": (x + 1) /2}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To be able to use `execute_onnx()`, `onnx_exec` must be imported. Inside `execute_onnx()` the attribute `sim_mode` is read and, if \"npysim\" is selected, the input array is saved in a .npy file and the previously created executable is executed. The output is written to another .npy file, which is then read by `execute_onnx()` and saved as output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[[-1. -1. -1. -1.]\n",
+      "  [ 1.  1. -1. -1.]\n",
+      "  [ 1. -1.  1.  1.]\n",
+      "  [ 1. -1.  1. -1.]]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import finn.core.onnx_exec as oxe\n",
+    "y_npysim = oxe.execute_onnx(model, input_dict)[\"outp\"]\n",
+    "print(y_npysim)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A different transformation flow can be used for verification. This will be discussed in the next section."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Vivado IP synthesis and pyverilator execution flow\n",
+    "\n",
+    "In this verification flow, .cpp code is generated from the node and synthesized to an IP block using Vivado. Afterwards the functionality can be simulated with [pyverilator](https://github.com/maltanar/pyverilator). Pyverilator is a tool which makes it possible to simulate Verilog files using Verilator via a Python interface."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the first step `sim_mode` must be set to \"rtlsim\" in order to select the corresponding functionality when executing the node. In addition, the nodes in the model are assigned unique names using the `GiveUniqueNodeNames()` transformation. Then the transformation `CodeGen_ipgen()` can be executed. Two arguments are passed to this transformation, one is an fpga part and the other is a value for the clock."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from finn.transformation.general import GiveUniqueNodeNames\n",
+    "from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen\n",
+    "model = model.transform(SetSimMode(\"rtlsim\"))\n",
+    "model = model.transform(GiveUniqueNodeNames())\n",
+    "model = model.transform(CodeGen_ipgen(\"xc7z020clg400-1\", 5))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "During the transformation a new attribute is set that points to the temporary directory in which the .cpp file and a .tcl script are stored; the .tcl script is used to start the synthesis. This can be seen in the following using netron."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Stopping http://0.0.0.0:8081\n",
+      "Serving 'modified_model.onnx' at http://0.0.0.0:8081\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.save(\"modified_model.onnx\")\n",
+    "netron.start('modified_model.onnx', port=8081, host=\"0.0.0.0\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<iframe src=\"http://0.0.0.0:8081/\" style=\"position: relative; width: 100%;\" height=\"400\"></iframe>\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "%%html\n",
+    "<iframe src=\"http://0.0.0.0:8081/\" style=\"position: relative; width: 100%;\" height=\"400\"></iframe>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next step is to perform the synthesis using the `HLSSynth_IPGen()` transformation and set another attribute with the project directory, which contains the IP block. \n",
+    "\n",
+    "So that the execution can run without errors, two env variables must be set inside the jupyter notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "env: PWD=/workspace/finn/notebooks\n",
+      "env: VIVADO_PATH=/home/maltanar/Xilinx/Vivado/2017.4\n"
+     ]
+    }
+   ],
+   "source": [
+    "# both env variables are set here because they are used inside the transformation\n",
+    "%env PWD=/workspace/finn/notebooks\n",
+    "%env VIVADO_PATH=/home/maltanar/Xilinx/Vivado/2017.4\n",
+    "\n",
+    "from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen\n",
+    "model = model.transform(HLSSynth_IPGen())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now the execution can run again and pyverilator is used in the background to simulate the generated verilog files."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[[-1., -1., -1., -1.],\n",
+       "        [ 1.,  1., -1., -1.],\n",
+       "        [ 1., -1.,  1.,  1.],\n",
+       "        [ 1., -1.,  1., -1.]]], dtype=float32)"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_rtlsim = oxe.execute_onnx(model, input_dict)[\"outp\"]\n",
+    "y_rtlsim"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In the last step it can be checked whether the two results from the simulations match."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert (y_npysim == y_rtlsim).all()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/im2col_finnhlslib.PNG b/notebooks/im2col_finnhlslib.PNG
new file mode 100755
index 0000000000000000000000000000000000000000..4df7c3041426576fe5b422aba345a52b3d3ab51c
Binary files /dev/null and b/notebooks/im2col_finnhlslib.PNG differ
diff --git a/run-docker.sh b/run-docker.sh
index c025ab2a2cf935916bf22d68d1c3f08fc58c30be..4a2f0cfcf884c8711d32c85ce5c52aa2927ef97a 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -21,25 +21,29 @@ BREVITAS_REPO=https://github.com/Xilinx/brevitas.git
 EXAMPLES_REPO=https://github.com/maltanar/brevitas_cnv_lfc.git
 CNPY_REPO=https://github.com/rogersce/cnpy.git
 FINN_HLS_REPO=https://github.com/Xilinx/finn-hlslib.git
+PYVERILATOR_REPO=https://github.com/maltanar/pyverilator
 
 BREVITAS_LOCAL=$SCRIPTPATH/brevitas
 EXAMPLES_LOCAL=$SCRIPTPATH/brevitas_cnv_lfc
 CNPY_LOCAL=$SCRIPTPATH/cnpy
 FINN_HLS_LOCAL=$SCRIPTPATH/finn-hlslib
-VIVADO_HLS_LOCAL=$VIVADO_PATH/include
+PYVERILATOR_LOCAL=$SCRIPTPATH/pyverilator
+VIVADO_HLS_LOCAL=$VIVADO_PATH
 
 # clone dependency repos
 git clone --branch feature/finn_onnx_export $BREVITAS_REPO $BREVITAS_LOCAL ||  git -C "$BREVITAS_LOCAL" pull
 git clone $EXAMPLES_REPO $EXAMPLES_LOCAL ||  git -C "$EXAMPLES_LOCAL" pull
 git clone $CNPY_REPO $CNPY_LOCAL ||  git -C "$CNPY_LOCAL" pull
 git clone $FINN_HLS_REPO $FINN_HLS_LOCAL; git -C "$FINN_HLS_LOCAL" checkout b5dc957a16017b8356a7010144b0a4e2f8cfd124 ||  git -C "$FINN_HLS_LOCAL" checkout b5dc957a16017b8356a7010144b0a4e2f8cfd124
+git clone $PYVERILATOR_REPO $PYVERILATOR_LOCAL ||  git -C "$PYVERILATOR_LOCAL" pull
 
 echo "Mounting $SCRIPTPATH into /workspace/finn"
 echo "Mounting $SCRIPTPATH/brevitas into /workspace/brevitas"
 echo "Mounting $SCRIPTPATH/brevitas_cnv_lfc into /workspace/brevitas_cnv_lfc"
 echo "Mounting $SCRIPTPATH/cnpy into /workspace/cnpy"
 echo "Mounting $SCRIPTPATH/finn-hlslib into /workspace/finn-hlslib"
-echo "Mounting $VIVADO_PATH/include into /workspace/vivado-hlslib"
+echo "Mounting $SCRIPTPATH/pyverilator into /workspace/pyverilator"
+echo "Mounting $VIVADO_PATH into $VIVADO_PATH"
 
 if [ "$1" = "test" ]; then
 	echo "Running test suite"
@@ -62,11 +66,14 @@ docker build --tag=$DOCKER_TAG \
              .
 # Launch container with current directory mounted
 docker run --rm --name finn_dev -it \
+-e "XILINX_VIVADO=$VIVADO_PATH" \
 -v $SCRIPTPATH:/workspace/finn \
 -v $SCRIPTPATH/brevitas:/workspace/brevitas \
 -v $SCRIPTPATH/brevitas_cnv_lfc:/workspace/brevitas_cnv_lfc \
 -v $SCRIPTPATH/cnpy:/workspace/cnpy \
 -v $SCRIPTPATH/finn-hlslib:/workspace/finn-hlslib \
--v $VIVADO_PATH/include:/workspace/vivado-hlslib \
+-v $SCRIPTPATH/pyverilator:/workspace/pyverilator \
+-v $VIVADO_PATH:$VIVADO_PATH \
+-e VIVADO_PATH=$VIVADO_PATH \
 -p 8888:8888 -p 8081:8081 \
 $DOCKER_TAG $DOCKER_CMD
diff --git a/src/finn/core/modelwrapper.py b/src/finn/core/modelwrapper.py
index cd2a2ec5952de072824ff680b0d1ecde0221aa38..483fc0bcf1f1029c50a18ace37bc8d855d55285b 100644
--- a/src/finn/core/modelwrapper.py
+++ b/src/finn/core/modelwrapper.py
@@ -285,6 +285,25 @@ class ModelWrapper:
                 fanout += 1
         return fanout
 
+    def get_metadata_prop(self, key):
+        """Returns the value associated with metadata_prop with given key,
+        or None otherwise."""
+        metadata_prop = util.get_by_name(self.model.metadata_props, key, "key")
+        if metadata_prop is None:
+            return None
+        else:
+            return metadata_prop.value
+
+    def set_metadata_prop(self, key, value):
+        metadata_prop = util.get_by_name(self.model.metadata_props, key, "key")
+        if metadata_prop is None:
+            metadata_prop = onnx.StringStringEntryProto()
+            metadata_prop.key = key
+            metadata_prop.value = value
+            self.model.metadata_props.append(metadata_prop)
+        else:
+            metadata_prop.value = value
+
     def set_attribute(self, node, attribute_name, value):
         """Sets a custom node attribute of given name with given value"""
         """Data types of attributes in onnx are encoded:
diff --git a/src/finn/core/utils.py b/src/finn/core/utils.py
index 0ea2392d903a7ea570b6972a33fab3945a051b43..7f85b244545232979638148e10ae07edc8795b3b 100644
--- a/src/finn/core/utils.py
+++ b/src/finn/core/utils.py
@@ -1,6 +1,7 @@
 import random
 import string
 import subprocess
+import os
 
 import numpy as np
 import onnx
@@ -248,3 +249,31 @@ class CppBuilder:
         bash_command = ["bash", self.compile_script]
         process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
         process_compile.communicate()
+
+class IPGenBuilder:
+    def __init__(self):
+        self.tcl_script = ""
+        self.ipgen_path = ""
+        self.code_gen_dir = ""
+        self.ipgen_script=""
+
+    def append_tcl(self, tcl_script):
+        self.tcl_script = tcl_script
+
+    def set_ipgen_path(self, path):
+        self.ipgen_path = path
+
+    def build(self, code_gen_dir):
+        self.code_gen_dir = code_gen_dir
+        self.ipgen_script = str(self.code_gen_dir) + "/ipgen.sh"
+        working_dir = os.environ["PWD"]
+        f = open(self.ipgen_script, "w")
+        f.write("#!/bin/bash \n")
+        f.write("cd {}\n".format(code_gen_dir))
+        f.write("$VIVADO_PATH/bin/vivado_hls {}\n".format(self.tcl_script))
+        f.write("cd {}\n".format(working_dir))
+        f.close()
+        bash_command = ["bash", self.ipgen_script]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+        
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 6a37519d372687fbd21fea2c326f55531b6f1eea..3f64b8940bf0e9ffdd03086735818c49d96bf1c2 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -3,54 +3,97 @@ import numpy as np
 import os
 import subprocess
 from finn.custom_op import CustomOp
-from finn.core.utils import CppBuilder
+from finn.core.utils import CppBuilder, IPGenBuilder
+import finn.custom_op.fpgadataflow.templates
+from pyverilator import PyVerilator
 
 
 class HLSCustomOp(CustomOp):
     def __init__(self, onnx_node):
         super().__init__(onnx_node)
-        # template for single node execution
-        self.docompute_template = """
-        #include "cnpy.h"
-        #include "npy2apintstream.hpp"
-        #include <vector>
-        #include "bnn-library.h"
 
-        // includes for network parameters
-        $GLOBALS$
+        self.code_gen_dict = {}
 
-        // defines for network parameters
-        $DEFINES$
+        # getting templates from templates.py
 
-        int main(){
+        # template for single node execution
+        self.docompute_template = templates.docompute_template
 
-        $STREAMDECLARATIONS$
+        # templates for single node ip generation
+        # cpp file
+        self.ipgen_template = templates.ipgen_template
+        # tcl script
+        self.ipgentcl_template = templates.ipgentcl_template
 
-        $READNPYDATA$
+    def get_nodeattr_types(self):
+        return {
+            "backend": ("s", True, "fpgadataflow"),
+            "code_gen_dir_npysim": ("s", False, ""),
+            "code_gen_dir_ipgen": ("s", False, ""),
+            "executable_path": ("s", False, ""),
+            "ipgen_path": ("s", False, ""),
+            "sim_mode": ("s", False, ""),
+        }
 
-        $DOCOMPUTE$
+    def code_generation_ipgen(self, model, fpgapart, clk):
+        node = self.onnx_node
 
-        $DATAOUTSTREAM$
+        # generate top cpp file for ip generation
+        path = self.get_nodeattr("code_gen_dir_ipgen")
+        self.generate_params(model, path)
+        self.global_includes()
+        self.defines("ipgen")
+        self.blackboxfunction()
+        self.pragmas()
+        self.docompute()
 
-        $SAVEASCNPY$
+        template = self.ipgen_template
 
-        }
+        for key in self.code_gen_dict:
+            # transform list into long string separated by '\n'
+            code_gen_line = "\n".join(self.code_gen_dict[key])
+            template = template.replace(key, code_gen_line)
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        f = open(os.path.join(code_gen_dir, "top_{}.cpp".format(node.name)), "w")
+        f.write(template)
+        f.close()
+        self.code_gen_dict.clear()
 
-        """
-        self.code_gen_dict = {}
+        # generate tcl script for ip generation
+        self.code_gen_dict["$PROJECTNAME$"] = ["project_{}".format(node.name)]
+        self.code_gen_dict["$HWSRCDIR$"] = [code_gen_dir]
+        self.code_gen_dict["$FPGAPART$"] = [fpgapart]
+        self.code_gen_dict["$FINNHLSLIBDIR$"] = ["/workspace/finn-hlslib"]
+        self.code_gen_dict["$TOPFXN$"] = [node.name]
+        self.code_gen_dict["$CLKPERIOD$"] = [str(clk)]
 
-    def get_nodeattr_types(self):
-        return {
-            "backend": ("s", True, "fpgadataflow"),
-            "code_gen_dir": ("s", False, ""),
-            "executable_path": ("s", False, ""),
-        }
+        template = self.ipgentcl_template
 
-    def code_generation(self, model):
+        for key in self.code_gen_dict:
+            # transform list into long string separated by '\n'
+            code_gen_line = "\n".join(self.code_gen_dict[key])
+            template = template.replace(key, code_gen_line)
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        f = open(os.path.join(code_gen_dir, "hls_syn_{}.tcl".format(node.name)), "w")
+        f.write(template)
+        f.close()
+        self.code_gen_dict.clear()
+
+    def ipgen_singlenode_code(self):
+        node = self.onnx_node
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        builder = IPGenBuilder()
+        builder.append_tcl(code_gen_dir + "/hls_syn_{}.tcl".format(node.name))
+        builder.set_ipgen_path(code_gen_dir + "/project_{}".format(node.name))
+        builder.build(code_gen_dir)
+        self.set_nodeattr("ipgen_path", builder.ipgen_path)
+
+    def code_generation_npysim(self, model):
         node = self.onnx_node
-        self.generate_params(model)
+        path = self.get_nodeattr("code_gen_dir_npysim")
+        self.generate_params(model, path)
         self.global_includes()
-        self.defines()
+        self.defines("npysim")
         self.read_npy_data()
         self.strm_decl()
         self.docompute()
@@ -63,18 +106,21 @@ class HLSCustomOp(CustomOp):
             # transform list into long string separated by '\n'
             code_gen_line = "\n".join(self.code_gen_dict[key])
             template = template.replace(key, code_gen_line)
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w")
         f.write(template)
         f.close()
+        self.code_gen_dict.clear()
 
     def compile_singlenode_code(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         builder = CppBuilder()
+        # to enable additional debug features please uncomment the next line
+        # builder.append_includes("-DDEBUG")
         builder.append_includes("-I/workspace/finn/src/finn/data/cpp")
         builder.append_includes("-I/workspace/cnpy/")
         builder.append_includes("-I/workspace/finn-hlslib")
-        builder.append_includes("-I/workspace/vivado-hlslib")
+        builder.append_includes("-I{}/include".format(os.environ["VIVADO_PATH"]))
         builder.append_includes("--std=c++11")
         builder.append_sources(code_gen_dir + "/*.cpp")
         builder.append_sources("/workspace/cnpy/cnpy.cpp")
@@ -85,11 +131,11 @@ class HLSCustomOp(CustomOp):
 
     def dynamic_input_to_npy(self, context, count):
         node = self.onnx_node
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         if code_gen_dir == "":
             raise Exception(
                 """
-Found no codegen dir for this node, did you run the codegen transformation?
+Found no codegen dir for this node, did you run the codegen_npysim transformation?
             """
             )
         # create a npy file for each input of the node (in_ind is input index)
@@ -104,7 +150,7 @@ Found no codegen dir for this node, did you run the codegen transformation?
     def npy_to_dynamic_output(self, context):
         # TODO support multi-output nodes as needed
         node = self.onnx_node
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         output = np.load("{}/output.npy".format(code_gen_dir))
         context[node.output[0]] = output
 
@@ -121,15 +167,74 @@ compilation transformations?
         process_execute = subprocess.Popen(executable_path, stdout=subprocess.PIPE)
         process_execute.communicate()
 
+    def reset_rtlsim(self, sim):
+        """Apply the reset sequence to the simulation object sim.
+
+        Ten times in a row: pull ap_rst_n low, run five ap_clk cycles
+        (set to 1, then 0), and drive ap_rst_n high again. Signals are
+        written through sim.io.
+        """
+        for _round in range(10):
+            sim.io.ap_rst_n = 0
+            # five clock cycles while ap_rst_n is held at 0
+            for _cycle in range(5):
+                sim.io.ap_clk = 1
+                sim.io.ap_clk = 0
+            sim.io.ap_rst_n = 1
+
+    def toggle_clk(self, sim):
+        """Run ten ap_clk cycles on sim (each cycle: set to 1, then to 0)."""
+        for level in (1, 0) * 10:
+            sim.io.ap_clk = level
+
+    def rtlsim(self, sim, inp):
+        """Feed inp into sim word by word and return the collected outputs.
+
+        Runs a fixed 100 loop iterations on the in0_V_V / out_V_V handshake
+        signals of sim.io; every out_V_V_TDATA value sampled while
+        out_V_V_TVALID is 1 is appended to the returned list.
+        """
+        pending = inp
+        collected = []
+        # output side is permanently marked ready
+        sim.io.out_V_V_TREADY = 1
+        for _ in range(100):
+            sim.io.in0_V_V_TVALID = 1 if len(pending) > 0 else 0
+            if sim.io.in0_V_V_TREADY == 1 and len(pending) > 0:
+                sim.io.in0_V_V_TDATA = pending[0]
+                pending = pending[1:]
+                sim.io.ap_clk = 1
+                sim.io.ap_clk = 0
+                sim.io.in0_V_V_TVALID = 1 if len(pending) > 0 else 0
+            if sim.io.out_V_V_TVALID == 1:
+                collected.append(sim.io.out_V_V_TDATA)
+                sim.io.ap_clk = 1
+                sim.io.ap_clk = 0
+            sim.io.ap_clk = 1
+            sim.io.ap_clk = 0
+        return collected
+
     def execute_node(self, context, graph):
-        # save input(s)
-        self.dynamic_input_to_npy(context, 1)
-        # execute the precompiled model
-        self.exec_precompiled_singlenode_model()
-        # load output npy file
-        self.npy_to_dynamic_output(context)
-
-    def generate_params(self, model):
+        mode = self.get_nodeattr("sim_mode")
+        if mode == "npysim":
+            # save input(s)
+            self.dynamic_input_to_npy(context, 1)
+            # execute the precompiled model
+            self.exec_precompiled_singlenode_model()
+            # load output npy file
+            self.npy_to_dynamic_output(context)
+        elif mode == "rtlsim":
+            # NOTE(review): silent no-op; rtlsim relies on subclass overrides?
+            pass
+        else:
+            raise Exception(
+                """Invalid value for attribute sim_mode! Is currently set to: {}
+            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+    def generate_params(self, model, path):
         pass
 
     @abstractmethod
@@ -137,7 +242,7 @@ compilation transformations?
         pass
 
     @abstractmethod
-    def defines(self):
+    def defines(self, var):
         pass
 
     @abstractmethod
@@ -159,3 +264,11 @@ compilation transformations?
     @abstractmethod
     def save_as_npy(self):
         pass
+
+    @abstractmethod
+    def blackboxfunction(self):
+        """Abstract hook for the $BLACKBOXFUNCTION$ code_gen_dict entry."""
+
+    @abstractmethod
+    def pragmas(self):
+        """Abstract hook for the $PRAGMAS$ code_gen_dict entry."""
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
new file mode 100644
index 0000000000000000000000000000000000000000..91460fdac4f072b0954da1152fa578cd3338fd80
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -0,0 +1,230 @@
+import os
+
+import numpy as np
+from pyverilator import PyVerilator
+
+from finn.core.datatype import DataType
+from finn.custom_op.fpgadataflow import HLSCustomOp
+
+
+class ConvolutionInputGenerator(HLSCustomOp):
+    """Sliding-window input generator op; generated C++ includes slidingwindow.h."""
+
+    def __init__(self, onnx_node):
+        super().__init__(onnx_node)
+
+    def get_nodeattr_types(self):
+        my_attrs = {
+            "ConvKernelDim": ("i", True, 0),
+            "IFMChannels": ("i", True, 0),
+            "Input_precision": ("i", True, 0),
+            "IFMDim": ("i", True, 0),
+            "OFMDim": ("i", True, 0),
+            "SIMD": ("i", True, 0),
+            "Stride": ("i", True, 0),
+            # FINN DataTypes for inputs, outputs
+            "inputDataType": ("s", True, ""),
+            "outputDataType": ("s", True, ""),
+        }
+        my_attrs.update(super().get_nodeattr_types())
+        return my_attrs
+
+    def make_shape_compatible_op(self):
+        pass
+
+    def infer_node_datatype(self, model):
+        pass
+
+    def verify_node(self):
+        pass
+
+    def get_input_datatype(self):
+        return DataType[self.get_nodeattr("inputDataType")]
+
+    def get_output_datatype(self):
+        return DataType[self.get_nodeattr("outputDataType")]
+
+    def get_stream_width(self):
+        return self.get_nodeattr("SIMD") * self.get_nodeattr("Input_precision")
+
+    def execute_node(self, context, graph):
+        """Execute the node in the mode selected by the sim_mode attribute.
+
+        "npysim" feeds the precompiled C++ executable through npy files in
+        code_gen_dir_npysim; "rtlsim" simulates the Verilog generated under
+        code_gen_dir_ipgen with PyVerilator. The result is stored in
+        context[node.output[0]] with shape (1, OFMDim * OFMDim, k * k * IFMChannels).
+        Raises Exception for any other mode or if the Verilog file is missing.
+        """
+        mode = self.get_nodeattr("sim_mode")
+        node = self.onnx_node
+        k = self.get_nodeattr("ConvKernelDim")
+        ifm_dim = self.get_nodeattr("IFMDim")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ofm_dim = self.get_nodeattr("OFMDim")
+        out_pix = ofm_dim * ofm_dim
+
+        if mode == "npysim":
+            # TODO ensure codegen dir exists
+            code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+            inp = context[node.input[0]]
+            assert str(inp.dtype) == "float32"
+            assert inp.shape == (1, ifm_ch, ifm_dim, ifm_dim)
+            # move the channel axis last before saving the input
+            reshaped_inp = inp.transpose(0, 2, 3, 1)
+            if self.get_input_datatype() == DataType.BIPOLAR:
+                # bipolar {-1, +1} is stored as binary {0, 1}, matching
+                # read_npy_data and the 2 * out - 1 conversion below
+                reshaped_inp = (reshaped_inp + 1) / 2
+            np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_inp)
+            # execute the precompiled model
+            super().exec_precompiled_singlenode_model()
+            # load output npy file
+            super().npy_to_dynamic_output(context)
+            if self.get_output_datatype() == DataType.BIPOLAR:
+                out = context[node.output[0]]
+                out = 2 * out - 1
+                context[node.output[0]] = out
+            assert context[node.output[0]].shape == (1, out_pix, k * k, ifm_ch)
+            # reshape output to have expected shape
+            context[node.output[0]] = context[node.output[0]].reshape(
+                1, out_pix, k * k * ifm_ch
+            )
+        elif mode == "rtlsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+            verilog_dir = "{}/project_{}/sol1/impl/verilog/".format(
+                code_gen_dir, node.name
+            )
+            verilog_file = "{}{}.v".format(verilog_dir, node.name)
+            # check if needed file exists
+            if not os.path.isfile(verilog_file):
+                raise Exception(
+                    """Found no verilog files for this node,
+                    did you run the codegen_ipgen transformation?"""
+                )
+            inp = context[node.input[0]].flatten()
+            sim = PyVerilator.build(verilog_file, verilog_path=[verilog_dir])
+            super().reset_rtlsim(sim)
+            super().toggle_clk(sim)
+            output = [int(x) for x in self.rtlsim(sim, inp)]
+            # reshape output (Only valid for sliding window!)
+            context[node.output[0]] = np.asarray(output, dtype=np.float32).reshape(
+                1, out_pix, k * k * ifm_ch
+            )
+        else:
+            raise Exception(
+                """Invalid value for attribute sim_mode! Is currently set to: {}
+            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+                    mode
+                )
+            )
+
+    def global_includes(self):
+        self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"']
+
+    def defines(self, var):
+        """Emit the #define lines for the node attributes; var is unused here."""
+        numReps = 1
+        self.code_gen_dict["$DEFINES$"] = [
+            """#define ConvKernelDim1 {}\n #define IFMChannels1 {}
+            #define Input_precision1 {}\n #define IFMDim1 {}\n #define OFMDim1 {}
+            #define SIMD1 {}\n #define Stride1 {}\n #define numReps {}""".format(
+                self.get_nodeattr("ConvKernelDim"),
+                self.get_nodeattr("IFMChannels"),
+                self.get_nodeattr("Input_precision"),
+                self.get_nodeattr("IFMDim"),
+                self.get_nodeattr("OFMDim"),
+                self.get_nodeattr("SIMD"),
+                self.get_nodeattr("Stride"),
+                numReps,
+            )
+        ]
+
+    def read_npy_data(self):
+        """Emit the C++ call that reads input_0.npy into the in0 stream."""
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        dtype = self.get_input_datatype()
+        if dtype == DataType.BIPOLAR:
+            # use binary for bipolar storage
+            dtype = DataType.BINARY
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_stream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_in = "%s/input_0.npy" % code_gen_dir
+        self.code_gen_dict["$READNPYDATA$"] = []
+        self.code_gen_dict["$READNPYDATA$"].append(
+            'npy2apintstream<%s, %s, %d, %s>("%s", in0);'
+            % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+        )
+
+    def strm_decl(self):
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_stream_width())
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_stream_width())
+        )
+
+    def docompute(self):
+        """Emit the C++ call of the op_type function template on in0/out."""
+        node = self.onnx_node
+        self.code_gen_dict["$DOCOMPUTE$"] = [
+            """{}<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1,
+                OFMDim1, SIMD1, Stride1> (in0, out, numReps);""".format(
+                node.op_type,
+            )
+        ]
+
+    def dataoutstrm(self):
+        """Emit the C++ call that writes the out stream to output.npy."""
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        dtype = self.get_output_datatype()
+        if dtype == DataType.BIPOLAR:
+            # use binary for bipolar storage
+            dtype = DataType.BINARY
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_stream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_out = "%s/output.npy" % code_gen_dir
+        ofm_dim = self.get_nodeattr("OFMDim")
+        out_pix = ofm_dim * ofm_dim
+        k = self.get_nodeattr("ConvKernelDim")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        shape = (1, out_pix, k * k, ifm_ch)
+        shape_cpp_str = str(shape).replace("(", "{").replace(")", "}")
+
+        self.code_gen_dict["$DATAOUTSTREAM$"] = [
+            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");'
+            % (
+                packed_hls_type,
+                elem_hls_type,
+                elem_bits,
+                npy_type,
+                shape_cpp_str,
+                npy_out,
+            )
+        ]
+
+    def save_as_npy(self):
+        self.code_gen_dict["$SAVEASCNPY$"] = []
+
+    def blackboxfunction(self):
+        """Emit the top-level HLS function signature, named after the node."""
+        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
+            """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0,
+                hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format(
+                self.onnx_node.name
+            )
+        ]
+
+    def pragmas(self):
+        self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
+        self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
+        self.code_gen_dict["$PRAGMAS$"].append(
+            "#pragma HLS INTERFACE ap_ctrl_none port=return"
+        )
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 975da666f0ba728f542b06865aaa2c66c5f07c07..8d3d063c7a35bb86c72f1fcc1886c1821dd10c6a 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -14,9 +14,6 @@ class StreamingFCLayer_Batch(HLSCustomOp):
 
     def get_nodeattr_types(self):
         my_attrs = {
-            # "backend": ("s", True, "fpgadataflow"),
-            # "code_gen_dir": ("s", True, ""),
-            # "executable_path": ("s", True, ""),
             "PE": ("i", True, 0),
             "SIMD": ("i", True, 0),
             "MW": ("i", True, 0),
@@ -91,7 +88,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
 
         # verify that all necessary attributes exist
         try:
-            self.get_nodeattr("code_gen_dir")
+            self.get_nodeattr("code_gen_dir_npysim")
             self.get_nodeattr("executable_path")
             self.get_nodeattr("resType")
             self.get_nodeattr("MW")
@@ -109,7 +106,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             info_messages.append(
                 """The necessary attributes do not exist.
                 StreamingFCLayer_Batch needs the following attributes:
-                code_gen_dir, executable_path, resType, MW, MH, SIMD, PE,
+                code_gen_dir_npysim, executable_path, resType, MW, MH, SIMD, PE,
                 inputDataType, weightDataType, outputDataType, ActVal,
                 binaryXnorMode, noActivation"""
             )
@@ -269,7 +266,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         assert ret.shape[2] == n_thres_steps
         return ret.reshape(1, pe, tmem, n_thres_steps)
 
-    def generate_params(self, model):
+    def generate_params(self, model, path):
+        code_gen_dir = path
         # weights
         weights = model.get_initializer(self.onnx_node.input[1])
         # convert weights into hlslib-compatible format
@@ -283,7 +281,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             weight_tensor, export_wdt, "weights", True, True
         )
         # write weights into params.h
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        # code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         f_weights = open("{}/params.h".format(code_gen_dir), "w")
 
         if export_wdt.bitwidth() != 1:
@@ -303,6 +301,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             )
         f_weights.write(weight_hls_code)
         f_weights.close()
+
         # thresholds
         if len(self.onnx_node.input) > 2:
             thresholds = model.get_initializer(self.onnx_node.input[2])
@@ -324,7 +323,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                     threshold_tensor, tdt, "thresholds", False, True
                 )
                 # write thresholds into thresh.h
-                code_gen_dir = self.get_nodeattr("code_gen_dir")
+                # code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
                 f_thresh = open("{}/thresh.h".format(code_gen_dir), "w")
                 tdt_hls = tdt.get_hls_datatype_str()
                 # use binary to export bipolar activations
@@ -357,7 +356,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         nf = mh // pe
 
         # TODO ensure codegen dir exists
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         # create a npy file fore each input of the node (in_ind is input index)
         in_ind = 0
         for inputs in node.input:
@@ -402,7 +401,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             # TODO find a better way of checking for no pregenerated thresholds
             self.code_gen_dict["$GLOBALS$"] += ['#include "thresh.h"']
 
-    def defines(self):
+    def defines(self, var):
         numReps = 1
         self.code_gen_dict["$DEFINES$"] = [
             """#define MW1 {}\n #define MH1 {}\n #define SIMD1 {}\n
@@ -417,9 +416,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 numReps,
             )
         ]
+        if var == "ipgen":
+            self.code_gen_dict["$DEFINES$"].append("#define PRAGMA_SUB(x) _Pragma (#x)")
+            self.code_gen_dict["$DEFINES$"].append("#define DO_PRAGMA(x) PRAGMA_SUB(x)")
 
     def read_npy_data(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         dtype = self.get_input_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -466,7 +468,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         ]
 
     def dataoutstrm(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         dtype = self.get_output_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -495,3 +497,36 @@ class StreamingFCLayer_Batch(HLSCustomOp):
 
     def save_as_npy(self):
         self.code_gen_dict["$SAVEASCNPY$"] = []
+
+    def blackboxfunction(self):
+        """Store the HLS top-function signature under $BLACKBOXFUNCTION$."""
+        signature = """void {}(hls::stream<ap_uint<{}>> &in0,
+                hls::stream<ap_uint<{}>> &out
+                )""".format(
+            self.onnx_node.name,
+            self.get_instream_width(),
+            self.get_outstream_width(),
+        )
+        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [signature]
+
+    def pragmas(self):
+        """Populate $PRAGMAS$ with the HLS interface and partitioning pragmas.
+
+        The two threshs ARRAY_PARTITION entries are only emitted when
+        calc_tmem() is non-zero.
+        """
+        # interface pragmas first, then the weights ARRAY_PARTITION entries
+        pragma_lines = [
+            "#pragma HLS INTERFACE axis port=in0",
+            "#pragma HLS INTERFACE axis port=out",
+            "#pragma HLS INTERFACE ap_ctrl_none port=return",
+            "DO_PRAGMA(HLS ARRAY_PARTITION variable=weights complete dim=1)",
+            "DO_PRAGMA(HLS ARRAY_PARTITION variable=weights complete dim=2)",
+        ]
+        self.code_gen_dict["$PRAGMAS$"] = pragma_lines
+        if self.calc_tmem() != 0:
+            # TODO find a better way of checking for no pregenerated thresholds
+            pragma_lines += [
+                "DO_PRAGMA(HLS ARRAY_PARTITION variable=threshs complete dim=1)",
+                "DO_PRAGMA(HLS ARRAY_PARTITION variable=threshs complete dim=3)",
+            ]
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index 92f499b6771efe4455e7259a8eb62ab9c636cb1f..a316695b3d8691ab66cdef7a87093d7a777ef7ff 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -4,9 +4,6 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 class StreamingMaxPool_Batch(HLSCustomOp):
     def get_nodeattr_types(self):
         my_attrs = {
-            # "backend": ("s", True, "fpgadataflow"),
-            # "code_gen_dir": ("s", True, ""),
-            # "executable_path": ("s", True, ""),
             "ImgDim": ("i", True, 0),
             "PoolDim": ("i", True, 0),
             "NumChannels": ("i", True, 0),
@@ -51,7 +48,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
         # verify that all necessary attributes exist
         try:
-            self.get_nodeattr("code_gen_dir")
+            self.get_nodeattr("code_gen_dir_npysim")
             self.get_nodeattr("executable_path")
             self.get_nodeattr("ImgDim")
             self.get_nodeattr("PoolDim")
@@ -61,7 +58,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
             info_messages.append(
                 """The necessary attributes do not exist.
                 StreamingMaxPool_Batch  needs the following attributes:
-                code_gen_dir, executable_path, ImgDim, PoolDim, NumChannels"""
+                code_gen_dir_npysim, executable_path, ImgDim, PoolDim, NumChannels"""
             )
 
         # verify the number of inputs
@@ -75,7 +72,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
     def global_includes(self):
         self.code_gen_dict["$GLOBALS$"] = ['#include "maxpool.h"']
 
-    def defines(self):
+    def defines(self, var):
         numReps = 2
         self.code_gen_dict["$DEFINES$"] = [
             """#define ImgDim {}\n #define PoolDim {}\n
@@ -89,7 +86,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
 
     def read_npy_data(self):
         node = self.onnx_node
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         # c++ code to read out an npy file
         # and put it in hls::stream in the correct order
         self.code_gen_dict["$READNPYDATA$"] = []
@@ -188,7 +185,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         self.code_gen_dict["$DATAOUTSTREAM$"].append("}")
 
     def save_as_npy(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
         numReps = 1
         self.code_gen_dict["$SAVEASCNPY$"] = [
             """cnpy::npy_save("{}/output.npy",&output_data_vector[0],
@@ -200,3 +197,9 @@ class StreamingMaxPool_Batch(HLSCustomOp):
                 int(self.get_nodeattr("ImgDim") / self.get_nodeattr("PoolDim")),
             )
         ]
+
+    def blackboxfunction(self):
+        """No-op implementation; code_gen_dict is not modified."""
+
+    def pragmas(self):
+        """No-op implementation; code_gen_dict is not modified."""
diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py
new file mode 100644
index 0000000000000000000000000000000000000000..07a356e5ee7e10a6b51859cb7fb2c4bdb5deeda7
--- /dev/null
+++ b/src/finn/custom_op/fpgadataflow/templates.py
@@ -0,0 +1,73 @@
+# template for single node execution: C++ main() with $KEY$ placeholders
+docompute_template = """
+#include "cnpy.h"
+#include "npy2apintstream.hpp"
+#include <vector>
+#include "bnn-library.h"
+// includes for network parameters
+$GLOBALS$
+
+// defines for network parameters
+$DEFINES$
+
+int main(){
+
+$STREAMDECLARATIONS$
+
+$READNPYDATA$
+
+$DOCOMPUTE$
+
+$DATAOUTSTREAM$
+
+$SAVEASCNPY$
+
+}
+
+"""
+
+# templates for single node ip generation
+
+# cpp file: C++ function skeleton with $KEY$ placeholders
+ipgen_template = """
+#include "bnn-library.h"
+// includes for network parameters
+$GLOBALS$
+
+// defines for network parameters
+$DEFINES$
+
+$BLACKBOXFUNCTION$
+{
+$PRAGMAS$
+$DOCOMPUTE$
+}
+"""
+
+# tcl script: opens the project, runs csynth_design and exports an ip_catalog IP
+ipgentcl_template = """
+set config_proj_name $PROJECTNAME$
+puts "HLS project: $config_proj_name"
+set config_hwsrcdir "$HWSRCDIR$"
+puts "HW source dir: $config_hwsrcdir"
+set config_proj_part "$FPGAPART$"
+
+set config_bnnlibdir "$FINNHLSLIBDIR$"
+
+set config_toplevelfxn "$TOPFXN$"
+set config_clkperiod $CLKPERIOD$
+
+open_project $config_proj_name
+add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++0x -I$config_bnnlibdir"
+
+set_top $config_toplevelfxn
+open_solution sol1
+set_part $config_proj_part
+
+config_interface -m_axi_addr64
+
+create_clock -period $config_clkperiod -name default
+csynth_design
+export_design -format ip_catalog
+exit 0
+"""
diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py
index 4b8f4d6494a829b6da83943cea71f663b8ad651a..fa4b0ed94922a93b0dd1e349cb4655de5a110838 100644
--- a/src/finn/custom_op/registry.py
+++ b/src/finn/custom_op/registry.py
@@ -1,5 +1,8 @@
 # make sure new CustomOp subclasses are imported here so that they get
 # registered and plug in correctly into the infrastructure
+from finn.custom_op.fpgadataflow.convolutioninputgenerator import (
+    ConvolutionInputGenerator
+)
 from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch
 from finn.custom_op.fpgadataflow.streamingmaxpool_batch import StreamingMaxPool_Batch
 from finn.custom_op.multithreshold import MultiThreshold
@@ -12,3 +15,4 @@ custom_op["MultiThreshold"] = MultiThreshold
 custom_op["XnorPopcountMatMul"] = XnorPopcountMatMul
 custom_op["StreamingMaxPool_Batch"] = StreamingMaxPool_Batch
 custom_op["StreamingFCLayer_Batch"] = StreamingFCLayer_Batch
+custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator
diff --git a/src/finn/transformation/fpgadataflow/cleanup.py b/src/finn/transformation/fpgadataflow/cleanup.py
index 1632d3443a3bf79e55a4b877ae182964ff7caaed..0f62e568b2b92e4a83a34f303ed5a3f9dd71fa13 100644
--- a/src/finn/transformation/fpgadataflow/cleanup.py
+++ b/src/finn/transformation/fpgadataflow/cleanup.py
@@ -13,6 +13,11 @@ class CleanUp(Transformation):
         super().__init__()
 
     def apply(self, model):
+        # delete IP stitching project, if any
+        ipstitch_path = model.get_metadata_prop("vivado_proj")
+        if ipstitch_path is not None and os.path.isdir(ipstitch_path):
+            shutil.rmtree(ipstitch_path)
+            model.set_metadata_prop("vivado_proj", "")
         for node in model.graph.node:
             op_type = node.op_type
             if node.domain == "finn":
@@ -22,11 +27,26 @@ class CleanUp(Transformation):
                     try:
                         # lookup op_type in registry of CustomOps
                         inst = registry.custom_op[op_type](node)
-                        code_gen_dir = inst.get_nodeattr("code_gen_dir")
+                        # delete code_gen_dir from npysim
+                        code_gen_dir = inst.get_nodeattr("code_gen_dir_npysim")
                         if os.path.isdir(code_gen_dir):
                             shutil.rmtree(code_gen_dir)
-                        inst.set_nodeattr("code_gen_dir", "")
+                        inst.set_nodeattr("code_gen_dir_npysim", "")
                         inst.set_nodeattr("executable_path", "")
+                        # delete code_gen_dir from ipgen and project folder
+                        code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
+                        ipgen_path = inst.get_nodeattr("ipgen_path")
+                        if os.path.isdir(code_gen_dir):
+                            shutil.rmtree(code_gen_dir)
+                        if os.path.isdir(ipgen_path):
+                            shutil.rmtree(ipgen_path)
+                        inst.set_nodeattr("code_gen_dir_ipgen", "")
+                        inst.set_nodeattr("ipgen_path", "")
+                        # delete Java HotSpot Performance data log
+                        for d_name in os.listdir("/tmp/"):
+                            if "hsperfdata" in d_name:
+                                shutil.rmtree("/tmp/" + str(d_name))
+
                     except KeyError:
                         # exception if op_type is not supported
                         raise Exception(
diff --git a/src/finn/transformation/fpgadataflow/codegen_ipgen.py b/src/finn/transformation/fpgadataflow/codegen_ipgen.py
new file mode 100644
index 0000000000000000000000000000000000000000..0159873a3b80abeea277d4c8ec16dbf3a23779e2
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/codegen_ipgen.py
@@ -0,0 +1,48 @@
+import os
+import tempfile as tmp
+
+import finn.custom_op.registry as registry
+from finn.core.utils import get_by_name
+from finn.transformation import Transformation
+
+
+def _codegen_single_node(node, model, fpgapart, clk):
+    """Generate the ipgen code for one custom node.
+
+    Ensures the node has an existing code_gen_dir_ipgen directory (creating a
+    temporary one if needed) and calls the op's code_generation_ipgen.
+    Raises Exception if node.op_type is not in the CustomOp registry.
+    """
+    op_type = node.op_type
+    try:
+        # keep the try narrow: a KeyError raised during code generation must
+        # not be misreported as an unsupported op_type
+        op_class = registry.custom_op[op_type]
+    except KeyError:
+        raise Exception("Custom op_type %s is currently not supported." % op_type)
+    inst = op_class(node)
+    code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
+    if code_gen_dir == "" or not os.path.isdir(code_gen_dir):
+        code_gen_dir = tmp.mkdtemp(prefix="code_gen_ipgen_" + str(op_type) + "_")
+        inst.set_nodeattr("code_gen_dir_ipgen", code_gen_dir)
+    inst.code_generation_ipgen(model, fpgapart, clk)
+
+
+class CodeGen_ipgen(Transformation):
+    """Run ipgen code generation on every fpgadataflow node of a model."""
+
+    def __init__(self, fpgapart, clk):
+        super().__init__()
+        self.fpgapart, self.clk = fpgapart, clk
+
+    def apply(self, model):
+        """Visit finn-domain nodes; return (model, False)."""
+        for node in model.graph.node:
+            if node.domain != "finn":
+                continue
+            backend = get_by_name(node.attribute, "backend")
+            # nodes without a backend attribute are skipped
+            if backend is None or backend.s.decode("UTF-8") != "fpgadataflow":
+                continue
+            _codegen_single_node(node, model, self.fpgapart, self.clk)
+        return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/codegen_ipstitch.py b/src/finn/transformation/fpgadataflow/codegen_ipstitch.py
new file mode 100644
index 0000000000000000000000000000000000000000..de7438645017c60eb2907b7c9eb9c6a9bfe891df
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/codegen_ipstitch.py
@@ -0,0 +1,143 @@
+import os
+import subprocess
+import tempfile as tmp
+
+from finn.core.utils import get_by_name
+from finn.transformation import Transformation
+
+
+class CodeGen_ipstitch(Transformation):
+    """Create a Vivado IP Block Design project from all the generated IPs of a
+    graph. All nodes in the graph must have the fpgadataflow backend attribute,
+    and CodeGen_ipgen followed by HLSSynth_IPGen must have been run on the
+    graph (ipgen_path is read). The resulting block design is packaged as IP.
+
+    Outcome if successful: sets the vivado_proj attribute in the ONNX
+    ModelProto's metadata_props field, with the created project dir as the
+    value. A make_project.tcl script is also placed under the same folder,
+    which is called to instantiate the per-layer IPs and stitch them together.
+    The packaged block design IP can be found under the ip subdirectory.
+    """
+
+    def __init__(self, fpgapart):
+        super().__init__()
+        self.fpgapart = fpgapart
+
+    def apply(self, model):
+        ip_dirs = ["list"]  # Tcl "list" command, see ip_repo_paths below
+        create_cmds = []
+        connect_cmds = []
+        # ensure that all nodes are fpgadataflow, and that IPs are generated
+        for node in model.graph.node:
+            assert node.domain == "finn"
+            backend_attribute = get_by_name(node.attribute, "backend")
+            assert backend_attribute is not None
+            backend_value = backend_attribute.s.decode("UTF-8")
+            assert backend_value == "fpgadataflow"
+            ip_dir_attribute = get_by_name(node.attribute, "ipgen_path")
+            assert ip_dir_attribute is not None
+            ip_dir_value = ip_dir_attribute.s.decode("UTF-8")
+            ip_dir_value += "/sol1/impl/ip"
+            assert os.path.isdir(ip_dir_value)
+            ip_dirs += [ip_dir_value]
+            vlnv = "xilinx.com:hls:%s:1.0" % node.name
+            inst_name = node.name
+            create_cmd = "create_bd_cell -type ip -vlnv %s %s" % (vlnv, inst_name)
+            create_cmds += [create_cmd]
+            # TODO nonlinear topologies: check this for all inputs
+            my_producer = model.find_producer(node.input[0])
+            if my_producer is None:
+                # first node in graph
+                # make clock and reset external
+                connect_cmds.append(
+                    "make_bd_pins_external [get_bd_pins %s/ap_clk]" % inst_name
+                )
+                connect_cmds.append(
+                    "make_bd_pins_external [get_bd_pins %s/ap_rst_n]" % inst_name
+                )
+                # make input external
+                connect_cmds.append(
+                    "make_bd_intf_pins_external [get_bd_intf_pins %s/in0_V_V]"
+                    % inst_name
+                )
+            else:
+                # intermediate node
+                # wire up global clock and reset
+                connect_cmds.append(
+                    "connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins %s/ap_rst_n]"
+                    % inst_name
+                )
+                connect_cmds.append(
+                    "connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins %s/ap_clk]"
+                    % inst_name
+                )
+                # wire up input to previous output
+                # TODO nonlinear topologies: loop over all inputs
+                my_in_name = "%s/in0_V_V" % (inst_name)
+                prev_out_name = "%s/out_V_V" % (my_producer.name)
+                connect_cmds.append(
+                    "connect_bd_intf_net [get_bd_intf_pins %s] [get_bd_intf_pins %s]"
+                    % (prev_out_name, my_in_name)
+                )
+            if model.find_consumer(node.output[0]) is None:
+                # last node in graph
+                # (its input side was already handled by the branches above)
+                # make output external
+                connect_cmds.append(
+                    "make_bd_intf_pins_external [get_bd_intf_pins %s/out_V_V]"
+                    % inst_name
+                )
+
+        # create a temporary folder for the project
+        vivado_proj_dir = tmp.mkdtemp(prefix="vivado_proj_")
+        model.set_metadata_prop("vivado_proj", vivado_proj_dir)
+        # start building the tcl script
+        tcl = []
+        # create vivado project
+        tcl.append(
+            "create_project %s %s -part %s"
+            % ("finn_vivado_proj", vivado_proj_dir, self.fpgapart)
+        )
+        # add all the generated IP dirs to ip_repo_paths
+        ip_dirs_str = " ".join(ip_dirs)
+        tcl.append("set_property ip_repo_paths [%s] [current_project]" % ip_dirs_str)
+        tcl.append("update_ip_catalog")
+        # create block design and instantiate all layers
+        block_name = "finn_design"
+        tcl.append('create_bd_design "%s"' % block_name)
+        tcl.extend(create_cmds)
+        tcl.extend(connect_cmds)
+        tcl.append("regenerate_bd_layout")
+        tcl.append("validate_bd_design")
+        tcl.append("save_bd_design")
+        # export block design itself as an IP core
+        block_vendor = "xilinx_finn"
+        block_library = "finn"
+        block_vlnv = "%s:%s:%s:1.0" % (block_vendor, block_library, block_name)
+        tcl.append(
+            (
+                "ipx::package_project -root_dir %s/ip -vendor %s "
+                "-library %s -taxonomy /UserIP -module %s -import_files"
+            )
+            % (vivado_proj_dir, block_vendor, block_library, block_name)
+        )
+        tcl.append("set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv)
+        tcl.append("ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv)
+        tcl.append("ipx::update_checksums [ipx::find_open_core %s]" % block_vlnv)
+        tcl.append("ipx::save_core [ipx::find_open_core %s]" % block_vlnv)
+        # write the project creator tcl script
+        tcl_string = "\n".join(tcl) + "\n"
+        with open(vivado_proj_dir + "/make_project.tcl", "w") as f:
+            f.write(tcl_string)
+        # create a shell script and call Vivado
+        make_project_sh = vivado_proj_dir + "/make_project.sh"
+        working_dir = os.getcwd()
+        with open(make_project_sh, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write("cd {}\n".format(vivado_proj_dir))
+            f.write("vivado -mode batch -source make_project.tcl\n")
+            f.write("cd {}\n".format(working_dir))
+        bash_command = ["bash", make_project_sh]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+        return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/codegen.py b/src/finn/transformation/fpgadataflow/codegen_npysim.py
similarity index 80%
rename from src/finn/transformation/fpgadataflow/codegen.py
rename to src/finn/transformation/fpgadataflow/codegen_npysim.py
index 84078d90a573faf4d014c5e280e22e41061f0aff..0d5a42dfe3f0b47aa9201175177ed9c242e9dfbe 100644
--- a/src/finn/transformation/fpgadataflow/codegen.py
+++ b/src/finn/transformation/fpgadataflow/codegen_npysim.py
@@ -14,19 +14,21 @@ def _codegen_single_node(node, model):
         # lookup op_type in registry of CustomOps
         inst = registry.custom_op[op_type](node)
         # get the path of the code generation directory
-        code_gen_dir = inst.get_nodeattr("code_gen_dir")
+        code_gen_dir = inst.get_nodeattr("code_gen_dir_npysim")
         # ensure that there is a directory
         if code_gen_dir == "" or not os.path.isdir(code_gen_dir):
-            code_gen_dir = tmp.mkdtemp(prefix="code_gen_" + str(node.op_type) + "_")
-            inst.set_nodeattr("code_gen_dir", code_gen_dir)
+            code_gen_dir = tmp.mkdtemp(
+                prefix="code_gen_npysim_" + str(node.op_type) + "_"
+            )
+            inst.set_nodeattr("code_gen_dir_npysim", code_gen_dir)
         # ensure that there is generated code inside the dir
-        inst.code_generation(model)
+        inst.code_generation_npysim(model)
     except KeyError:
         # exception if op_type is not supported
         raise Exception("Custom op_type %s is currently not supported." % op_type)
 
 
-class CodeGen(Transformation):
+class CodeGen_npysim(Transformation):
     """Code generation for all nodes in model"""
 
     def apply(self, model):
diff --git a/src/finn/transformation/fpgadataflow/compile.py b/src/finn/transformation/fpgadataflow/compile.py
index 37df1c61dfc101111b1ab8623dcee9a5f1697489..c4f6a1a094f0963845440280add791f2a349ba9d 100644
--- a/src/finn/transformation/fpgadataflow/compile.py
+++ b/src/finn/transformation/fpgadataflow/compile.py
@@ -22,7 +22,7 @@ class Compile(Transformation):
                         # lookup op_type in registry of CustomOps
                         inst = registry.custom_op[op_type](node)
                         # ensure that code is generated
-                        assert inst.get_nodeattr("code_gen_dir") != ""
+                        assert inst.get_nodeattr("code_gen_dir_npysim") != ""
                         # call the compilation function for this node
                         inst.compile_singlenode_code()
                         # ensure that executable path is now set
diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py b/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
new file mode 100644
index 0000000000000000000000000000000000000000..f704b513e78e78b4dc08302efa56f19acfa62423
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
@@ -0,0 +1,35 @@
+import finn.core.utils as util
+import finn.custom_op.registry as registry
+from finn.transformation import Transformation
+
+
+class HLSSynth_IPGen(Transformation):
+    """Run HLS synthesis (IP generation) for all fpgadataflow nodes in model"""
+
+    def __init__(self):
+        super().__init__()
+
+    def apply(self, model):
+        # only finn-domain nodes with the fpgadataflow backend are processed
+        for n in model.graph.node:
+            if n.domain != "finn":
+                continue
+            backend = util.get_by_name(n.attribute, "backend")
+            if backend is None or backend.s.decode("UTF-8") != "fpgadataflow":
+                continue
+            op_type = n.op_type
+            try:
+                # instantiate the CustomOp registered for this op_type
+                inst = registry.custom_op[op_type](n)
+                # ipgen code must have been generated beforehand
+                assert inst.get_nodeattr("code_gen_dir_ipgen") != ""
+                # run the IP generation step for this node
+                inst.ipgen_singlenode_code()
+                # ipgen_path must be set once the step has run
+                assert inst.get_nodeattr("ipgen_path") != ""
+            except KeyError:
+                # a KeyError is reported as an unsupported op_type
+                raise Exception(
+                    "Custom op_type %s is currently not supported." % op_type
+                )
+        return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/set_sim_mode.py b/src/finn/transformation/fpgadataflow/set_sim_mode.py
new file mode 100644
index 0000000000000000000000000000000000000000..31692c89bb8285f0aba70b27a344e8d66c21533c
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/set_sim_mode.py
@@ -0,0 +1,34 @@
+import finn.core.utils as util
+import finn.custom_op.registry as registry
+from finn.transformation import Transformation
+
+
+class SetSimMode(Transformation):
+    """Set attribute sim_mode to the given mode in all fpgadataflow nodes"""
+
+    def __init__(self, mode):
+        super().__init__()
+        self.mode = mode
+
+    def apply(self, model):
+        # only finn-domain nodes with the fpgadataflow backend are touched
+        for n in model.graph.node:
+            if n.domain != "finn":
+                continue
+            backend = util.get_by_name(n.attribute, "backend")
+            if backend is None or backend.s.decode("UTF-8") != "fpgadataflow":
+                continue
+            op_type = n.op_type
+            try:
+                # instantiate the CustomOp registered for this op_type
+                inst = registry.custom_op[op_type](n)
+                # store the requested mode on the node
+                inst.set_nodeattr("sim_mode", self.mode)
+                # reading the attribute back must give a non-empty value
+                assert inst.get_nodeattr("sim_mode") != ""
+            except KeyError:
+                # a KeyError is reported as an unsupported op_type
+                raise Exception(
+                    "Custom op_type %s is currently not supported." % op_type
+                )
+        return (model, False)
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index 533710605e2bc514ba0fb0c8784c378d07451951..308f2c3b278fd9ef56d413ce94775a35e8240101 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -6,7 +6,7 @@ import finn.core.utils as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.cleanup import CleanUp
-from finn.transformation.fpgadataflow.codegen import CodeGen
+from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
 
 
 def test_code_gen_trafo():
@@ -50,7 +50,7 @@ def test_code_gen_trafo():
     W = util.gen_finn_dt_tensor(wdt, (mw, mh))
     model.set_initializer("weights", W)
 
-    model = model.transform(CodeGen())
+    model = model.transform(CodeGen_npysim())
     for node in model.graph.node:
         code_gen_attribute = util.get_by_name(node.attribute, "code_gen_dir")
         tmp_dir = code_gen_attribute.s.decode("UTF-8")
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index f84ce34b54b3496f7e277e55ac574124e09c25d3..d6e5d3f111d4c0595305fc54653b925bdefc2157 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -6,7 +6,7 @@ import finn.core.utils as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.cleanup import CleanUp
-from finn.transformation.fpgadataflow.codegen import CodeGen
+from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
 from finn.transformation.fpgadataflow.compile import Compile
 
 
@@ -51,7 +51,7 @@ def test_compilation_trafo():
     W = util.gen_finn_dt_tensor(wdt, (mw, mh))
     model.set_initializer("weights", W)
 
-    model = model.transform(CodeGen())
+    model = model.transform(CodeGen_npysim())
     model = model.transform(Compile())
     for node in model.graph.node:
         compilation_attribute = util.get_by_name(node.attribute, "executable_path")
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers.py b/tests/fpgadataflow/test_convert_to_hls_layers.py
index 32792e1364229199286a7012105442f3bbfb05df..21dda4481ae7f6cfbb46b422045eb37e4e3db3a3 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers.py
@@ -15,7 +15,7 @@ from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch
 from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.fpgadataflow.codegen import CodeGen
+from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
 from finn.transformation.fpgadataflow.compile import Compile
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_shapes import InferShapes
@@ -82,7 +82,7 @@ def test_convert_to_hls_layers_lfc_w1a1():
     fc3w.set_nodeattr("SIMD", 1024)
     fc3w.set_nodeattr("PE", 10)
 
-    model = model.transform(CodeGen())
+    model = model.transform(CodeGen_npysim())
     model = model.transform(Compile())
 
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
new file mode 100644
index 0000000000000000000000000000000000000000..e32d8b765253a054d90e9c47d0e9d94202b2003b
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -0,0 +1,169 @@
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.utils import gen_finn_dt_tensor
+from finn.transformation.fpgadataflow.cleanup import CleanUp
+from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
+from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
+from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.set_sim_mode import SetSimMode
+from finn.transformation.general import GiveUniqueNodeNames
+
+
+def get_im2col_indices(x_shape, k, stride):
+    """Return (channel, row, col) index arrays for im2col on a square input."""
+    _, C, H, W = x_shape
+    assert H == W
+    assert (W - k) % stride == 0
+    # number of window positions along one spatial axis
+    ofm_dim = int((W - k) / stride + 1)
+    kernel_taps = np.arange(k)
+    window_starts = stride * np.arange(ofm_dim)
+    # row/col offset of each tap inside the kernel, repeated for every channel
+    tap_rows = np.tile(np.repeat(kernel_taps, k), C)
+    tap_cols = np.tile(kernel_taps, k * C)
+    # absolute position = offset inside the kernel + top-left corner of window
+    rows = tap_rows.reshape(-1, 1) + np.repeat(window_starts, ofm_dim).reshape(1, -1)
+    cols = tap_cols.reshape(-1, 1) + np.tile(window_starts, ofm_dim).reshape(1, -1)
+    # channel index of every tap, as a column vector
+    channels = np.repeat(np.arange(C), k * k).reshape(-1, 1)
+    return (channels, rows, cols)
+
+
+def im2col_indices(x, k, stride):
+    """Rearrange the k x k sliding windows of x into rows (im2col).
+
+    x is gathered as x[:, channel, row, col] using the index arrays from
+    get_im2col_indices. The result has k * k * C columns (C = x.shape[1]) and
+    one row per window position and leading-axis entry of x.
+
+    The columns are reordered to match the finn-hlslib function: the C channel
+    values belonging to one kernel position end up next to each other. This
+    works for any number of channels C.
+    """
+    ch_idx, row_idx, col_idx = get_im2col_indices(x.shape, k, stride)
+
+    cols = x[:, ch_idx, row_idx, col_idx]
+    C = x.shape[1]
+    cols = cols.transpose(1, 2, 0).reshape(k * k * C, -1)
+    cols = cols.transpose(1, 0)
+
+    # rearranging the output so it matches with finn-hlslib function
+    # swapping the columns according to the input channel:
+    # column c is assigned to group c % C, for any channel count C
+    parts = {}
+    for ch in range(C):
+        parts[ch] = []
+
+    for c in range(cols.shape[1]):
+        parts[c % C].append(c)
+    permutation = []
+    for ch in parts:
+        permutation.extend(parts[ch])
+
+    # argsort of a permutation yields its inverse permutation
+    order = np.argsort(permutation)
+    return cols[:, order]
+
+
+def make_single_slidingwindow_modelwrapper(
+    k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt
+):
+    """Return a ModelWrapper holding a single ConvolutionInputGenerator node."""
+    ip = idt.bitwidth()
+    odt = idt  # output datatype equals the input datatype
+    out_pix = ofm_dim * ofm_dim  # ofm_dim x ofm_dim output positions
+
+    inp = helper.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
+    )
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, out_pix, k * k * ifm_ch]
+    )
+
+    SlidingWindow_node = helper.make_node(
+        "ConvolutionInputGenerator",
+        ["inp"],
+        ["outp"],
+        domain="finn",
+        backend="fpgadataflow",
+        ConvKernelDim=k,
+        IFMChannels=ifm_ch,
+        Input_precision=ip,
+        IFMDim=ifm_dim,
+        OFMDim=ofm_dim,
+        SIMD=simd,
+        Stride=stride,
+        inputDataType=idt.name,
+        outputDataType=odt.name,
+    )
+    graph = helper.make_graph(
+        nodes=[SlidingWindow_node],
+        name="slidingwindow_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = helper.make_model(graph, producer_name="slidingwindow-model")
+    model = ModelWrapper(model)
+
+    # tensors are declared as FLOAT in the graph; the FINN datatype is set here
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+
+    return model
+
+
+def prepare_inputs(input_tensor, idt):
+    """Return the execution input dict, keyed by the graph input name "inp"."""
+    # bipolar {-1, +1} values are passed on in binary {0, 1} form
+    if idt != DataType.BIPOLAR:
+        return {"inp": input_tensor}
+    return {"inp": (input_tensor + 1) / 2}
+
+
+# input datatype
+@pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2])
+# kernel size
+@pytest.mark.parametrize("k", [2, 4])
+# input dimension
+@pytest.mark.parametrize("ifm_dim", [4, 6, 8])
+# input channels
+@pytest.mark.parametrize("ifm_ch", [1, 2, 3, 4])
+# Stride
+@pytest.mark.parametrize("stride", [1, 2])
+def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride):
+    simd = ifm_ch
+    # number of window positions along one axis of the input
+    ofm_dim = int(((ifm_dim - k) / stride) + 1)
+
+    x = gen_finn_dt_tensor(idt, (1, ifm_ch, ifm_dim, ifm_dim))
+    model = make_single_slidingwindow_modelwrapper(
+        k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt
+    )
+    model = model.transform(SetSimMode("npysim"))
+    model = model.transform(CodeGen_npysim())
+    model = model.transform(Compile())
+
+    # prepare input data
+    input_dict = prepare_inputs(x, idt)
+
+    # execute model
+    y_produced = oxe.execute_onnx(model, input_dict)["outp"]
+    y_expected = im2col_indices(x, k, stride)
+    # reshape expected output to match node output
+    oshape = y_produced.shape
+    y_expected = y_expected.reshape(oshape)
+
+    assert (y_produced == y_expected).all()
+    # then run IP generation on the same model; no rtlsim output is checked here
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
+    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(SetSimMode("rtlsim"))
+    model = model.transform(CleanUp())
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 0df66c4af2cfbadead8e95322c433cf69d4d2715..c57b2734680319557741db7b0d49c1d6aa6d15aa 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -10,8 +10,11 @@ from finn.core.modelwrapper import ModelWrapper
 from finn.core.utils import calculate_signed_dot_prod_range, gen_finn_dt_tensor
 from finn.custom_op.multithreshold import multithreshold
 from finn.transformation.fpgadataflow.cleanup import CleanUp
-from finn.transformation.fpgadataflow.codegen import CodeGen
+from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
+from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
 from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.general import GiveUniqueNodeNames
 
 
 def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None):
@@ -147,7 +150,7 @@ def test_fpgadataflow_fclayer(idt, wdt, act, nf, sf, mw, mh):
         else:
             tdt = DataType.INT32
     model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
-    model = model.transform(CodeGen())
+    model = model.transform(CodeGen_npysim())
     model = model.transform(Compile())
     # prepare input data
     input_dict = prepare_inputs(x, idt, wdt)
@@ -169,4 +172,7 @@ def test_fpgadataflow_fclayer(idt, wdt, act, nf, sf, mw, mh):
     # execute model
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
     assert (y_produced.reshape(y_expected.shape) == y_expected).all()
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
+    model = model.transform(HLSSynth_IPGen())
     model = model.transform(CleanUp())
diff --git a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
new file mode 100644
index 0000000000000000000000000000000000000000..741043363f7b8fb359129460513a8b85bf3aae4e
--- /dev/null
+++ b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
@@ -0,0 +1,119 @@
+import os.path
+
+import numpy as np
+from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.core.utils import calculate_signed_dot_prod_range, gen_finn_dt_tensor
+from finn.transformation.fpgadataflow.cleanup import CleanUp
+from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
+from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
+from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.general import GiveUniqueNodeNames
+
+
+def create_two_fc_model():
+    """Create a model with two chained StreamingFCLayer_Batch instances."""
+    wdt = DataType.INT2
+    idt = DataType.INT2
+    odt = DataType.INT2
+    act = DataType.INT2
+    m = 4
+    tdt = DataType.INT32
+    actval = odt.min()
+    no_act = 0
+    binary_xnor_mode = 0
+
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, m])
+    mid = helper.make_tensor_value_info("mid", TensorProto.FLOAT, [1, m])
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, m])
+
+    fc0 = helper.make_node(
+        "StreamingFCLayer_Batch",
+        ["inp", "w0", "t0"],
+        ["mid"],
+        domain="finn",
+        backend="fpgadataflow",
+        resType="ap_resource_lut()",
+        MW=m,
+        MH=m,
+        SIMD=1,
+        PE=1,
+        inputDataType=idt.name,
+        weightDataType=wdt.name,
+        outputDataType=odt.name,
+        ActVal=actval,
+        binaryXnorMode=binary_xnor_mode,
+        noActivation=no_act,
+    )
+
+    fc1 = helper.make_node(
+        "StreamingFCLayer_Batch",
+        ["mid", "w1", "t1"],
+        ["outp"],
+        domain="finn",
+        backend="fpgadataflow",
+        resType="ap_resource_lut()",
+        MW=m,
+        MH=m,
+        SIMD=1,
+        PE=1,
+        inputDataType=idt.name,
+        weightDataType=wdt.name,
+        outputDataType=odt.name,
+        ActVal=actval,
+        binaryXnorMode=binary_xnor_mode,
+        noActivation=no_act,
+    )
+
+    graph = helper.make_graph(
+        nodes=[fc0, fc1],
+        name="fclayer_graph",
+        inputs=[inp],
+        outputs=[outp],
+        value_info=[mid],
+    )
+
+    model = helper.make_model(graph, producer_name="fclayer-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("mid", idt)
+    model.set_tensor_datatype("outp", odt)
+    model.set_tensor_datatype("w0", wdt)
+    model.set_tensor_datatype("w1", wdt)
+
+    # generate weights
+    w0 = gen_finn_dt_tensor(wdt, (m, m))
+    w1 = gen_finn_dt_tensor(wdt, (m, m))
+    model.set_initializer("w0", w0)
+    model.set_initializer("w1", w1)
+
+    # generate thresholds (names chosen so the min/max builtins stay usable)
+    (acc_min, acc_max) = calculate_signed_dot_prod_range(idt, wdt, m)
+    n_steps = act.get_num_possible_values() - 1
+    t0 = np.random.randint(acc_min, acc_max - 1, (m, n_steps)).astype(np.float32)
+    t1 = np.random.randint(acc_min, acc_max - 1, (m, n_steps)).astype(np.float32)
+    # provide non-decreasing thresholds
+    t0 = np.sort(t0, axis=1)
+    t1 = np.sort(t1, axis=1)
+
+    model.set_initializer("t0", t0)
+    model.set_initializer("t1", t1)
+    model.set_tensor_datatype("t0", tdt)
+    model.set_tensor_datatype("t1", tdt)
+    return model
+
+
+def test_fpgadataflow_ip_stitch():
+    model = create_two_fc_model()
+    model = model.transform(GiveUniqueNodeNames())  # node names become IP names
+    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))  # (fpgapart, clk)
+    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(CodeGen_ipstitch("xc7z020clg400-1"))
+    vivado_proj_dir = model.get_metadata_prop("vivado_proj")  # set by CodeGen_ipstitch
+    assert vivado_proj_dir is not None
+    assert os.path.isdir(vivado_proj_dir)
+    assert os.path.isfile(vivado_proj_dir + "/ip/component.xml")  # packaged IP
+    model = model.transform(CleanUp())
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index 4a7ca1b5c0473c520f0e2ea775f7c8950eb16695..32b1c60fc714794e39fe1ade2d0252895bb33025 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -5,7 +5,7 @@ import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.cleanup import CleanUp
-from finn.transformation.fpgadataflow.codegen import CodeGen
+from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
 from finn.transformation.fpgadataflow.compile import Compile
 
 
@@ -112,7 +112,7 @@ def test_layer_streaming_maxpool_batch():
     ).reshape(2, 2, 4, 4)
     print(input_tensor)
 
-    model = model.transform(CodeGen())
+    model = model.transform(CodeGen_npysim())
     model = model.transform(Compile())
 
     input_dict = {"in": input_tensor}