diff --git a/.gitignore b/.gitignore
index 8b3166a44070a4575aac86c445c4504b594cda08..d7ee7e014a0c175a8a88060f2aa320efeb501ddc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,6 @@ MANIFEST
 
 # SSH key dir mounted into Docker
 /ssh_keys/
+
+# PYNQ board files
+/board_files/
diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci
index 7d5772d9f5118d1f1238dd14a6b57a1b4fd5004d..0d122133a6446cb77160c9447e16ff13d4d4b9c5 100644
--- a/docker/Dockerfile.finn_ci
+++ b/docker/Dockerfile.finn_ci
@@ -37,7 +37,7 @@ RUN apt-get update
 RUN apt-get -y upgrade
 RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
 RUN apt-get install -y verilator zsh
-RUN apt-get -y install sshpass
+RUN apt-get -y install sshpass wget unzip
 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
 
 # cloning dependency repos
diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev
index 8c1502eb4a1941061bd58e6f9a18106f98f259e2..f8e15f34fb4da3dc4ee353a29d26866b68879144 100644
--- a/docker/Dockerfile.finn_dev
+++ b/docker/Dockerfile.finn_dev
@@ -43,7 +43,7 @@ RUN apt-get update
 RUN apt-get -y upgrade
 RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
 RUN apt-get install -y verilator nano zsh rsync
-RUN apt-get -y install sshpass
+RUN apt-get -y install sshpass wget unzip
 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
 
 COPY requirements.txt .
diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index ee75089c657e4fad1e4a455ac7bd5fe4976e5d4c..72751817383dbdb441970e5816247cfa7760ef5b 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -57,4 +57,19 @@ if [ ! -z "$VITIS_PATH" ];then
   export XILINX_VITIS=$VITIS_PATH
   source $VITIS_PATH/settings64.sh
 fi
+
+# download PYNQ board files if not already there
+if [ ! -d "/workspace/finn/board_files" ]; then
+    gecho "Downloading PYNQ board files for Vivado"
+    OLD_PWD=$(pwd)
+    cd /workspace/finn
+    wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip
+    wget -q https://d2m32eurp10079.cloudfront.net/Download/pynq-z2.zip
+    unzip -q pynq-z1.zip
+    unzip -q pynq-z2.zip
+    mkdir /workspace/finn/board_files
+    mv pynq-z1/ board_files/
+    mv pynq-z2/ board_files/
+    rm pynq-z1.zip
+    rm pynq-z2.zip
+    cd $OLD_PWD
+fi
+
 exec "$@"
diff --git a/finn-rtllib/memstream/component.xml b/finn-rtllib/memstream/component.xml
index 6b728c0555a4889b8e76d5759233d1109a3002bd..7910a8284dad3674b8665136506a60c498e0547f 100644
--- a/finn-rtllib/memstream/component.xml
+++ b/finn-rtllib/memstream/component.xml
@@ -1051,6 +1051,7 @@
         <xilinx:family xilinx:lifeCycle="Beta">azynq</xilinx:family>
         <xilinx:family xilinx:lifeCycle="Beta">zynquplus</xilinx:family>
         <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family>
+        <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family>
       </xilinx:supportedFamilies>
       <xilinx:taxonomies>
         <xilinx:taxonomy>/UserIP</xilinx:taxonomy>
diff --git a/src/finn/analysis/fpgadataflow/post_synth_res.py b/src/finn/analysis/fpgadataflow/post_synth_res.py
index 508c34aaed50f2935f4915cdcea29a3e92641b3c..81accba23220d3f25e8560443ff22cf59d3733e9 100644
--- a/src/finn/analysis/fpgadataflow/post_synth_res.py
+++ b/src/finn/analysis/fpgadataflow/post_synth_res.py
@@ -30,15 +30,20 @@ import os
 import xml.etree.ElementTree as ET
 
 from finn.transformation.move_reshape import _is_fpgadataflow_node
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
 
 
-def post_synth_res(model):
+def post_synth_res(model, override_synth_report_filename=None):
     """Extracts the FPGA resource results from the Vivado synthesis.
 
     Returns {node name : resources_dict}."""
 
     res_dict = {}
-    synth_report_filename = model.get_metadata_prop("vivado_synth_rpt")
+    if override_synth_report_filename is not None:
+        synth_report_filename = override_synth_report_filename
+    else:
+        synth_report_filename = model.get_metadata_prop("vivado_synth_rpt")
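+    # (the override lets the recursive call below reuse the parent model's
+    # synthesis report for StreamingDataflowPartition child models)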
     if os.path.isfile(synth_report_filename):
         tree = ET.parse(synth_report_filename)
         root = tree.getroot()
@@ -50,7 +55,11 @@ def post_synth_res(model):
         raise Exception("Please run synthesis first")
 
     for node in model.graph.node:
-        if _is_fpgadataflow_node(node):
+        if node.op_type == "StreamingDataflowPartition":
+            sdp_model = ModelWrapper(getCustomOp(node).get_nodeattr("model"))
+            sdp_res_dict = post_synth_res(sdp_model, synth_report_filename)
+            res_dict.update(sdp_res_dict)
+        elif _is_fpgadataflow_node(node):
             row = root.findall(".//*[@contents='%s']/.." % node.name)
             if row != []:
                 node_dict = {}
diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py
index 7c3123cd5eb29a54dc5cbfb912225ad3fdb0f219..0c01a48a07608dcd760447e8f569128f58d86f28 100644
--- a/src/finn/core/onnx_exec.py
+++ b/src/finn/core/onnx_exec.py
@@ -51,8 +51,20 @@ def execute_node(node, context, graph):
     if node.op_type == "StreamingDataflowPartition":
         sdp_node = getCustomOp(node)
         model = ModelWrapper(sdp_node.get_nodeattr("model"))
-        ret = execute_onnx(model, context, True)
-        context.update(ret)
+        inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items()))
+        # input may have been renamed in partition
+        assert len(inp_ctx) == 1
+        old_iname = node.input[0]
+        new_iname = model.graph.input[0].name
+        if old_iname != new_iname:
+            inp_ctx[new_iname] = inp_ctx[old_iname]
+            del inp_ctx[old_iname]
+        ret = execute_onnx(model, inp_ctx, False)
+        # output may have been renamed in partition
+        assert len(ret) == 1
+        node_oname = node.output[0]
+        model_oname = model.graph.output[0].name
+        context[node_oname] = ret[model_oname]
     else:
         if node.domain == "finn":
 
diff --git a/src/finn/core/remote_exec.py b/src/finn/core/remote_exec.py
index a533e4d36629f57f7c4a576570d75a1e051de5be..214358608c43a868f9ef414dcbf6eb33e3f45a5b 100644
--- a/src/finn/core/remote_exec.py
+++ b/src/finn/core/remote_exec.py
@@ -62,11 +62,15 @@ def remote_exec(model, execution_context):
     bash_command = ["/bin/bash", "-c", cmd]
     process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
     process_compile.communicate()
+    # read back the platform metadata for correct remote execution
+    platform = model.get_metadata_prop("platform")
+    assert platform in ["alveo", "zynq", "zynq-iodma"]
     cmd = (
         "sshpass -p {} ssh {}@{} -p {} "
         '"cd {}/{}; echo "{}" | '
         'sudo -S python3.6 driver.py --exec_mode="execute" --batchsize=1" '
-        '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy"'
+        '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy" '
+        '--platform="{}" '
     ).format(
         pynq_password,
         pynq_username,
@@ -75,6 +79,7 @@ def remote_exec(model, execution_context):
         pynq_target_dir,
         deployment_folder,
         pynq_password,
+        platform,
     )
     bash_command = ["/bin/bash", "-c", cmd]
     process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py
index 05870b8d9d5d3a11bad7882c9a7d122f8cd34cf6..7d0374445d816f1e8d49ed92cf7aa67b024f9ac1 100644
--- a/src/finn/custom_op/fpgadataflow/iodma.py
+++ b/src/finn/custom_op/fpgadataflow/iodma.py
@@ -197,11 +197,13 @@ class IODMA(HLSCustomOp):
     def get_number_output_values(self):
         oshape = self.get_normal_output_shape()
         itype_bits = self.get_input_datatype().bitwidth()
-        intfw = self.get_nodeattr("intfWidth")
+        stream_width = self.get_nodeattr("streamWidth")
         nelems = np.prod(oshape)
         nbits = nelems * itype_bits
-        assert nbits % intfw == 0, "DMA: total transfer size must be word multiple"
-        ovalues = nbits // intfw
+        assert (
+            nbits % stream_width == 0
+        ), "DMA: total transfer size must be word multiple"
+        ovalues = nbits // stream_width
         return ovalues
 
     def global_includes(self):
diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py
index 1da60a5124fa86b4336bae8fd1a587672f2f2e6f..319731df70d5bd1cb80d42932f08acdcec80c074 100644
--- a/src/finn/custom_op/fpgadataflow/templates.py
+++ b/src/finn/custom_op/fpgadataflow/templates.py
@@ -344,6 +344,7 @@ set_property supported_families { \
   virtex7 Production \
   virtexu Production \
   virtexuplus Production \
+  virtexuplusHBM Production \
   zynq Production \
   zynquplus Production \
   aartix7 Production \
diff --git a/src/finn/custom_op/streamingdataflowpartition.py b/src/finn/custom_op/streamingdataflowpartition.py
index bce4dde426b8838d6c86638a3641d51ab259a6db..31cd38fea3c5a9e88084c3332d46aebdb065f800 100644
--- a/src/finn/custom_op/streamingdataflowpartition.py
+++ b/src/finn/custom_op/streamingdataflowpartition.py
@@ -36,7 +36,12 @@ class StreamingDataflowPartition(CustomOp):
     bitfile by itself."""
 
     def get_nodeattr_types(self):
-        return {"model": ("s", True, "")}
+        return {
+            "model": ("s", True, ""),
+            "res_estimate": ("s", False, ""),
+            "res_hls": ("s", False, ""),
+            "res_synth": ("s", False, ""),
+        }
 
     def make_shape_compatible_op(self, model):
         pass
diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py
index 62ee92df54eee2b63d84657515d7fbc3a8808b81..da6fa1ff738690308a9b7686a5c92d7395ab50c8 100644
--- a/src/finn/transformation/fpgadataflow/annotate_resources.py
+++ b/src/finn/transformation/fpgadataflow/annotate_resources.py
@@ -32,6 +32,8 @@ from finn.transformation.move_reshape import _is_fpgadataflow_node
 from finn.analysis.fpgadataflow.res_estimation import res_estimation
 from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation
 from finn.analysis.fpgadataflow.post_synth_res import post_synth_res
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
 
 
 class AnnotateResources(Transformation):
@@ -44,9 +46,10 @@ class AnnotateResources(Transformation):
     chosen mode (e.g. HLSSynthIP for hls) was previously run.
     """
 
-    def __init__(self, mode):
+    def __init__(self, mode, override_res_dict=None):
         super().__init__()
         self.mode = mode
+        self.res_dict = override_res_dict
 
     def apply(self, model):
         graph = model.graph
@@ -58,10 +61,33 @@ class AnnotateResources(Transformation):
             res_fxn = post_synth_res
         else:
             raise Exception("Unrecognized mode for AnnotateResources")
-        res_dict = model.analysis(res_fxn)
+        if self.res_dict is None:
+            self.res_dict = model.analysis(res_fxn)
+        children_dict = {}
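+        # per-node resources at this level of the graph, including partitions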
+        # annotate node resources
+        for node in graph.node:
+            if _is_fpgadataflow_node(node) and node.name in self.res_dict.keys():
+                op_inst = registry.getCustomOp(node)
+                op_inst.set_nodeattr("res_" + self.mode, str(self.res_dict[node.name]))
+                children_dict[node.name] = self.res_dict[node.name]
+            elif node.op_type == "StreamingDataflowPartition":
+                # recurse into model to manually annotate per-layer resources
+                sdp_model_filename = getCustomOp(node).get_nodeattr("model")
+                sdp_model = ModelWrapper(sdp_model_filename)
+                sdp_model = sdp_model.transform(
+                    AnnotateResources(self.mode, self.res_dict)
+                )
+                sdp_dict = sdp_model.get_metadata_prop("res_total_" + self.mode)
+                sdp_dict = eval(sdp_dict)
+                # save transformed model
+                sdp_model.save(sdp_model_filename)
+                # set res attribute for sdp node
+                getCustomOp(node).set_nodeattr("res_" + self.mode, str(sdp_dict))
+                children_dict[node.name] = sdp_dict
+        self.res_dict.update(children_dict)
         total_dict = {}
-        for lname in res_dict.keys():
-            layer_res_dict = res_dict[lname]
+        for lname in children_dict.keys():
+            layer_res_dict = self.res_dict[lname]
             for r_type in layer_res_dict.keys():
                 r_amount = layer_res_dict[r_type]
                 r_amount = float(r_amount)
@@ -73,9 +99,4 @@ class AnnotateResources(Transformation):
             if "efficiency" in k:
                 total_dict[k] = total_dict[k] / len(graph.node)
         model.set_metadata_prop("res_total_" + self.mode, str(total_dict))
-        for node in graph.node:
-            if _is_fpgadataflow_node(node) and node.name in res_dict.keys():
-                op_inst = registry.getCustomOp(node)
-                op_inst.set_nodeattr("res_" + self.mode, str(res_dict[node.name]))
-
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
index 7197e68be2fbdf5fc39b7ed202e88672614514ec..5ec4ab14d65d63523856a6bb107bf75c1ca5a261 100644
--- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
+++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py
@@ -125,6 +125,7 @@ class CreateDataflowPartition(Transformation):
                     [df_out],
                     # use the model attribute to mark the df model
                     model=df_model_filename,
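+                    # domain must be "finn" so the node is treated as a
+                    # FINN custom op (cf. the domain check in execute_node)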
+                    domain="finn",
                 )
                 non_df_model.graph.node.insert(df_start_ind, df_node)
                 model = non_df_model
diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
index 018ad385f33a8e0aea4aa42599fd47fe5dae57dd..90b4b6c47e6e353c1b606d6918eb271e9c0619c5 100644
--- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -210,7 +210,8 @@ class CreateStitchedIP(Transformation):
                     assert (
                         node_inst.get_nodeattr("Direction") == "in"
                     ), """Output TLastMarker incorrect direction"""
-                elif node.op_type == "IODMA":
+                elif node.op_type == "IODMA" and len(model.graph.node) != 1:
+                    # don't apply this check for a 1-node partition
                     assert (
                         node_inst.get_nodeattr("direction") == "in"
                     ), """Input DMA incorrect direction"""
@@ -241,17 +242,11 @@ class CreateStitchedIP(Transformation):
             if model.find_consumers(node.output[0]) is None:
                 # last node in graph
                 self.connect_m_axis_external(node)
-                # ensure it is a TLastMarker to have a valid TLast signal
-                assert (
-                    node.op_type == "TLastMarker" or node.op_type == "IODMA"
-                ), """Last node is not TLastMarker or DMA.
-                Please run transformation InsertTLastMarker/InsertIODMA to ensure
-                a valid TLast signal"""
                 if node.op_type == "TLastMarker":
                     assert (
                         node_inst.get_nodeattr("Direction") == "out"
                     ), """Output TLastMarker incorrect direction"""
-                elif node.op_type == "IODMA":
+                elif node.op_type == "IODMA" and len(model.graph.node) != 1:
                     assert (
                         node_inst.get_nodeattr("direction") == "out"
                     ), """Output DMA incorrect direction"""
diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py
index 85a2d47be0599a852b223f1a65d3ec04efe9bda7..6f7fde0c4faba09e584eb578819f44c18639bc9d 100644
--- a/src/finn/transformation/fpgadataflow/insert_fifo.py
+++ b/src/finn/transformation/fpgadataflow/insert_fifo.py
@@ -159,7 +159,7 @@ class InsertFIFO(Transformation):
             # insert FIFO as last node, except when last node is DMA
             if (
                 graph.node[-1].op_type != "StreamingFIFO"
-                and graph.node[0].op_type != "IODMA"
+                and graph.node[-1].op_type != "IODMA"
             ):
                 n = graph.node[-1]
                 assert (
diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py
index 0cd7c0d4d41accf8cdba8adfaf4dbb00fc0cab7a..72e5ec4fdd721ecf549adaf7ddd38db4636bce27 100644
--- a/src/finn/transformation/fpgadataflow/insert_iodma.py
+++ b/src/finn/transformation/fpgadataflow/insert_iodma.py
@@ -81,8 +81,8 @@ class InsertIODMA(Transformation):
                 # check if tensor is NHWC
                 assert (
                     model.get_tensor_layout(graph_out_name) == DataLayout.NHWC
-                    or model.get_tensor_layout(graph_in_name) == DataLayout.NC
-                ), "Data layout of tensors must be NHWC or NC"
+                    or model.get_tensor_layout(graph_out_name) == DataLayout.NC
+                ), "Data layout of output tensor must be NHWC or NC"
                 out_shape = model.get_tensor_shape(graph_out_name)
                 out_dtype = model.get_tensor_datatype(graph_out_name)
                 # determine the feasible interface width
@@ -120,7 +120,7 @@ class InsertIODMA(Transformation):
                 assert (
                     model.get_tensor_layout(graph_in_name) == DataLayout.NHWC
                     or model.get_tensor_layout(graph_in_name) == DataLayout.NC
-                ), "Data layout of tensors must be NHWC or NC"
+                ), "Data layout of input tensor must be NHWC or NC"
                 in_shape = model.get_tensor_shape(graph_in_name)
                 in_dtype = model.get_tensor_datatype(graph_in_name)
                 # determine the feasible interface width
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index 1e45a65720604144f67245b98dcbe3f6dc8363f5..a7bf9e6e6279923764009a00e2f805be1b1fa9c0 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -26,10 +26,8 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import os
-import shutil
-import warnings
 
+import shutil
 from finn.custom_op.registry import getCustomOp
 from finn.transformation import Transformation
 from finn.util.basic import gen_finn_dt_tensor, get_finn_root, make_build_dir
@@ -48,14 +46,11 @@ class MakePYNQDriver(Transformation):
     value.
     """
 
-    def __init__(self):
+    def __init__(self, platform):
         super().__init__()
+        self.platform = platform
 
     def apply(self, model):
-        vivado_pynq_proj = model.get_metadata_prop("vivado_pynq_proj")
-        if vivado_pynq_proj is None or (not os.path.isdir(vivado_pynq_proj)):
-            warnings.warn("No PYNQ project found, apply MakePYNQProject first.")
-
         # create a temporary folder for the generated driver
         pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
         model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)
@@ -68,11 +63,21 @@ class MakePYNQDriver(Transformation):
         o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
         i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
         o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
-        # extract HLSCustomOp instances to get folded i/o shapes
-        first_node = getCustomOp(model.find_consumer(i_tensor_name))
-        last_node = getCustomOp(model.find_producer(o_tensor_name))
-        i_tensor_shape_folded = tuple(first_node.get_folded_input_shape())
-        o_tensor_shape_folded = tuple(last_node.get_folded_output_shape())
+        # handle folded i/o shapes due to differences in DMA engines
+        if self.platform == "zynq":
+            # extract HLSCustomOp instances to get folded i/o shapes
+            first_node = getCustomOp(model.find_consumer(i_tensor_name))
+            last_node = getCustomOp(model.find_producer(o_tensor_name))
+            i_tensor_shape_folded = tuple(first_node.get_folded_input_shape())
+            o_tensor_shape_folded = tuple(last_node.get_folded_output_shape())
+        else:
+            i_tensor_shape_folded = list(i_tensor_shape_normal)
+            i_tensor_shape_folded.insert(-1, 1)
+            i_tensor_shape_folded = tuple(i_tensor_shape_folded)
+            o_tensor_shape_folded = list(o_tensor_shape_normal)
+            o_tensor_shape_folded.insert(-1, 1)
+            o_tensor_shape_folded = tuple(o_tensor_shape_folded)
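+            # e.g. (1, 10) -> (1, 1, 10): the IODMA engines take care of
+            # folding, so the driver only inserts a singleton fold dimension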
+
         # generate dummy folded i/o tensors and their packed versions
         i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded)
         o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt, o_tensor_shape_folded)
@@ -99,6 +104,7 @@ class MakePYNQDriver(Transformation):
             ret = ret.replace("[1,", "[%s," % batch_var_name)
             return ret
 
+        driver = driver.replace("$PLATFORM$", self.platform)
         driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt))
         driver = driver.replace("$INPUT_SHAPE_NORMAL$", mss(i_tensor_shape_normal))
         driver = driver.replace("$INPUT_SHAPE_FOLDED$", mss(i_tensor_shape_folded))
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_proj.py b/src/finn/transformation/fpgadataflow/make_pynq_proj.py
index a874d7a7c702e1b3e9125fc031aa65dc287a407d..5e45d6f230503668a15d784e3c6afa45560fe004 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_proj.py
@@ -128,6 +128,8 @@ class MakePYNQProject(Transformation):
         # filename for the synth utilization report
         synth_report_filename = vivado_pynq_proj_dir + "/synth_report.xml"
         model.set_metadata_prop("vivado_synth_rpt", synth_report_filename)
+        # set platform attribute for correct remote execution
+        model.set_metadata_prop("platform", "zynq")
 
         # get metadata property clk_ns to calculate clock frequency
         clk_ns = float(model.get_metadata_prop("clk_ns"))
diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfc076ba52ab5911267d807a7513e4840f01edaf
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -0,0 +1,316 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation import Transformation
+from finn.core.modelwrapper import ModelWrapper
+from finn.util.basic import get_by_name, make_build_dir
+from finn.util.basic import get_num_default_workers
+from finn.util.basic import pynq_part_map
+
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+    ReplaceVerilogRelPaths,
+)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from shutil import copy
+
+from . import templates
+
+
+def collect_ip_dirs(model, ipstitch_path):
+    # collect list of all IP dirs
+    ip_dirs = []
+    for node in model.graph.node:
+        ip_dir_attribute = get_by_name(node.attribute, "ip_path")
+        assert (
+            ip_dir_attribute is not None
+        ), """Node attribute "ip_path" is
+        empty. Please run transformation HLSSynthIP first."""
+        ip_dir_value = ip_dir_attribute.s.decode("UTF-8")
+        assert os.path.isdir(
+            ip_dir_value
+        ), """The directory that should
+        contain the generated ip blocks doesn't exist."""
+        ip_dirs += [ip_dir_value]
+    ip_dirs += [ipstitch_path + "/ip"]
+    return ip_dirs
+
+
+class MakeZYNQProject(Transformation):
+    """Create a Vivado overlay project (including the shell infrastructure)
+    from the already-stitched IP block for this graph.
+    All nodes in the graph must have the fpgadataflow backend attribute,
+    and the CreateStitchedIP transformation must have been previously run on
+    the graph. This is functionally equivalent to MakePYNQProject, but does
+    not use the PYNQ infrastructure and instead creates a fully custom block
+    design. However, this transform requires DMA engines (IODMA nodes) to be
+    present in the accelerator design.
+
+    Outcome if successful: sets the vivado_pynq_proj attribute in the ONNX
+    ModelProto's metadata_props field, with the created project dir as the
+    value.
+    """
+
+    def __init__(self, platform, enable_debug=False):
+        super().__init__()
+        self.platform = platform
+        self.enable_debug = 1 if enable_debug else 0
+
+    def apply(self, model):
+
+        # create a config file and empty list of xo files
+        config = []
+        idma_idx = 0
+        odma_idx = 0
+        aximm_idx = 0
+        axilite_idx = 0
+        global_clk_ns = 0
+        instance_names = {}
+        for node in model.graph.node:
+            assert node.op_type == "StreamingDataflowPartition", "Invalid link graph"
+            sdp_node = getCustomOp(node)
+            dataflow_model_filename = sdp_node.get_nodeattr("model")
+            kernel_model = ModelWrapper(dataflow_model_filename)
+
+            ipstitch_path = kernel_model.get_metadata_prop("vivado_stitch_proj")
+            if ipstitch_path is None or (not os.path.isdir(ipstitch_path)):
+                raise Exception(
+                    "No stitched IPI design found for %s, apply CreateStitchedIP first."
+                    % node.name
+                )
+
+            vivado_stitch_vlnv = kernel_model.get_metadata_prop("vivado_stitch_vlnv")
+            if vivado_stitch_vlnv is None:
+                raise Exception(
+                    "No vlnv found for %s, apply CreateStitchedIP first." % node.name
+                )
+
+            ip_dirs = ["list"]
+            ip_dirs += collect_ip_dirs(kernel_model, ipstitch_path)
+            ip_dirs_str = "[%s]" % (" ".join(ip_dirs))
+            config.append(
+                "set_property ip_repo_paths "
+                "[concat [get_property ip_repo_paths [current_project]] %s] "
+                "[current_project]" % ip_dirs_str
+            )
+            config.append("update_ip_catalog -rebuild -scan_changes")
+
+            # get metadata property clk_ns to calculate clock frequency
+            clk_ns = float(kernel_model.get_metadata_prop("clk_ns"))
+            if clk_ns > global_clk_ns:
+                global_clk_ns = clk_ns
+
+            # gather info on connectivity
+            # assume each node connected to outputs/inputs is DMA:
+            # has axis, aximm and axilite
+            # everything else is axis-only
+            # assume only one connection from each ip to the next
+            # all aximm allocated to DDR[0]
+            # all kernels allocated to SLR0
+            producer = model.find_producer(node.input[0])
+            consumer = model.find_consumers(node.output[0])
+            # define kernel instances
+            # name kernels connected to graph inputs as idmaxx
+            # name kernels connected to graph outputs as odmaxx
+            if producer is None or consumer is None:
+                if producer is None:
+                    instance_names[node.name] = "idma" + str(idma_idx)
+                elif consumer is None:
+                    instance_names[node.name] = "odma" + str(odma_idx)
+                config.append(
+                    "create_bd_cell -type ip -vlnv %s %s"
+                    % (vivado_stitch_vlnv, instance_names[node.name])
+                )
+                config.append(
+                    "connect_bd_intf_net [get_bd_intf_pins %s/m_axi_gmem0] "
+                    "[get_bd_intf_pins smartconnect_0/S%02d_AXI]"
+                    % (instance_names[node.name], aximm_idx)
+                )
+                config.append(
+                    "connect_bd_intf_net [get_bd_intf_pins %s/s_axi_control] "
+                    "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]"
+                    % (instance_names[node.name], axilite_idx)
+                )
+                idma_idx += 1
+                aximm_idx += 1
+                axilite_idx += 1
+            else:
+                instance_names[node.name] = node.name
+                config.append(
+                    "create_bd_cell -type ip -vlnv %s %s"
+                    % (vivado_stitch_vlnv, instance_names[node.name])
+                )
+            config.append(
+                "connect_bd_net [get_bd_pins %s/ap_clk] "
+                "[get_bd_pins smartconnect_0/aclk]" % instance_names[node.name]
+            )
+            config.append(
+                "connect_bd_net [get_bd_pins %s/ap_rst_n] "
+                "[get_bd_pins smartconnect_0/aresetn]" % instance_names[node.name]
+            )
+            # connect streams
+            if producer is not None:
+                for i in range(len(node.input)):
+                    producer = model.find_producer(node.input[i])
+                    if producer is not None:
+                        j = list(producer.output).index(node.input[i])
+                        config.append(
+                            "connect_bd_intf_net [get_bd_intf_pins %s/s_axis_%d] "
+                            "[get_bd_intf_pins %s/m_axis_%d]"
+                            % (
+                                instance_names[node.name],
+                                i,
+                                instance_names[producer.name],
+                                j,
+                            )
+                        )
+
+        # create a temporary folder for the project
+        vivado_pynq_proj_dir = make_build_dir(prefix="vivado_zynq_proj_")
+        model.set_metadata_prop("vivado_pynq_proj", vivado_pynq_proj_dir)
+
+        fclk_mhz = int(1 / (global_clk_ns * 0.001))
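+        # convert the worst-case clock period (ns) to a frequency (MHz)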
+
+        # create a TCL recipe for the project
+        ipcfg = vivado_pynq_proj_dir + "/ip_config.tcl"
+        config = "\n".join(config) + "\n"
+        with open(ipcfg, "w") as f:
+            f.write(
+                templates.custom_zynq_shell_template
+                % (
+                    fclk_mhz,
+                    axilite_idx,
+                    aximm_idx,
+                    self.platform,
+                    pynq_part_map[self.platform],
+                    config,
+                    self.enable_debug,
+                    get_num_default_workers(),
+                )
+            )
+
+        # create a shell script to launch Vivado on the TCL recipe
+        synth_project_sh = vivado_pynq_proj_dir + "/synth_project.sh"
+        working_dir = os.environ["PWD"]
+        with open(synth_project_sh, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write("cd {}\n".format(vivado_pynq_proj_dir))
+            f.write("vivado -mode tcl -source %s\n" % ipcfg)
+            f.write("cd {}\n".format(working_dir))
+
+        # call the synthesis script
+        bash_command = ["bash", synth_project_sh]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+        bitfile_name = (
+            vivado_pynq_proj_dir + "/finn_zynq_link.runs/impl_1/top_wrapper.bit"
+        )
+        if not os.path.isfile(bitfile_name):
+            raise Exception("Synthesis failed, no bitfile found")
+        deploy_bitfile_name = vivado_pynq_proj_dir + "/resizer.bit"
+        copy(bitfile_name, deploy_bitfile_name)
+        # set bitfile attribute
+        model.set_metadata_prop("vivado_pynq_bitfile", deploy_bitfile_name)
+        # set platform attribute for correct remote execution
+        model.set_metadata_prop("platform", "zynq-iodma")
+        hwh_name = (
+            vivado_pynq_proj_dir
+            + "/finn_zynq_link.srcs/sources_1/bd/top/hw_handoff/top.hwh"
+        )
+        if not os.path.isfile(hwh_name):
+            raise Exception("Synthesis failed, no hardware handoff file found")
+        deploy_hwh_name = vivado_pynq_proj_dir + "/resizer.hwh"
+        copy(hwh_name, deploy_hwh_name)
+        # filename for the synth utilization report
+        synth_report_filename = vivado_pynq_proj_dir + "/synth_report.xml"
+        model.set_metadata_prop("vivado_synth_rpt", synth_report_filename)
+        return (model, False)
+
+
+class ZynqBuild(Transformation):
+    """Best-effort attempt at building the accelerator for Zynq."""
+
+    def __init__(self, platform, period_ns, enable_debug=False):
+        super().__init__()
+        self.fpga_part = pynq_part_map[platform]
+        self.period_ns = period_ns
+        self.platform = platform
+        self.enable_debug = enable_debug
+
+    def apply(self, model):
+        # first infer layouts
+        model = model.transform(InferDataLayouts())
+        # prepare at global level, then break up into kernels
+        prep_transforms = [
+            InsertIODMA(64),
+            InsertDWC(),
+            Floorplan(),
+            CreateDataflowPartition(),
+        ]
+        for trn in prep_transforms:
+            model = model.transform(trn)
+            model = model.transform(GiveUniqueNodeNames())
+            model = model.transform(GiveReadableTensorNames())
+        # Build each kernel individually
+        sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
+        for sdp_node in sdp_nodes:
+            sdp_node = getCustomOp(sdp_node)
+            dataflow_model_filename = sdp_node.get_nodeattr("model")
+            kernel_model = ModelWrapper(dataflow_model_filename)
+            kernel_model = kernel_model.transform(InsertFIFO())
+            kernel_model = kernel_model.transform(GiveUniqueNodeNames())
+            kernel_model.save(dataflow_model_filename)
+            kernel_model = kernel_model.transform(
+                PrepareIP(self.fpga_part, self.period_ns)
+            )
+            kernel_model = kernel_model.transform(HLSSynthIP())
+            kernel_model = kernel_model.transform(ReplaceVerilogRelPaths())
+            kernel_model = kernel_model.transform(
+                CreateStitchedIP(
+                    self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True
+                )
+            )
+            kernel_model.save(dataflow_model_filename)
+        # Assemble design from IPs
+        model = model.transform(
+            MakeZYNQProject(self.platform, enable_debug=self.enable_debug)
+        )
+        return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 48895f35da1285516467b515b8ef518febbe2f12..eaeadc7b38b14b2d2eaa761b3cd46220b9fe6bbe 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -104,7 +104,7 @@ from finn.core.datatype import DataType
 from pynq.ps import Clocks
 
 class FINNAccelDriver():
-    def __init__(self, N, bitfile, platform="zynq"):
+    def __init__(self, N, bitfile, platform="$PLATFORM$"):
         \"\"\"Instantiate the FINN accelerator driver.
         Gets batchsize (N) as integer and path to bitfile as string.\"\"\"
         self.platform = platform
@@ -141,8 +141,16 @@ class FINNAccelDriver():
         elif self.platform == "alveo":
             self.idma = self.ol.idma0
             self.odma = self.ol.odma0
+        elif self.platform == "zynq-iodma":
+            self.idma = self.ol.idma0
+            self.odma = self.ol.odma0
+            # clock frequency
+            self.fclk_mhz = $CLOCK_FREQ_MHZ$
+            # set the clock frequency as specified by user during transformations
+            if self.fclk_mhz > 0:
+                Clocks.$CLK_NAME$ = self.fclk_mhz
         else:
-            raise ValueError("Supported platforms are zynq and alveo")
+            raise ValueError("Supported platforms are zynq zynq-iodma alveo")
 
         # allocate a PYNQ buffer for the packed input and buffer
         self.ibuf_packed_device = allocate(shape=self.ishape_packed, dtype=np.uint8)
@@ -194,7 +202,20 @@ class FINNAccelDriver():
             dma.recvchannel.transfer(self.obuf_packed_device)
             dma.sendchannel.wait()
             dma.recvchannel.wait()
-        else:
+        elif self.platform == "zynq-iodma":
+            # manually launch IODMAs since signatures are missing
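+            # assumes the standard HLS AXI-lite register map:
+            # 0x00 = control (bit 0 ap_start, bit 1 ap_done),
+            # 0x10 = buffer pointer, 0x1c = number of samples (batchsize)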
+            self.idma.write(0x10, self.ibuf_packed_device.device_address)
+            self.idma.write(0x1c, self.N)
+            self.odma.write(0x10, self.obuf_packed_device.device_address)
+            self.odma.write(0x1c, self.N)
+            self.idma.write(0x00, 1)
+            self.odma.write(0x00, 1)
+            # wait until output IODMA is finished
+            status = self.odma.read(0x00)
+            while status & 0x2 == 0:
+                status = self.odma.read(0x00)
+
+        elif self.platform == "alveo":
             self.ibuf_packed_device.sync_to_device()
             self.idma.start(self.ibuf_packed_device, self.N)
             self.odma.start(self.obuf_packed_device, self.N)
@@ -207,7 +228,7 @@ class FINNAccelDriver():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Set exec mode, batchsize N, bitfile name, inputfile name and outputfile name')
     parser.add_argument('--exec_mode', help='Please select functional verification ("execute") or throughput test ("throughput_test")', default="execute")
-    parser.add_argument('--platform', help='Target platform, zynq or alveo', default="zynq")
+    parser.add_argument('--platform', help='Target platform: zynq, zynq-iodma or alveo', default="zynq")
     parser.add_argument('--batchsize', help='number of samples for inference', type=int, default=1)
     parser.add_argument('--bitfile', help='name of bitfile (i.e. "resizer.bit")', default="resizer.bit")
     parser.add_argument('--inputfile', help='name of input npy file (i.e. "input.npy")', default="input.npy")
@@ -278,3 +299,117 @@ if __name__ == "__main__":
 
 
 """
+
+custom_zynq_shell_template = """
+set FREQ_MHZ %s
+set NUM_AXILITE %d
+if {$NUM_AXILITE > 9} {
+    error "Maximum 10 AXI-Lite interfaces supported"
+}
+set NUM_AXIMM %d
+set BOARD %s
+set FPGA_PART %s
+create_project finn_zynq_link ./ -part $FPGA_PART
+
+# set board part repo paths to find PYNQ-Z1/Z2
+set paths_prop [get_property BOARD_PART_REPO_PATHS [current_project]]
+set paths_param [get_param board.repoPaths]
+lappend paths_prop /workspace/finn/board_files
+lappend paths_param /workspace/finn/board_files
+set_property BOARD_PART_REPO_PATHS $paths_prop [current_project]
+set_param board.repoPaths $paths_param
+
+if {$BOARD == "ZCU104"} {
+    set_property board_part xilinx.com:zcu104:part0:1.1 [current_project]
+    set ZYNQ_TYPE "zynq_us+"
+} elseif {$BOARD == "Ultra96"} {
+    set ZYNQ_TYPE "zynq_us+"
+} elseif {$BOARD == "Pynq-Z2"} {
+    set ZYNQ_TYPE "zynq_7000"
+} elseif {$BOARD == "Pynq-Z1"} {
+    set ZYNQ_TYPE "zynq_7000"
+    set_property board_part www.digilentinc.com:pynq-z1:part0:1.0 [current_project]
+} else {
+    puts "Unrecognized board"
+}
+
+create_bd_design "top"
+if {$ZYNQ_TYPE == "zynq_us+"} {
+    create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.3 zynq_ps
+    apply_bd_automation -rule xilinx.com:bd_rule:zynq_ultra_ps_e -config {apply_board_preset "1" }  [get_bd_cells zynq_ps]
+    #activate one slave port, deactivate the second master port
+    set_property -dict [list CONFIG.PSU__USE__S_AXI_GP2 {1}] [get_bd_cells zynq_ps]
+    set_property -dict [list CONFIG.PSU__USE__M_AXI_GP1 {0}] [get_bd_cells zynq_ps]
+    #set frequency of PS clock (this can't always be exactly met)
+    set_property -dict [list CONFIG.PSU__CRL_APB__PL0_REF_CTRL__FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps]
+} elseif {$ZYNQ_TYPE == "zynq_7000"} {
+    create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 zynq_ps
+    apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config {make_external "FIXED_IO, DDR" apply_board_preset "1" Master "Disable" Slave "Disable" }  [get_bd_cells zynq_ps]
+    set_property -dict [list CONFIG.PCW_USE_S_AXI_HP0 {1}] [get_bd_cells zynq_ps]
+    set_property -dict [list CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps]
+} else {
+    puts "Unrecognized Zynq type"
+}
+
+#instantiate axi interconnect, axi smartconnect
+create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_0
+create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_0
+#set number of axilite interfaces, and number of axi master interfaces
+set_property -dict [list CONFIG.NUM_SI $NUM_AXIMM] [get_bd_cells smartconnect_0]
+set_property -dict [list CONFIG.NUM_MI $NUM_AXILITE] [get_bd_cells axi_interconnect_0]
+
+#create reset controller and connect interconnects to PS
+if {$ZYNQ_TYPE == "zynq_us+"} {
+    connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0_FPD]
+    connect_bd_intf_net [get_bd_intf_pins zynq_ps/M_AXI_HPM0_FPD] -boundary_type upper [get_bd_intf_pins axi_interconnect_0/S00_AXI]
+    #connect interconnect clocks and resets
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins axi_interconnect_0/ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins axi_interconnect_0/S00_ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins zynq_ps/saxihp0_fpd_aclk]
+} elseif {$ZYNQ_TYPE == "zynq_7000"} {
+    connect_bd_intf_net -boundary_type upper [get_bd_intf_pins zynq_ps/M_AXI_GP0] [get_bd_intf_pins axi_interconnect_0/S00_AXI]
+    connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins axi_interconnect_0/ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins axi_interconnect_0/S00_ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins zynq_ps/S_AXI_HP0_ACLK]
+}
+connect_bd_net [get_bd_pins axi_interconnect_0/ARESETN] [get_bd_pins smartconnect_0/aresetn]
+
+#custom IP instantiations/connections start here
+%s
+
+# set up debug
+if {%d == 1} {
+    set_property HDL_ATTRIBUTE.DEBUG true [get_bd_intf_nets {idma0_m_axis_0}]
+    set_property HDL_ATTRIBUTE.DEBUG true [get_bd_intf_nets {StreamingDataflowPartition_1_m_axis_0}]
+    set_property HDL_ATTRIBUTE.DEBUG true [get_bd_intf_nets {smartconnect_0_M00_AXI}]
+    apply_bd_automation -rule xilinx.com:bd_rule:debug -dict [list \
+                                                              [get_bd_intf_nets smartconnect_0_M00_AXI] {AXI_R_ADDRESS "Data and Trigger" AXI_R_DATA "Data and Trigger" AXI_W_ADDRESS "Data and Trigger" AXI_W_DATA "Data and Trigger" AXI_W_RESPONSE "Data and Trigger" CLK_SRC "/zynq_ps/FCLK_CLK0" SYSTEM_ILA "Auto" APC_EN "0" } \
+                                                              [get_bd_intf_nets idma0_m_axis_0] {AXIS_SIGNALS "Data and Trigger" CLK_SRC "/zynq_ps/FCLK_CLK0" SYSTEM_ILA "Auto" APC_EN "0" } \
+                                                              [get_bd_intf_nets StreamingDataflowPartition_1_m_axis_0] {AXIS_SIGNALS "Data and Trigger" CLK_SRC "/zynq_ps/FCLK_CLK0" SYSTEM_ILA "Auto" APC_EN "0" } \
+                                                             ]
+}
+
+#finalize clock and reset connections for interconnects
+set i 0
+while {$i < $NUM_AXILITE} {
+    apply_bd_automation -quiet -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins axi_interconnect_0/M0${i}_ACLK]
+    incr i
+}
+
+save_bd_design
+assign_bd_address
+validate_bd_design
+
+set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [ get_files top.bd ]
+make_wrapper -files [get_files top.bd] -import -fileset sources_1 -top
+
+# out-of-context synth can't be used for bitstream generation
+# set_property -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} -value {-mode out_of_context} -objects [get_runs synth_1]
+launch_runs -to_step write_bitstream impl_1 -jobs %d
+wait_on_run [get_runs impl_1]
+
+# generate synthesis report
+open_run synth_1 -name synth_1
+report_utilization -hierarchical -hierarchical_depth 4 -file synth_report.xml -format xml
+"""
diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py
index ae529f2f4a165a732627befea0675073bc490996..0fb85f25e4f8d652a87f1e832c6b41fd67a7406e 100644
--- a/src/finn/transformation/fpgadataflow/vitis_build.py
+++ b/src/finn/transformation/fpgadataflow/vitis_build.py
@@ -52,10 +52,16 @@ from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeN
 from finn.util.basic import make_build_dir
 from finn.transformation.infer_data_layouts import InferDataLayouts
 
+
 def _check_vitis_envvars():
     assert "VITIS_PATH" in os.environ, "VITIS_PATH must be set for Vitis"
-    assert "PLATFORM_REPO_PATHS" in os.environ, "PLATFORM_REPO_PATHS must be set for Vitis"
-    assert "XILINX_XRT" in os.environ, "XILINX_XRT must be set for Vitis, ensure the XRT env is sourced"
+    assert (
+        "PLATFORM_REPO_PATHS" in os.environ
+    ), "PLATFORM_REPO_PATHS must be set for Vitis"
+    assert (
+        "XILINX_XRT" in os.environ
+    ), "XILINX_XRT must be set for Vitis, ensure the XRT env is sourced"
+
 
 class CreateVitisXO(Transformation):
     """Create a Vitis object file from a stitched FINN ip.
@@ -145,7 +151,9 @@ class CreateVitisXO(Transformation):
         bash_command = ["bash", package_xo_sh]
         process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
         process_compile.communicate()
-        assert os.path.isfile(xo_path), "Vitis .xo file not created, check logs under %s" % vivado_proj_dir
+        assert os.path.isfile(xo_path), (
+            "Vitis .xo file not created, check logs under %s" % vivado_proj_dir
+        )
         return (model, False)
 
 
@@ -238,7 +246,7 @@ class VitisLink(Transformation):
             f.write("cd {}\n".format(link_dir))
             f.write(
                 "v++ -t hw --platform %s --link %s"
-                " --kernel_frequency %d --config config.txt\n"
+                " --kernel_frequency %d --config config.txt --optimize 2 --save-temps -R2\n"
                 % (self.platform, " ".join(object_files), self.f_mhz)
             )
             f.write("cd {}\n".format(working_dir))
@@ -247,7 +255,9 @@ class VitisLink(Transformation):
         process_compile.communicate()
         # TODO rename xclbin appropriately here?
         xclbin = link_dir + "/a.xclbin"
-        assert os.path.isfile(xclbin), "Vitis .xclbin file not created, check logs under %s" % link_dir
+        assert os.path.isfile(xclbin), (
+            "Vitis .xclbin file not created, check logs under %s" % link_dir
+        )
         model.set_metadata_prop("vitis_xclbin", xclbin)
         return (model, False)
 
@@ -305,5 +315,7 @@ class VitisBuild(Transformation):
             kernel_model.save(dataflow_model_filename)
         # Assemble design from kernels
         model = model.transform(VitisLink(self.platform, round(1000 / self.period_ns)))
+        # set platform attribute for correct remote execution
+        model.set_metadata_prop("platform", "alveo")
 
         return (model, False)
diff --git a/src/finn/util/vcd.py b/src/finn/util/vcd.py
index d9e244422065314ceb790dc6719b57688ff76828..a4400f7bd7e75549189f081ce255fd67c49b3746 100644
--- a/src/finn/util/vcd.py
+++ b/src/finn/util/vcd.py
@@ -162,16 +162,23 @@ def _get_stats(x):
     return (x[0], get_stream_if_stats(x[1], x[0]))
 
 
-def get_all_stream_if_stats(vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}"):
+def get_all_stream_if_stats(
+    vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}", num_workers=None
+):
     """Return a list of streaming interface stats, sorted by the percentage
-    for the given sort_by key. If stream_ifs is None, all streamin interface
+    for the given sort_by key. If stream_ifs is None, all streaming interface
     stats will be returned, otherwise treated as a list of interface names to
-    return the stats for."""
+    return the stats for.
+    By default, the number of parallel workers is taken from the environment
+    variable NUM_DEFAULT_WORKERS. This can be overridden per call via the
+    optional num_workers parameter.
+    """
 
     if stream_ifs is None:
         stream_ifs = list_stream_if(vcd_file)
 
-    with mp.Pool(get_num_default_workers()) as p:
+    if num_workers is None:
+        num_workers = get_num_default_workers()
+
+    with mp.Pool(num_workers) as p:
         stream_ifs = map(lambda x: (x, vcd_file), stream_ifs)
         all_stats = p.map(_get_stats, stream_ifs)
 
diff --git a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a1b21afa1aee5db97add9b3eadba1b750a967cc
--- /dev/null
+++ b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
@@ -0,0 +1,252 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import pytest
+import numpy as np
+
+# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
+# import pytorch before onnx, so we make sure to import onnx first
+import onnx  # NOQA
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from finn.transformation.fold_constants import FoldConstants
+
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.general import (
+    RemoveUnusedTensors,
+    RemoveStaticGraphInputs,
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+)
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline import Streamline
+from finn.util.basic import pynq_part_map
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+import pkg_resources as pk
+from finn.transformation.double_to_single_float import DoubleToSingleFloat
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
+from finn.transformation.infer_data_layouts import InferDataLayouts
+
+
+build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
+test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
+test_fpga_part = pynq_part_map[test_pynq_board]
+target_clk_ns = 10
+mem_mode = "decoupled"
+
+
+def test_end2end_zynqbuild_cnv_w1a1_export():
+    import brevitas.onnx as bo
+
+    cnv = get_test_model_trained("CNV", 1, 1)
+    bo.export_finn_onnx(
+        cnv, (1, 3, 32, 32), build_dir + "/end2end_zynqbuild_cnv_w1a1_export.onnx"
+    )
+    )
+
+
+def test_end2end_zynqbuild_cnv_w1a1_import_and_tidy():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_export.onnx"
+    )
+    model = model.transform(DoubleToSingleFloat())
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(RemoveStaticGraphInputs())
+    model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_tidy.onnx")
+
+
+def test_end2end_zynqbuild_cnv_w1a1_streamline():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_tidy.onnx"
+    )
+    model = model.transform(Streamline())
+    model = model.transform(LowerConvsToMatMul())
+    model = model.transform(MakeMaxPoolNHWC())
+    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
+    model = model.transform(Streamline())
+    model = model.transform(RemoveUnusedTensors())
+    model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_streamlined.onnx")
+
+
+def test_end2end_zynqbuild_cnv_w1a1_convert_to_hls_layers():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_streamlined.onnx"
+    )
+    model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
+    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
+    model = model.transform(to_hls.InferConvInpGen())
+    model = model.transform(to_hls.InferStreamingMaxPool())
+    model = model.transform(RemoveCNVtoFCFlatten())
+    model = model.transform(InferDataLayouts())
+    model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_hls_layers.onnx")
+
+
+def test_end2end_zynqbuild_cnv_w1a1_create_dataflow_partition():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_hls_layers.onnx"
+    )
+    parent_model = model.transform(CreateDataflowPartition())
+    parent_model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_parent.onnx")
+    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    sdp_node = getCustomOp(sdp_node)
+    dataflow_model_filename = sdp_node.get_nodeattr("model")
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
+    dataflow_model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_model.onnx")
+
+
+def test_end2end_zynqbuild_cnv_w1a1_fold():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_model.onnx"
+    )
+    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
+    # each tuple is (PE, SIMD, in_fifo_depth) for a layer
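+    # PE parallelizes across output channels/neurons and SIMD across input
+    # elements; each must evenly divide the corresponding layer dimension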
+    folding = [
+        (16, 3, 256),
+        (32, 32, 256),
+        (16, 32, 256),
+        (16, 32, 256),
+        (4, 32, 214),
+        (1, 32, 2),
+        (1, 4, 126),
+        (1, 8, 62),
+        (5, 1, 6),
+    ]
+    for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding):
+        fcl_inst = getCustomOp(fcl)
+        fcl_inst.set_nodeattr("PE", pe)
+        fcl_inst.set_nodeattr("SIMD", simd)
+        fcl_inst.set_nodeattr("inFIFODepth", ififodepth)
+
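+    # give the sliding-window (im2col) generators the same SIMD as the FC
+    # layers they feed, so producer and consumer stream widths match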
+    swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
+    swg_idepth = [2, 51, 9, 106, 2, 2]
+    for i, swg_layer in enumerate(swg_layers):
+        swg_inst = getCustomOp(swg_layer)
+        swg_inst.set_nodeattr("SIMD", folding[i][1])
+        swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i])
+    model = model.transform(AnnotateResources("estimate"))
+    model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_folded.onnx")
+
+
+def test_end2end_zynqbuild_cnv_w1a1_make_driver():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_folded.onnx"
+    )
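+    # the "zynq-iodma" platform driver moves data through the design's IODMA
+    # engines from contiguous buffers allocated on the PYNQ side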
+    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
+    model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_driver.onnx")
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_zynqbuild_cnv_w1a1_build():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_driver.onnx"
+    )
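+    # ZynqBuild runs the full bitfile flow for the target board: IODMA/FIFO
+    # insertion, HLS synthesis, IP stitching and Vivado synthesis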
+    model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns))
+    model = model.transform(AnnotateResources("synth"))
+    model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_build.onnx")
+
+
+def test_end2end_zynqbuild_cnv_w1a1_deploy_on_pynq():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_build.onnx"
+    )
+    try:
+        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        username = os.getenv("PYNQ_USERNAME", "xilinx")
+        password = os.getenv("PYNQ_PASSWORD", "xilinx")
+        port = os.getenv("PYNQ_PORT", 22)
+        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
+        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
+        # save the model to be able to link it to the parent
+        model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_deploy.onnx")
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
+
+
+def test_end2end_zynqbuild_cnv_w1a1_run_on_pynq():
+    # use the streamlined model as the "golden" reference for correct outputs
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_streamlined.onnx"
+    )
+    iname = golden.graph.input[0].name
+    oname = golden.graph.output[0].name
+    # load one of the test vectors
+    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
+    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
+    input_tensor = input_tensor / 255
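+    # scale pixel values to [0, 1]; the sample belongs to CIFAR-10 class 3 ("cat")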
+    assert input_tensor.shape == (1, 3, 32, 32)
+    x = input_tensor
+    # run using FINN-based execution
+    ret_golden = execute_onnx(golden, {iname: x}, True)
+    y_golden = ret_golden[oname]
+    # set up parent+child graph to test
+    # we'll use the model from the previous step as the child model
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_parent.onnx"
+    )
+    iname = parent_model.graph.input[0].name
+    oname = parent_model.graph.output[0].name
+    try:
+        ip = os.environ["PYNQ_IP"]  # NOQA
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        # produce results with remote execution on the PYNQ board
+        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+        sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(
+            build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_deploy.onnx"
+        )
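+        # the load call above only verifies the deploy checkpoint exists (and
+        # skips otherwise); here we point the dataflow partition at it so the
+        # parent graph executes the partition remotely on the board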
+        sdp_node.set_nodeattr(
+            "model", build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_deploy.onnx"
+        )
+        ret = execute_onnx(parent_model, {iname: x}, True)
+        y = ret[oname]
+        assert np.isclose(y, y_golden).all()
+        assert np.argmax(y) == 3
+
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py
new file mode 100644
index 0000000000000000000000000000000000000000..98ccc93188c17f4f82b3cbf0164f847c92b7b7bd
--- /dev/null
+++ b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py
@@ -0,0 +1,232 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+from pkgutil import get_data
+
+import pytest
+
+import numpy as np
+
+# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
+# import pytorch before onnx, so we make sure to import onnx first
+import onnx  # NOQA
+import onnx.numpy_helper as nph
+
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from finn.transformation.fold_constants import FoldConstants
+
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.general import (
+    RemoveUnusedTensors,
+    RemoveStaticGraphInputs,
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+)
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
+from finn.util.basic import pynq_part_map
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+
+build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
+test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
+test_fpga_part = pynq_part_map[test_pynq_board]
+target_clk_ns = 10
+mem_mode = "decoupled"
+
+
+def test_end2end_zynqbuild_tfc_w1a1_export():
+    import brevitas.onnx as bo
+
+    tfc = get_test_model_trained("TFC", 1, 1)
+    bo.export_finn_onnx(
+        tfc, (1, 1, 28, 28), build_dir + "/end2end_zynqbuild_tfc_w1a1_export.onnx"
+    )
+
+
+def test_end2end_zynqbuild_tfc_w1a1_import_and_tidy():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_export.onnx"
+    )
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferDataTypes())
+    model = model.transform(RemoveStaticGraphInputs())
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_tidy.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w1a1_streamline():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_tidy.onnx"
+    )
+    model = model.transform(Streamline())
+    model = model.transform(RemoveUnusedTensors())
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_streamlined.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w1a1_convert_to_hls_layers():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_streamlined.onnx"
+    )
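+    # lower the bipolar MatMuls to XNOR-popcount form, fold the remaining
+    # Add/Mul ops into the multi-thresholds, and round thresholds to integers
+    # so the graph maps onto binary streaming FC layers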
+    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
+    model = model.transform(absorb.AbsorbAddIntoMultiThreshold())
+    model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
+    model = model.transform(RoundAndClipThresholds())
+    model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_hls_layers.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w1a1_create_dataflow_partition():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_hls_layers.onnx"
+    )
+    parent_model = model.transform(CreateDataflowPartition())
+    parent_model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_parent.onnx")
+    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    sdp_node = getCustomOp(sdp_node)
+    dataflow_model_filename = sdp_node.get_nodeattr("model")
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
+    dataflow_model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_model.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w1a1_fold():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_model.onnx"
+    )
+    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
+    # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
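+    # ram_style requests a weight-memory primitive: "block" (BRAM),
+    # "distributed" (LUTRAM) or "auto" (let the synthesis tool decide)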
+    config = [
+        (16, 49, 16, 64, "block"),
+        (8, 8, 64, 64, "auto"),
+        (8, 8, 64, 64, "auto"),
+        (10, 8, 64, 10, "distributed"),
+    ]
+    for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
+        fcl_inst = getCustomOp(fcl)
+        fcl_inst.set_nodeattr("PE", pe)
+        fcl_inst.set_nodeattr("SIMD", simd)
+        fcl_inst.set_nodeattr("inFIFODepth", ififo)
+        fcl_inst.set_nodeattr("outFIFODepth", ofifo)
+        fcl_inst.set_nodeattr("ram_style", ramstyle)
+
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_folded.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w1a1_make_driver():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_folded.onnx"
+    )
+    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_driver.onnx")
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_zynqbuild_tfc_w1a1_build():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_driver.onnx"
+    )
+    model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns))
+    model = model.transform(AnnotateResources("synth"))
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_build.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w1a1_deploy_on_pynq():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_build.onnx"
+    )
+    try:
+        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        username = os.getenv("PYNQ_USERNAME", "xilinx")
+        password = os.getenv("PYNQ_PASSWORD", "xilinx")
+        port = os.getenv("PYNQ_PORT", 22)
+        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
+        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
+        # save the model to be able to link it to the parent
+        model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_deploy.onnx")
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
+
+
+def test_end2end_zynqbuild_tfc_w1a1_run_on_pynq():
+    # use the streamlined model as the "golden" reference for correct outputs
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_streamlined.onnx"
+    )
+    iname = golden.graph.input[0].name
+    oname = golden.graph.output[0].name
+    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
+    input_tensor = onnx.load_tensor_from_string(raw_i)
+    x = nph.to_array(input_tensor)
+    # run using FINN-based execution
+    ret_golden = execute_onnx(golden, {iname: x}, True)
+    y_golden = ret_golden[oname]
+    # set up parent+child graph to test
+    # we'll use the model from the previous step as the child model
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_parent.onnx"
+    )
+    iname = parent_model.graph.input[0].name
+    oname = parent_model.graph.output[0].name
+    try:
+        ip = os.environ["PYNQ_IP"]  # NOQA
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        # produce results with remote execution on the PYNQ board
+        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+        sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(
+            build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_deploy.onnx"
+        )
+        sdp_node.set_nodeattr(
+            "model", build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_deploy.onnx"
+        )
+        ret = execute_onnx(parent_model, {iname: x}, True)
+        y = ret[oname]
+        assert np.isclose(y, y_golden).all()
+
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8db8c1a4a2049a38e64b9c5bb54fb7d4d8d0ab0
--- /dev/null
+++ b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
@@ -0,0 +1,222 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+from pkgutil import get_data
+
+import pytest
+
+import numpy as np
+
+# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
+# import pytorch before onnx, so we make sure to import onnx first
+import onnx  # NOQA
+import onnx.numpy_helper as nph
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.general import (
+    RemoveUnusedTensors,
+    RemoveStaticGraphInputs,
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+)
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline import Streamline
+from finn.util.basic import pynq_part_map
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+
+build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
+test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
+test_fpga_part = pynq_part_map[test_pynq_board]
+target_clk_ns = 10
+mem_mode = "decoupled"
+
+
+def test_end2end_zynqbuild_tfc_w2a2_export():
+    import brevitas.onnx as bo
+
+    tfc = get_test_model_trained("TFC", 2, 2)
+    bo.export_finn_onnx(
+        tfc, (1, 1, 28, 28), build_dir + "/end2end_zynqbuild_tfc_w2a2_export.onnx"
+    )
+
+
+def test_end2end_zynqbuild_tfc_w2a2_import_and_tidy():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_export.onnx"
+    )
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferDataTypes())
+    model = model.transform(RemoveStaticGraphInputs())
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_tidy.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w2a2_streamline():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_tidy.onnx"
+    )
+    model = model.transform(Streamline())
+    model = model.transform(RemoveUnusedTensors())
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_streamlined.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w2a2_convert_to_hls_layers():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_streamlined.onnx"
+    )
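+    # with 2-bit weights and activations there is no XNOR-popcount shortcut,
+    # so the MatMuls convert directly to quantized streaming FC layers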
+    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_hls_layers.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w2a2_create_dataflow_partition():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_hls_layers.onnx"
+    )
+    parent_model = model.transform(CreateDataflowPartition())
+    parent_model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_parent.onnx")
+    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    sdp_node = getCustomOp(sdp_node)
+    dataflow_model_filename = sdp_node.get_nodeattr("model")
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
+    dataflow_model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_model.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w2a2_fold():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_model.onnx"
+    )
+    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
+    # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
+    config = [
+        (16, 49, 16, 64, "block"),
+        (8, 8, 64, 64, "auto"),
+        (8, 8, 64, 64, "auto"),
+        (10, 8, 64, 10, "distributed"),
+    ]
+    for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
+        fcl_inst = getCustomOp(fcl)
+        fcl_inst.set_nodeattr("PE", pe)
+        fcl_inst.set_nodeattr("SIMD", simd)
+        fcl_inst.set_nodeattr("inFIFODepth", ififo)
+        fcl_inst.set_nodeattr("outFIFODepth", ofifo)
+        fcl_inst.set_nodeattr("ram_style", ramstyle)
+
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_folded.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w2a2_make_driver():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_folded.onnx"
+    )
+    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_driver.onnx")
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_zynqbuild_tfc_w2a2_build():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_driver.onnx"
+    )
+    model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns))
+    model = model.transform(AnnotateResources("synth"))
+    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_build.onnx")
+
+
+def test_end2end_zynqbuild_tfc_w2a2_deploy_on_pynq():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_build.onnx"
+    )
+    try:
+        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        username = os.getenv("PYNQ_USERNAME", "xilinx")
+        password = os.getenv("PYNQ_PASSWORD", "xilinx")
+        port = os.getenv("PYNQ_PORT", 22)
+        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
+        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
+        # save the model to be able to link it to the parent
+        model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_deploy.onnx")
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
+
+
+def test_end2end_zynqbuild_tfc_w2a2_run_on_pynq():
+    # use the streamlined model as the "golden" reference for correct outputs
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_streamlined.onnx"
+    )
+    iname = golden.graph.input[0].name
+    oname = golden.graph.output[0].name
+    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
+    input_tensor = onnx.load_tensor_from_string(raw_i)
+    x = nph.to_array(input_tensor)
+    # run using FINN-based execution
+    ret_golden = execute_onnx(golden, {iname: x}, True)
+    y_golden = ret_golden[oname]
+    # set up parent+child graph to test
+    # we'll use the model from the previous step as the child model
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_parent.onnx"
+    )
+    iname = parent_model.graph.input[0].name
+    oname = parent_model.graph.output[0].name
+    try:
+        ip = os.environ["PYNQ_IP"]  # NOQA
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        # produce results with remote execution on the PYNQ board
+        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+        sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(
+            build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_deploy.onnx"
+        )
+        sdp_node.set_nodeattr(
+            "model", build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_deploy.onnx"
+        )
+        ret = execute_onnx(parent_model, {iname: x}, True)
+        y = ret[oname]
+        assert np.isclose(y, y_golden).all()
+
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index 1add41861562b84808697fc3936f504f9f3d6c48..40f29b0c7df025df09137f124d66ea33236e18e4 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -63,6 +63,7 @@ from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
 from finn.transformation.fpgadataflow.floorplan import Floorplan
 from finn.transformation.fpgadataflow.vitis_build import VitisBuild
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 
 
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
@@ -428,6 +429,8 @@ def test_fpgadataflow_ipstitch_iodma_floorplan():
 @pytest.mark.vivado
 @pytest.mark.vitis
 def test_fpgadataflow_ipstitch_vitis(board, period_ns, extw):
+    if "VITIS_PATH" not in os.environ:
+        pytest.skip("VITIS_PATH not set")
     platform = alveo_default_platform[board]
     fpga_part = alveo_part_map[board]
     model = create_two_fc_model("external" if extw else "decoupled")
@@ -438,3 +441,52 @@ def test_fpgadataflow_ipstitch_vitis(board, period_ns, extw):
         model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
     model = model.transform(VitisBuild(fpga_part, period_ns, platform))
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_vitis.onnx")
+
+
+# board to target for the Zynq build
+@pytest.mark.parametrize("board", ["Pynq-Z1"])
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_fpgadataflow_ipstitch_zynqbuild(board):
+    model = create_two_fc_model()
+    if model.graph.node[0].op_type == "StreamingDataflowPartition":
+        sdp_node = getCustomOp(model.graph.node[0])
+        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
+        assert os.path.isfile(sdp_node.get_nodeattr("model"))
+        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
+    # generate inputs for remote exec
+    iname = "inp"
+    idt = model.get_tensor_datatype(iname)
+    ishape = model.get_tensor_shape(iname)
+    x = gen_finn_dt_tensor(idt, ishape)
+    # driver
+    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
+    driver_dir = model.get_metadata_prop("pynq_driver_dir")
+    assert driver_dir is not None
+    assert os.path.isdir(driver_dir)
+    # bitfile using ZynqBuild
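+    # second argument is the target clock period in ns (10 ns = 100 MHz)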
+    model = model.transform(ZynqBuild(board, 10))
+    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_customzynq.onnx")
+
+    bitfile_name = model.get_metadata_prop("vivado_pynq_bitfile")
+    assert bitfile_name is not None
+    assert os.path.isfile(bitfile_name)
+    # deployment
+    try:
+        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        username = os.getenv("PYNQ_USERNAME", "xilinx")
+        password = os.getenv("PYNQ_PASSWORD", "xilinx")
+        port = os.getenv("PYNQ_PORT", 22)
+        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
+        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
+        deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
+        assert deployment_dir is not None
+        assert os.path.isdir(deployment_dir)
+        # remote exec
+        input_dict = {"global_in": x}
+        outp = execute_onnx(model, input_dict)
+        assert np.isclose(outp["global_out"], x).all()
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")