diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml
index 0233a81ba06dc701a3a4579b9a5bd3ce17e47d04..fff5b9618de9c2e223c86bc9add2cf3990c5fb78 100644
--- a/.github/workflows/quicktest-dev-pr.yml
+++ b/.github/workflows/quicktest-dev-pr.yml
@@ -11,7 +11,7 @@ jobs:
 
   test:
     name: Run quicktest on PR branch
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-16.04
 
     steps:
       - name: checkout
diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci
index 0d122133a6446cb77160c9447e16ff13d4d4b9c5..d1ca2e891b735c8be04418f469984618fcff072a 100644
--- a/docker/Dockerfile.finn_ci
+++ b/docker/Dockerfile.finn_ci
@@ -40,6 +40,12 @@ RUN apt-get install -y verilator zsh
 RUN apt-get -y install sshpass wget unzip
 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
 
+# install XRT dependencies
+RUN wget https://raw.githubusercontent.com/Xilinx/XRT/master/src/runtime_src/tools/scripts/xrtdeps.sh
+RUN apt-get update
+RUN bash xrtdeps.sh
+RUN rm xrtdeps.sh
+
 # cloning dependency repos
 # Brevitas
 RUN git clone https://github.com/Xilinx/brevitas.git /workspace/brevitas
diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev
index db49dceb2d06670dfc43059d3a4fa6160a8ded58..4b52d7886b8ab58542cc6ff17bcb65a48ca66237 100644
--- a/docker/Dockerfile.finn_dev
+++ b/docker/Dockerfile.finn_dev
@@ -46,6 +46,12 @@ RUN apt-get install -y verilator nano zsh rsync
 RUN apt-get -y install sshpass wget unzip
 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
 
+# install XRT dependencies
+RUN wget https://raw.githubusercontent.com/Xilinx/XRT/master/src/runtime_src/tools/scripts/xrtdeps.sh
+RUN apt-get update
+RUN bash xrtdeps.sh
+RUN rm xrtdeps.sh
+
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 RUN rm requirements.txt
diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index d0d0ad7d65f92bfaa26c83143e783040efd70d91..c35d18ef5dbaee82bcaf6c23e73ae8469e8211c4 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -56,6 +56,10 @@ if [ ! -z "$VITIS_PATH" ];then
   export XILINX_VITIS=$VITIS_PATH
   source $VITIS_PATH/settings64.sh
 fi
+if [ ! -z "$XILINX_XRT" ];then
+  # source XRT
+  source $XILINX_XRT/setup.sh
+fi
 
 # download PYNQ board files if not already there
 if [ ! -d "/workspace/finn/board_files" ]; then
diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst
index 8a20dad0e47b9458989039184cfa0e5d01d48aa2..3bfaf2e1a7f5cbbeb8760ad7c51bd941338f6360 100644
--- a/docs/finn/getting_started.rst
+++ b/docs/finn/getting_started.rst
@@ -13,12 +13,14 @@ The FINN compiler should not be thought of a single pushbutton tool that does ev
 Requirements
 ============
 
-* Ubuntu 18.04 with `bash` installed
+* Ubuntu 18.04 with ``bash`` installed
 * Docker
-* A working Vivado 2019.1 installation
-* A `VIVADO_PATH` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located)
+* A working Vivado 2019.1 or 2020.1 installation
+* A ``VIVADO_PATH`` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located)
 * (optional) A PYNQ board with a network connection
    * the ``bitstring`` package must be installed on the PYNQ: ``sudo pip3 install bitstring``
+* (optional) An Alveo board, and a working Vitis 2020.1 installation if you want to use Vitis and Alveo (see `Alveo first-time setup`_ below)
+
 
 Running FINN in Docker
 ======================
@@ -88,18 +90,41 @@ Environment variables
 Prior to running the `run-docker.sh` script, there are several environment variables you can set to configure certain aspects of FINN.
 These are summarized below:
 
-* `VIVADO_PATH` points to your Vivado installation on the host
-* `JUPYTER_PORT` (default 8888) changes the port for Jupyter inside Docker
-* `NETRON_PORT` (default 8081) changes the port for Netron inside Docker
-* `NUM_DEFAULT_WORKERS` (default 1) specifies the degree of parallelization for the transformations that can be run in parallel
-* `PYNQ_BOARD` specifies the type of PYNQ board used (see "supported hardware" below) for the test suite
-* `PYNQ_IP` and `PYNQ_PORT` specify ip address and port number to access the PYNQ board
-* `PYNQ_USERNAME` and `PYNQ_PASSWORD` specify the PYNQ board access credentials for the test suite
-* `PYNQ_TARGET_DIR` specifies the target dir on the PYNQ board for the test suite
+* ``VIVADO_PATH`` points to your Vivado installation on the host
+* (optional, for Vitis & Alveo only) ``VITIS_PATH``, ``PLATFORM_REPO_PATHS`` and ``XILINX_XRT`` respectively point to your Vitis installation, the Vitis platform files, and Xilinx XRT
+* ``JUPYTER_PORT`` (default 8888) changes the port for Jupyter inside Docker
+* ``NETRON_PORT`` (default 8081) changes the port for Netron inside Docker
+* ``NUM_DEFAULT_WORKERS`` (default 1) specifies the degree of parallelization for the transformations that can be run in parallel
+* ``PYNQ_BOARD`` or ``ALVEO_BOARD`` specifies the type of PYNQ/Alveo board used (see "supported hardware" below) for the test suite
+* ``PYNQ_IP`` and ``PYNQ_PORT`` (or ``ALVEO_IP`` and ``ALVEO_PORT``) specify ip address and port number to access the PYNQ board / Alveo target
+* ``PYNQ_USERNAME`` and ``PYNQ_PASSWORD`` (or ``ALVEO_USERNAME`` and ``ALVEO_PASSWORD``) specify the PYNQ board / Alveo host access credentials for the test suite. For PYNQ, password is always needed to run as sudo. For Alveo, you can leave the password empty and place your ssh private key in the ``finn/ssh_keys`` folder to use keypair authentication.
+* ``PYNQ_TARGET_DIR`` (or ``ALVEO_TARGET_DIR``) specifies the target dir on the PYNQ board / Alveo host for the test suite
 
 Supported Hardware
 ===================
 **End-to-end support including driver:** For quick deployment, FINN targets boards supported by  `PYNQ <https://pynq.io/>`_ . For these platforms, we can build a full bitfile including DMAs to move data into and out of the FINN-generated accelerator, as well as a Python driver to launch the accelerator. We support the Pynq-Z1, Pynq-Z2, Ultra96, ZCU102 and ZCU104 boards.
+As of FINN v0.4b we also have preliminary support for `Xilinx Alveo boards <https://www.xilinx.com/products/boards-and-kits/alveo.html>`_ using PYNQ and Vitis, see instructions below for Alveo setup.
+
+**Vivado IPI support for any Xilinx FPGA:** FINN generates a Vivado IP Integrator (IPI) design from the neural network with AXI stream (FIFO) in-out interfaces, which can be integrated onto any Xilinx FPGA as part of a larger system. It's up to you to take the FINN-generated accelerator (what we call "stitched IP" in the tutorials), wire it up to your FPGA design and send/receive neural network data to/from the accelerator.
+
+Alveo first-time setup
+**********************
+We use *host* to refer to the PC running the FINN Docker environment, which will build the accelerator+driver and package it up, and *target* to refer to the PC where the Alveo card is installed. These two can be the same PC, or connected over the network -- FINN includes some utilities to make it easier to test on remote PCs too. Prior to first usage, you need to set up both the host and the target in the following manner:
+
+On the target side:
+
+1. Install Xilinx XRT and set up the ``XILINX_XRT`` environment variable to point to your installation, for instance ``/opt/xilinx/xrt``.
+2. Install the Vitis platform files for Alveo and set up the ``PLATFORM_REPO_PATHS`` environment variable to point to your installation, for instance ``/opt/xilinx/platforms``.
+3. Create a conda environment named *finn-pynq-alveo* by following this guide `to set up PYNQ for Alveo <https://pynq.readthedocs.io/en/latest/getting_started/alveo_getting_started.html>`_. It's best to follow the recommended environment.yml (set of package versions) in this guide.
+4. Activate the environment with ``conda activate finn-pynq-alveo`` and install the bitstring package with ``pip install bitstring``
+5. Done! You should now be able to e.g. ``import pynq`` in Python scripts.
+6. (optional) If you don't want to specify the ``ALVEO_PASSWORD`` environment variable, you can `set up public key authentication <https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server>`_. Copy your private key to the ``finn/ssh_keys`` folder on the host to get password-less deployment and remote execution.
+
 
-**Vivado IPI support for any Xilinx FPGA:** FINN generates a Vivado IP Integrator (IPI) design from the neural network with AXI stream (FIFO) in-out interfaces, which can be integrated onto any Xilinx FPGA as part of a larger system. It's up to you to take the FINN-generated accelerator (what we call "stitched IP" in the tutorials) and wire it up to your FPGA design.
+On the host side:
 
+1. Install Vitis 2020.1 and set up the ``VITIS_PATH`` environment variable to point to your installation.
+2. Install Xilinx XRT and set up the ``XILINX_XRT`` environment variable to point to your installation. *This must be the same path as the target's XRT (target step 1)*
+3. Install the Vitis platform files for Alveo and set up the ``PLATFORM_REPO_PATHS`` environment variable to point to your installation. *This must be the same path as the target's platform files (target step 2)*
+4. Set up the ``ALVEO_*`` environment variables accordingly for your target, see description of environment variables above.
+5. Done! You can try the ``test_end2end_vitis`` tests in the FINN Docker to verify your setup, although this will take some time.
diff --git a/notebooks/end2end_example/tfc_end2end_example.ipynb b/notebooks/end2end_example/tfc_end2end_example.ipynb
index c388feca2340792c3535dba3fb3cf5e7220adf3c..105ba8c6e41dcb3c5845a1aac8331ef0d9455ebd 100644
--- a/notebooks/end2end_example/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/tfc_end2end_example.ipynb
@@ -1379,7 +1379,7 @@
        "value: \"/tmp/finn_dev_jakobap/vivado_pynq_proj_ljn53hfs\"\n",
        ", key: \"vivado_synth_rpt\"\n",
        "value: \"/tmp/finn_dev_jakobap/vivado_pynq_proj_ljn53hfs/synth_report.xml\"\n",
-       ", key: \"vivado_pynq_bitfile\"\n",
+       ", key: \"bitfile\"\n",
        "value: \"/tmp/finn_dev_jakobap/vivado_pynq_proj_ljn53hfs/resizer.bit\"\n",
        "]"
       ]
@@ -1670,7 +1670,7 @@
        "value: \"/tmp/finn_dev_jakobap/vivado_pynq_proj_ljn53hfs\"\n",
        ", key: \"vivado_synth_rpt\"\n",
        "value: \"/tmp/finn_dev_jakobap/vivado_pynq_proj_ljn53hfs/synth_report.xml\"\n",
-       ", key: \"vivado_pynq_bitfile\"\n",
+       ", key: \"bitfile\"\n",
        "value: \"/tmp/finn_dev_jakobap/vivado_pynq_proj_ljn53hfs/resizer.bit\"\n",
        ", key: \"pynq_driver_dir\"\n",
        "value: \"/tmp/finn_dev_jakobap/pynq_driver_j_9suyqm\"\n",
@@ -1936,10 +1936,10 @@
     }
    ],
    "source": [
-    "from finn.core.throughput_test import throughput_test\n",
+    "from finn.core.throughput_test import throughput_test_remote\n",
     "\n",
     "child_model = ModelWrapper(getCustomOp(sdp_node).get_nodeattr(\"model\"))\n",
-    "res = throughput_test(child_model)\n",
+    "res = throughput_test_remote(child_model)\n",
     "print(\"Network metrics:\")\n",
     "for key in res:\n",
     "    print(str(key) + \": \" + str(res[key]))"
diff --git a/run-docker.sh b/run-docker.sh
index 88956586c6a2ba9780d0597f8149038dad4aa6ab..219e5c258f2e4d8b4c95d1c0a84cd1a636510e24 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -89,6 +89,12 @@ SCRIPTPATH=$(dirname "$SCRIPT")
 : ${PYNQ_TARGET_DIR="/home/xilinx/$DOCKER_INST_NAME"}
 : ${NUM_DEFAULT_WORKERS=1}
 : ${FINN_SSH_KEY_DIR="$SCRIPTPATH/ssh_keys"}
+: ${ALVEO_USERNAME="alveo_user"}
+: ${ALVEO_PASSWORD=""}
+: ${ALVEO_BOARD="U250"}
+: ${ALVEO_TARGET_DIR="/tmp"}
+: ${XILINX_XRT="/opt/xilinx/xrt"}
+: ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"}
 
 BUILD_LOCAL=/tmp/$DOCKER_INST_NAME
 VIVADO_HLS_LOCAL=$VIVADO_PATH
@@ -161,10 +167,25 @@ if [ ! -z "$VIVADO_PATH" ];then
   DOCKER_EXEC+="-e VIVADO_PATH=$VIVADO_PATH "
 fi
 if [ ! -z "$VITIS_PATH" ];then
+  if [ -z "$PLATFORM_REPO_PATHS" ];then
+          recho "PLATFORM_REPO_PATHS must be set for Vitis/Alveo flows"
+          exit -1
+  fi
+  if [ -z "$XILINX_XRT" ];then
+          recho "XILINX_XRT must be set for Vitis/Alveo flows"
+          exit -1
+  fi
   DOCKER_EXEC+="-v $VITIS_PATH:$VITIS_PATH "
-  DOCKER_EXEC+="-v $PLATFORM_REPO_PATHS:/workspace/finn/vitis_platforms "
+  DOCKER_EXEC+="-v $PLATFORM_REPO_PATHS:$PLATFORM_REPO_PATHS "
+  DOCKER_EXEC+="-v $XILINX_XRT:$XILINX_XRT "
   DOCKER_EXEC+="-e VITIS_PATH=$VITIS_PATH "
-  DOCKER_EXEC+="-e PLATFORM_REPO_PATHS=/workspace/finn/vitis_platforms "
+  DOCKER_EXEC+="-e PLATFORM_REPO_PATHS=$PLATFORM_REPO_PATHS "
+  DOCKER_EXEC+="-e XILINX_XRT=$XILINX_XRT "
+  DOCKER_EXEC+="-e ALVEO_IP=$ALVEO_IP "
+  DOCKER_EXEC+="-e ALVEO_USERNAME=$ALVEO_USERNAME "
+  DOCKER_EXEC+="-e ALVEO_PASSWORD=$ALVEO_PASSWORD "
+  DOCKER_EXEC+="-e ALVEO_BOARD=$ALVEO_BOARD "
+  DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR "
 fi
 DOCKER_EXEC+="$DOCKER_TAG $DOCKER_CMD"
 
diff --git a/src/finn/analysis/fpgadataflow/post_synth_res.py b/src/finn/analysis/fpgadataflow/post_synth_res.py
index 9206f3f6fcd81de175babef54de990fe01c861e1..79204c54cdb8233fd7b65968c25af819fce91959 100644
--- a/src/finn/analysis/fpgadataflow/post_synth_res.py
+++ b/src/finn/analysis/fpgadataflow/post_synth_res.py
@@ -57,36 +57,54 @@ def post_synth_res(model, override_synth_report_filename=None):
     else:
         raise Exception("Please run synthesis first")
 
+    # TODO build these indices based on table headers instead of hardcoding
+    restype_to_ind_default = {
+        "LUT": 2,
+        "SRL": 5,
+        "FF": 6,
+        "BRAM_36K": 7,
+        "BRAM_18K": 8,
+        "DSP48": 9,
+    }
+    restype_to_ind_vitis = {
+        "LUT": 4,
+        "SRL": 7,
+        "FF": 8,
+        "BRAM_36K": 9,
+        "BRAM_18K": 10,
+        "URAM": 11,
+        "DSP48": 12,
+    }
+
+    if model.get_metadata_prop("platform") == "alveo":
+        restype_to_ind = restype_to_ind_vitis
+    else:
+        restype_to_ind = restype_to_ind_default
+
+    def get_instance_stats(inst_name):
+        row = root.findall(".//*[@contents='%s']/.." % inst_name)
+        if row != []:
+            node_dict = {}
+            row = row[0].getchildren()
+            for (restype, ind) in restype_to_ind.items():
+                node_dict[restype] = int(row[ind].attrib["contents"])
+            return node_dict
+        else:
+            return None
+
+    # global (top-level) stats, including shell etc.
+    top_dict = get_instance_stats("(top)")
+    if top_dict is not None:
+        res_dict["(top)"] = top_dict
+
     for node in model.graph.node:
         if node.op_type == "StreamingDataflowPartition":
             sdp_model = ModelWrapper(getCustomOp(node).get_nodeattr("model"))
             sdp_res_dict = post_synth_res(sdp_model, synth_report_filename)
             res_dict.update(sdp_res_dict)
         elif _is_fpgadataflow_node(node):
-            row = root.findall(".//*[@contents='%s']/.." % node.name)
-            if row != []:
-                node_dict = {}
-                row = row[0].getchildren()
-                """ Expected XML structure:
-<tablerow class="" suppressoutput="0" wordwrap="0">
-    <tableheader class="" contents="Instance" halign="3" width="-1"/>
-    <tableheader class="" contents="Module" halign="3" width="-1"/>
-    <tableheader class="" contents="Total LUTs" halign="3" width="-1"/>
-    <tableheader class="" contents="Logic LUTs" halign="3" width="-1"/>
-    <tableheader class="" contents="LUTRAMs" halign="3" width="-1"/>
-    <tableheader class="" contents="SRLs" halign="3" width="-1"/>
-    <tableheader class="" contents="FFs" halign="3" width="-1"/>
-    <tableheader class="" contents="RAMB36" halign="3" width="-1"/>
-    <tableheader class="" contents="RAMB18" halign="3" width="-1"/>
-    <tableheader class="" contents="DSP48 Blocks" halign="3" width="-1"/>
-</tablerow>
-                """
-                node_dict["LUT"] = int(row[2].attrib["contents"])
-                node_dict["SRL"] = int(row[5].attrib["contents"])
-                node_dict["FF"] = int(row[6].attrib["contents"])
-                node_dict["BRAM_36K"] = int(row[7].attrib["contents"])
-                node_dict["BRAM_18K"] = int(row[8].attrib["contents"])
-                node_dict["DSP48"] = int(row[9].attrib["contents"])
+            node_dict = get_instance_stats(node.name)
+            if node_dict is not None:
                 res_dict[node.name] = node_dict
 
     return res_dict
diff --git a/src/finn/core/remote_exec.py b/src/finn/core/remote_exec.py
index 214358608c43a868f9ef414dcbf6eb33e3f45a5b..ee07201315db23a9a1f8d0b7f1392d99517a8b63 100644
--- a/src/finn/core/remote_exec.py
+++ b/src/finn/core/remote_exec.py
@@ -28,7 +28,7 @@
 
 import os
 import subprocess
-
+import warnings
 import numpy as np
 
 
@@ -43,15 +43,35 @@ def remote_exec(model, execution_context):
     pynq_password = model.get_metadata_prop("pynq_password")
     pynq_target_dir = model.get_metadata_prop("pynq_target_dir")
     deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
+    platform = model.get_metadata_prop("platform")
+    assert platform in ["alveo", "zynq", "zynq-iodma"]
+    bitfile = model.get_metadata_prop("bitfile")
+    bitfile = os.path.basename(bitfile)
+    if pynq_password == "":
+        if "zynq" in platform:
+            raise Exception("PYNQ board remote exec needs password for sudo")
+        else:
+            local_prefix = ""  # assume we are using an ssh key
+            warnings.warn("Empty password, make sure you've set up an ssh key")
+    else:
+        local_prefix = "sshpass -p %s " % pynq_password
+
+    if platform == "alveo":
+        # Alveo can run without sudo
+        remote_prefix = ""
+    elif "zynq" in platform:
+        # PYNQ Zynq boards need to execute with sudo
+        remote_prefix = "echo %s | sudo -S " % pynq_password
+
     inp = execution_context[model.graph.input[0].name]
     # make copy of array before saving it
     inp = inp.copy()
+    batchsize = inp.shape[0]
     np.save(os.path.join(deployment_dir, "input.npy"), inp)
     # extracting last folder of absolute path (deployment_dir)
     deployment_folder = os.path.basename(os.path.normpath(deployment_dir))
     # copy input to PYNQ board
-    cmd = "sshpass -p {} scp -P{} -r {}/input.npy {}@{}:{}/{}".format(
-        pynq_password,
+    cmd = local_prefix + "scp -P{} -r {}/input.npy {}@{}:{}/{}".format(
         pynq_port,
         deployment_dir,
         pynq_username,
@@ -60,38 +80,31 @@ def remote_exec(model, execution_context):
         deployment_folder,
     )
     bash_command = ["/bin/bash", "-c", cmd]
-    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
-    process_compile.communicate()
-    # set platform attribute for correct remote execution
-    platform = model.get_metadata_prop("platform")
-    assert platform in ["alveo", "zynq", "zynq-iodma"]
+    process_scp_in = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_scp_in.communicate()
+
+    # use platform attribute for correct remote execution
+    if platform == "alveo":
+        remote_cmd = "bash -ic 'bash alveo_run.sh execute %d' \"" % batchsize
+    else:
+        remote_cmd = (
+            "python3.6 driver.py --exec_mode=execute --batchsize={} "
+            "--bitfile={} --inputfile=input.npy --outputfile=output.npy "
+            '--platform={} "'
+        ).format(batchsize, bitfile, platform)
     cmd = (
-        "sshpass -p {} ssh {}@{} -p {} "
-        '"cd {}/{}; echo "{}" | '
-        'sudo -S python3.6 driver.py --exec_mode="execute" --batchsize=1" '
-        '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy" '
-        '--platform="{}" '
-    ).format(
-        pynq_password,
-        pynq_username,
-        pynq_ip,
-        pynq_port,
-        pynq_target_dir,
-        deployment_folder,
-        pynq_password,
-        platform,
-    )
+        local_prefix + 'ssh {}@{} -p {} "cd {}/{}; ' + remote_prefix + remote_cmd
+    ).format(pynq_username, pynq_ip, pynq_port, pynq_target_dir, deployment_folder)
     bash_command = ["/bin/bash", "-c", cmd]
-    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
-    process_compile.communicate()
+    process_exec_accel = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_exec_accel.communicate()
     # remove stale output file from local dir, if any
     try:
         os.remove("{}/output.npy".format(deployment_dir))
     except FileNotFoundError:
         pass
     # copy generated output to local
-    cmd = "sshpass -p {} scp -P{} {}@{}:{}/{}/output.npy {}".format(
-        pynq_password,
+    cmd = local_prefix + "scp -P{} {}@{}:{}/{}/output.npy {}".format(
         pynq_port,
         pynq_username,
         pynq_ip,
@@ -100,7 +113,7 @@ def remote_exec(model, execution_context):
         deployment_dir,
     )
     bash_command = ["/bin/bash", "-c", cmd]
-    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
-    process_compile.communicate()
+    process_scp_out = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_scp_out.communicate()
     outp = np.load("{}/output.npy".format(deployment_dir))
     execution_context[model.graph.output[0].name] = outp
diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py
index fbfe775e581e063b08e34b3096fd34f412b47d11..77c7b6c00179343048c52494fbffacd4c7447d7f 100644
--- a/src/finn/core/throughput_test.py
+++ b/src/finn/core/throughput_test.py
@@ -29,12 +29,12 @@
 import os
 import subprocess
 import numpy as np
-
+import warnings
 from finn.util.basic import gen_finn_dt_tensor
 from finn.core.rtlsim_exec import rtlsim_exec
 
 
-def throughput_test(model, batchsize=1000):
+def throughput_test_remote(model, batchsize=1000):
     """Runs the throughput test for the given model remotely on the pynq board.
     The metadata properties related to the pynq board have to be set.
     Returns a dictionary with results of the throughput test. Returns None
@@ -48,24 +48,41 @@ def throughput_test(model, batchsize=1000):
     deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
     # extracting last folder of absolute path (deployment_dir)
     deployment_folder = os.path.basename(os.path.normpath(deployment_dir))
+    platform = model.get_metadata_prop("platform")
+    assert platform in ["alveo", "zynq", "zynq-iodma"]
+    bitfile = model.get_metadata_prop("bitfile")
+    bitfile = os.path.basename(bitfile)
+    if pynq_password == "":
+        if "zynq" in platform:
+            raise Exception("PYNQ board remote exec needs password for sudo")
+        else:
+            local_prefix = ""  # assume we are using an ssh key
+            warnings.warn("Empty password, make sure you've set up an ssh key")
+    else:
+        local_prefix = "sshpass -p %s " % pynq_password
+
+    if platform == "alveo":
+        # Alveo can run without sudo but needs correct environment
+        remote_prefix = "conda activate finn-pynq-alveo; "
+    elif "zynq" in platform:
+        # PYNQ Zynq boards need to execute with sudo
+        remote_prefix = "echo %s | sudo -S " % pynq_password
 
+    # use platform attribute for correct remote execution
+    if platform == "alveo":
+        remote_cmd = "bash -ic 'bash alveo_run.sh throughput_test %d' \"" % batchsize
+    else:
+        remote_cmd = (
+            "python3.6 driver.py --exec_mode=throughput_test --batchsize={} "
+            "--bitfile={} --inputfile=input.npy --outputfile=output.npy "
+            '--platform={} "'
+        ).format(batchsize, bitfile, platform)
     cmd = (
-        "sshpass -p {} ssh {}@{} -p {} "
-        '"cd {}/{}; echo "{}" | '
-        'sudo -S python3.6 driver.py --exec_mode="throughput_test" --batchsize=%d"'
-        % batchsize
-    ).format(
-        pynq_password,
-        pynq_username,
-        pynq_ip,
-        pynq_port,
-        pynq_target_dir,
-        deployment_folder,
-        pynq_password,
-    )
+        local_prefix + 'ssh {}@{} -p {} "cd {}/{}; ' + remote_prefix + remote_cmd
+    ).format(pynq_username, pynq_ip, pynq_port, pynq_target_dir, deployment_folder)
     bash_command = ["/bin/bash", "-c", cmd]
-    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
-    process_compile.communicate()
+    process_throughput_test = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+    process_throughput_test.communicate()
 
     # remove any pre-existing metrics file
     try:
@@ -73,8 +90,7 @@ def throughput_test(model, batchsize=1000):
     except FileNotFoundError:
         pass
 
-    cmd = "sshpass -p {} scp -P{} {}@{}:{}/{}/nw_metrics.txt {}".format(
-        pynq_password,
+    cmd = local_prefix + "scp -P{} {}@{}:{}/{}/nw_metrics.txt {}".format(
         pynq_port,
         pynq_username,
         pynq_ip,
diff --git a/src/finn/custom_op/debugmarker.py b/src/finn/custom_op/debugmarker.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c02f0dc81295dfc5c3060d549b6853eac1d0bac
--- /dev/null
+++ b/src/finn/custom_op/debugmarker.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.custom_op import CustomOp
+from onnx import helper
+
+
+class DebugMarker(CustomOp):
+    def get_nodeattr_types(self):
+        return {"export_debug_name": ("s", True, "")}
+
+    def make_shape_compatible_op(self, model):
+        node = self.onnx_node
+        return helper.make_node("Identity", [node.input[0]], [node.output[0]])
+
+    def infer_node_datatype(self, model):
+        node = self.onnx_node
+        # data type stays the same
+        dtype = model.get_tensor_datatype(node.input[0])
+        model.set_tensor_datatype(node.output[0], dtype)
+        # create quantization annotation for debug marker
+        model.set_tensor_datatype(self.get_nodeattr("export_debug_name"), dtype)
+
+    def execute_node(self, context, graph):
+        node = self.onnx_node
+        inp_name = node.input[0]
+        out_name = node.output[0]
+        inp = context[inp_name]
+        context[out_name] = inp
+        # insert debug marker output as separate tensor
+        context[self.get_nodeattr("export_debug_name")] = inp
+
+    def verify_node(self):
+        info_messages = []
+        # verify that "domain" is set to "finn"
+        domain_value = self.onnx_node.domain
+        if domain_value == "finn":
+            info_messages.append("Attribute domain is set correctly")
+        else:
+            info_messages.append('Attribute domain should be set to "finn"')
+        return info_messages
diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
index 942e4b25700d0c52c1bc5bcd81614a058342f178..c7c08d081a04ff72ae2a198e65091d042bd8d599 100644
--- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
+++ b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
@@ -96,7 +96,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
 
     def get_instream_width(self):
         i_bits = self.get_input_datatype().bitwidth()
-        in_width = i_bits * self.get_nodeattr("Channels")
+        in_width = i_bits * self.get_nodeattr("PE")
         return in_width
 
     def get_outstream_width(self):
diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py
index 0cc0e53eaebd94d5e2cd0e030bc107da098e4931..ecf2a711f17ac35c9bf8cb081fb4dc6d9bb6c01e 100644
--- a/src/finn/custom_op/registry.py
+++ b/src/finn/custom_op/registry.py
@@ -57,6 +57,7 @@ from finn.custom_op.fpgadataflow.vector_vector_activate_batch import (
 )
 from finn.custom_op.fpgadataflow.channelwise_op_batch import ChannelwiseOp_Batch
 from finn.custom_op.fpgadataflow.iodma import IODMA
+from finn.custom_op.debugmarker import DebugMarker
 
 # create a mapping of all known CustomOp names and classes
 custom_op = {}
@@ -84,6 +85,7 @@ custom_op["DuplicateStreams_Batch"] = DuplicateStreams_Batch
 custom_op["Vector_Vector_Activate_Batch"] = Vector_Vector_Activate_Batch
 custom_op["ChannelwiseOp_Batch"] = ChannelwiseOp_Batch
 custom_op["IODMA"] = IODMA
+custom_op["DebugMarker"] = DebugMarker
 
 
 def getCustomOp(node):
diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py
index da6fa1ff738690308a9b7686a5c92d7395ab50c8..d6ff058848700b50dadb7a6ed0ff6c07b7eeb4a3 100644
--- a/src/finn/transformation/fpgadataflow/annotate_resources.py
+++ b/src/finn/transformation/fpgadataflow/annotate_resources.py
@@ -41,6 +41,7 @@ class AnnotateResources(Transformation):
     node as an attribute on the node, depending on the mode parameter:
     * 'estimate' -- use the analytical estimation model
     * 'hls' -- use results from the HLS synthesis report
+    * 'synth' -- use post-synthesis (Vivado or Vitis) report
 
     No annotations can be provided unless the relevant transformation for the
     chosen mode (e.g. HLSSynthIP for hls) was previously run.
@@ -99,4 +100,7 @@ class AnnotateResources(Transformation):
             if "efficiency" in k:
                 total_dict[k] = total_dict[k] / len(graph.node)
         model.set_metadata_prop("res_total_" + self.mode, str(total_dict))
+        if "(top)" in self.res_dict.keys():
+            top_dict = self.res_dict["(top)"]
+            model.set_metadata_prop("res_total_top_" + self.mode, str(top_dict))
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/make_deployment.py b/src/finn/transformation/fpgadataflow/make_deployment.py
index a4c751b844a2796447930eb74adad6321454ac09..2880e4aba20564f50a0acdff5e8c728714c84b5c 100644
--- a/src/finn/transformation/fpgadataflow/make_deployment.py
+++ b/src/finn/transformation/fpgadataflow/make_deployment.py
@@ -26,6 +26,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import warnings
 import os
 import subprocess
 from distutils.dir_util import copy_tree
@@ -33,6 +34,7 @@ from shutil import copy
 
 from finn.transformation import Transformation
 from finn.util.basic import make_build_dir
+import finn.transformation.fpgadataflow.templates as templates
 
 
 class DeployToPYNQ(Transformation):
@@ -64,35 +66,54 @@ class DeployToPYNQ(Transformation):
 
         # get and copy necessary files
         # .bit and .hwh file
-        vivado_pynq_proj = model.get_metadata_prop("vivado_pynq_proj")
-        for file in os.listdir(vivado_pynq_proj):
-            if file.endswith(".bit"):
-                bitfile = os.path.join(vivado_pynq_proj, file)
-                copy(bitfile, deployment_dir)
-            elif file.endswith(".hwh"):
-                hwhfile = os.path.join(vivado_pynq_proj, file)
-                copy(hwhfile, deployment_dir)
+        bitfile = model.get_metadata_prop("bitfile")
+        hwh_file = model.get_metadata_prop("hw_handoff")
+        deploy_files = [bitfile, hwh_file]
+
+        for dfile in deploy_files:
+            if dfile is not None:
+                copy(dfile, deployment_dir)
+
+        # helper script for Alveo
+        platform = model.get_metadata_prop("platform")
+        if platform == "alveo":
+            alveo_run_sh = templates.alveo_run_sh_template
+            fill_dict = {
+                "$REMOTE_DEPLOY_DIR$": self.target_dir
+                + "/"
+                + os.path.basename(deployment_dir),
+                "$CONDA_ENV_NAME$": "finn-pynq-alveo",
+                "$REMOTE_XRT$": os.environ["XILINX_XRT"],
+                "$REMOTE_PLATFORM_REPO_PATHS$": os.environ["PLATFORM_REPO_PATHS"],
+                "$BITFILE$": os.path.basename(bitfile),
+            }
+            for key, value in fill_dict.items():
+                alveo_run_sh = alveo_run_sh.replace(key, value)
+            alveo_run_sh_path = deployment_dir + "/alveo_run.sh"
+            with open(alveo_run_sh_path, "w") as f:
+                f.write(alveo_run_sh)
 
         # driver.py and python libraries
         pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir")
         copy_tree(pynq_driver_dir, deployment_dir)
         model.set_metadata_prop("pynq_deploy_dir", deployment_dir)
         model.set_metadata_prop("exec_mode", "remote_pynq")
+        if self.password == "":
+            prefix = ""  # assume we are using an ssh key
+            warnings.warn("Empty password, make sure you've set up an ssh key")
+        else:
+            prefix = "sshpass -p %s " % self.password
+
         # create target directory on PYNQ board
-        cmd = 'sshpass -p {} ssh {}@{} -p {} "mkdir -p {}"'.format(
-            self.password, self.username, self.ip, self.port, self.target_dir
+        cmd = prefix + 'ssh {}@{} -p {} "mkdir -p {}"'.format(
+            self.username, self.ip, self.port, self.target_dir
         )
         bash_command = ["/bin/bash", "-c", cmd]
         process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
         process_compile.communicate()
         # copy directory to PYNQ board using scp and sshpass
-        cmd = "sshpass -p {} scp -P{} -r {} {}@{}:{}".format(
-            self.password,
-            self.port,
-            deployment_dir,
-            self.username,
-            self.ip,
-            self.target_dir,
+        cmd = prefix + "scp -P{} -r {} {}@{}:{}".format(
+            self.port, deployment_dir, self.username, self.ip, self.target_dir,
         )
         bash_command = ["/bin/bash", "-c", cmd]
         process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
index 095327be0d3c36f201bcf343d8aea61aa069b8e1..b79850abad8f63ade326103764f6d62121f401cc 100644
--- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -249,9 +249,7 @@ class MakeZYNQProject(Transformation):
         deploy_bitfile_name = vivado_pynq_proj_dir + "/resizer.bit"
         copy(bitfile_name, deploy_bitfile_name)
         # set bitfile attribute
-        model.set_metadata_prop("vivado_pynq_bitfile", deploy_bitfile_name)
-        # set platform attribute for correct remote execution
-        model.set_metadata_prop("platform", "zynq-iodma")
+        model.set_metadata_prop("bitfile", deploy_bitfile_name)
         hwh_name = (
             vivado_pynq_proj_dir
             + "/finn_zynq_link.srcs/sources_1/bd/top/hw_handoff/top.hwh"
@@ -260,6 +258,7 @@ class MakeZYNQProject(Transformation):
             raise Exception("Synthesis failed, no hardware handoff file found")
         deploy_hwh_name = vivado_pynq_proj_dir + "/resizer.hwh"
         copy(hwh_name, deploy_hwh_name)
+        model.set_metadata_prop("hw_handoff", deploy_hwh_name)
         # filename for the synth utilization report
         synth_report_filename = vivado_pynq_proj_dir + "/synth_report.xml"
         model.set_metadata_prop("vivado_synth_rpt", synth_report_filename)
@@ -311,9 +310,12 @@ class ZynqBuild(Transformation):
                     self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True
                 )
             )
+            kernel_model.set_metadata_prop("platform", "zynq-iodma")
             kernel_model.save(dataflow_model_filename)
         # Assemble design from IPs
         model = model.transform(
             MakeZYNQProject(self.platform, enable_debug=self.enable_debug)
         )
+        # set platform attribute for correct remote execution
+        model.set_metadata_prop("platform", "zynq-iodma")
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/synth_pynq_proj.py b/src/finn/transformation/fpgadataflow/synth_pynq_proj.py
index d7f73a7fe3dfcd0fef314304fe939623e577ac20..7f9bbaf5665f872d4393fa173b60b2cc3a6655e9 100644
--- a/src/finn/transformation/fpgadataflow/synth_pynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/synth_pynq_proj.py
@@ -50,8 +50,7 @@ class SynthPYNQProject(Transformation):
         process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
         process_compile.communicate()
         # set bitfile attribute
-        model.set_metadata_prop(
-            "vivado_pynq_bitfile", vivado_pynq_proj_dir + "/resizer.bit"
-        )
+        model.set_metadata_prop("bitfile", vivado_pynq_proj_dir + "/resizer.bit")
+        model.set_metadata_prop("hw_handoff", vivado_pynq_proj_dir + "/resizer.hwh")
         # TODO pull out synthesis statistics and put them in as attributes
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 3bd74ec6a2071db820a35a9440eedd74092354e1..1c449919fe64e33ba3c512adab9b52dd1f651628 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -124,15 +124,14 @@ class FINNAccelDriver():
         self.ol = Overlay(bitfile)
         # neuron folding factor of output = iterations per sample
         self.itersPerSample = self.oshape_packed[-2]
+        # clock frequency as specified by user
+        self.fclk_mhz = $CLOCK_FREQ_MHZ$
         if self.platform == "zynq":
-            # clock frequency
-            self.fclk_mhz = $CLOCK_FREQ_MHZ$
             # set the clock frequency as specified by user during transformations
             if self.fclk_mhz > 0:
                 Clocks.$CLK_NAME$ = self.fclk_mhz
             self.dma = self.ol.axi_dma_0
             self.ctrl_regs = self.ol.resize_accel_0
-
             # AXI lite register offset for number of iterations
             # used by TLastMarker to signal end of transmission for AXI CDMA
             self.REG_OFFSET_NUM_ITERS = 0x10
@@ -144,8 +143,6 @@ class FINNAccelDriver():
         elif self.platform == "zynq-iodma":
             self.idma = self.ol.idma0
             self.odma = self.ol.odma0
-            # clock frequency
-            self.fclk_mhz = $CLOCK_FREQ_MHZ$
             # set the clock frequency as specified by user during transformations
             if self.fclk_mhz > 0:
                 Clocks.$CLK_NAME$ = self.fclk_mhz
@@ -191,6 +188,14 @@ class FINNAccelDriver():
     def copy_input_data_to_device(self, data):
         \"\"\"Copies given input data to PYNQ buffer.\"\"\"
         np.copyto(self.ibuf_packed_device, data)
+        if self.platform == "alveo":
+            self.ibuf_packed_device.sync_to_device()
+
+    def copy_output_data_from_device(self, data):
+        \"\"\"Copies PYNQ output buffer from device.\"\"\"
+        if self.platform == "alveo":
+            self.obuf_packed_device.sync_from_device()
+        np.copyto(data, self.obuf_packed_device)
 
     def execute(self):
         \"\"\"Executes accelerator by setting up the DMA(s) and
@@ -214,14 +219,10 @@ class FINNAccelDriver():
             status = self.odma.read(0x00)
             while status & 0x2 == 0:
                 status = self.odma.read(0x00)
-
         elif self.platform == "alveo":
-            self.ibuf_packed_device.sync_to_device()
-            self.idma.start(self.ibuf_packed_device, self.N)
-            self.odma.start(self.obuf_packed_device, self.N)
-            self.idma.wait()
-            self.odma.wait()
-            self.obuf_packed_device.sync_from_device()
+            idma_handle = self.idma.start_sw(self.ibuf_packed_device, self.N)
+            odma_handle = self.odma.start_sw(self.obuf_packed_device, self.N)
+            odma_handle.wait()
 
 
 
@@ -285,7 +286,10 @@ if __name__ == "__main__":
         res["throughput[images/s]"] = N / runtime
         res["DRAM_in_bandwidth[Mb/s]"] = np.prod(finnDriver.ishape_packed)*0.000001 / runtime
         res["DRAM_out_bandwidth[Mb/s]"] = np.prod(finnDriver.oshape_packed)*0.000001 / runtime
-        res["fclk[mhz]"] = Clocks.fclk0_mhz
+        if platform != "alveo":
+            res["fclk[mhz]"] = Clocks.fclk0_mhz
+        else:
+            res["fclk[mhz]"] = finnDriver.fclk_mhz
         res["N"] = N
         file = open("nw_metrics.txt", "w")
         file.write(str(res))
@@ -293,7 +297,9 @@ if __name__ == "__main__":
 
     # if execution is selected unpack, unfold and save output to output npy file
     else:
-        obuf_folded = finnDriver.unpack_output(finnDriver.obuf_packed_device)
+        obuf_packed = np.empty_like(finnDriver.obuf_packed_device)
+        finnDriver.copy_output_data_from_device(obuf_packed)
+        obuf_folded = finnDriver.unpack_output(obuf_packed)
         obuf_normal = finnDriver.unfold_output(obuf_folded)
         np.save(outputfile, obuf_normal)
 
@@ -422,3 +428,25 @@ wait_on_run [get_runs impl_1]
 open_run synth_1 -name synth_1
 report_utilization -hierarchical -hierarchical_depth 4 -file synth_report.xml -format xml
 """
+
+alveo_run_sh_template = """#!/bin/bash
+
+if [ "$#" -ne 2 ]; then
+    echo "Usage: alveo_run.sh <exec_mode={execute, throughput_test}> <batch_size>"
+    exit -1
+fi
+
+cd $REMOTE_DEPLOY_DIR$
+eval "$(conda shell.bash hook)"
+conda activate $CONDA_ENV_NAME$
+source $REMOTE_XRT$/setup.sh
+export PLATFORM_REPO_PATHS=$REMOTE_PLATFORM_REPO_PATHS$
+python3.6 driver.py --exec_mode=$1 --batchsize=$2 --bitfile=$BITFILE$ \
+    --inputfile=input.npy --outputfile=output.npy --platform=alveo
+"""
+
+vitis_gen_xml_report_tcl_template = """
+open_project $VITIS_PROJ_PATH$/_x/link/vivado/vpl/prj/prj.xpr
+open_run impl_1
+report_utilization -hierarchical -hierarchical_depth 5 -file $VITIS_PROJ_PATH$/synth_report.xml -format xml
+"""
diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py
index 2df58c537250c102ee85a685fc32904ee879e38f..047480897b435a3de0f9746e52aa7c3eb634385c 100644
--- a/src/finn/transformation/fpgadataflow/vitis_build.py
+++ b/src/finn/transformation/fpgadataflow/vitis_build.py
@@ -51,6 +51,8 @@ from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.util.basic import make_build_dir
 from finn.transformation.infer_data_layouts import InferDataLayouts
+from . import templates
+from enum import Enum
 
 
 def _check_vitis_envvars():
@@ -63,6 +65,17 @@ def _check_vitis_envvars():
     ), "XILINX_XRT must be set for Vitis, ensure the XRT env is sourced"
 
 
+class VitisOptStrategy(Enum):
+    "Values applicable to VitisBuild optimization strategy."
+
+    DEFAULT = "0"
+    POWER = "1"
+    PERFORMANCE = "2"
+    PERFORMANCE_BEST = "3"
+    SIZE = "s"
+    BUILD_SPEED = "quick"
+
+
 class CreateVitisXO(Transformation):
     """Create a Vitis object file from a stitched FINN ip.
 
@@ -160,14 +173,15 @@ class CreateVitisXO(Transformation):
 class VitisLink(Transformation):
     """Create an XCLBIN with Vitis.
 
-    Outcome if successful: sets the vitis_xclbin attribute in the ONNX
+    Outcome if successful: sets the bitfile attribute in the ONNX
     ModelProto's metadata_props field with the XCLBIN full path as value.
     """
 
-    def __init__(self, platform, f_mhz=200):
+    def __init__(self, platform, f_mhz=200, strategy=VitisOptStrategy.PERFORMANCE):
         super().__init__()
         self.platform = platform
         self.f_mhz = f_mhz
+        self.strategy = strategy
 
     def apply(self, model):
         _check_vitis_envvars()
@@ -238,6 +252,12 @@ class VitisLink(Transformation):
         with open(link_dir + "/config.txt", "w") as f:
             f.write(config)
 
+        # create tcl script to generate resource report in XML format
+        gen_rep_xml = templates.vitis_gen_xml_report_tcl_template
+        gen_rep_xml = gen_rep_xml.replace("$VITIS_PROJ_PATH$", link_dir)
+        with open(link_dir + "/gen_report_xml.tcl", "w") as f:
+            f.write(gen_rep_xml)
+
         # create a shell script and call Vitis
         script = link_dir + "/run_vitis_link.sh"
         working_dir = os.environ["PWD"]
@@ -246,9 +266,14 @@ class VitisLink(Transformation):
             f.write("cd {}\n".format(link_dir))
             f.write(
                 "v++ -t hw --platform %s --link %s"
-                " --kernel_frequency %d --config config.txt --optimize 2"
+                " --kernel_frequency %d --config config.txt --optimize %s"
                 " --save-temps -R2\n"
-                % (self.platform, " ".join(object_files), self.f_mhz)
+                % (
+                    self.platform,
+                    " ".join(object_files),
+                    self.f_mhz,
+                    self.strategy.value,
+                )
             )
             f.write("cd {}\n".format(working_dir))
         bash_command = ["bash", script]
@@ -259,18 +284,38 @@ class VitisLink(Transformation):
         assert os.path.isfile(xclbin), (
             "Vitis .xclbin file not created, check logs under %s" % link_dir
         )
-        model.set_metadata_prop("vitis_xclbin", xclbin)
+        model.set_metadata_prop("bitfile", xclbin)
+
+        # run Vivado to gen xml report
+        gen_rep_xml_sh = link_dir + "/gen_report_xml.sh"
+        working_dir = os.environ["PWD"]
+        with open(gen_rep_xml_sh, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write("cd {}\n".format(link_dir))
+            f.write(
+                "vivado -mode tcl -source %s\n" % (link_dir + "/gen_report_xml.tcl")
+            )
+            f.write("cd {}\n".format(working_dir))
+        bash_command = ["bash", gen_rep_xml_sh]
+        process_genxml = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_genxml.communicate()
+        # filename for the synth utilization report
+        synth_report_filename = link_dir + "/synth_report.xml"
+        model.set_metadata_prop("vivado_synth_rpt", synth_report_filename)
         return (model, False)
 
 
 class VitisBuild(Transformation):
     """Best-effort attempt at building the accelerator with Vitis."""
 
-    def __init__(self, fpga_part, period_ns, platform):
+    def __init__(
+        self, fpga_part, period_ns, platform, strategy=VitisOptStrategy.PERFORMANCE
+    ):
         super().__init__()
         self.fpga_part = fpga_part
         self.period_ns = period_ns
         self.platform = platform
+        self.strategy = strategy
 
     def apply(self, model):
         _check_vitis_envvars()
@@ -313,9 +358,14 @@ class VitisBuild(Transformation):
             kernel_model = kernel_model.transform(
                 CreateVitisXO(sdp_node.onnx_node.name)
             )
+            kernel_model.set_metadata_prop("platform", "alveo")
             kernel_model.save(dataflow_model_filename)
         # Assemble design from kernels
-        model = model.transform(VitisLink(self.platform, round(1000 / self.period_ns)))
+        model = model.transform(
+            VitisLink(
+                self.platform, round(1000 / self.period_ns), strategy=self.strategy
+            )
+        )
         # set platform attribute for correct remote execution
         model.set_metadata_prop("platform", "alveo")
 
diff --git a/src/finn/util/pytorch.py b/src/finn/util/pytorch.py
new file mode 100644
index 0000000000000000000000000000000000000000..8332757cab839c8ce2fe7afa2449da5782d1aea3
--- /dev/null
+++ b/src/finn/util/pytorch.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import torch
+
+from torch.nn import Module, Sequential
+from brevitas.quant_tensor import QuantTensor
+
+
+class Normalize(Module):
+    def __init__(self, mean, std, channels):
+        super(Normalize, self).__init__()
+
+        self.mean = mean
+        self.std = std
+        self.channels = channels
+
+    def forward(self, x):
+        x = x - torch.tensor(self.mean, device=x.device).reshape(1, self.channels, 1, 1)
+        x = x / self.std
+        return x
+
+
+class ToTensor(Module):
+    def __init__(self):
+        super(ToTensor, self).__init__()
+
+    def forward(self, x):
+        x = x / 255
+        return x
+
+
+class NormalizePreProc(Module):
+    def __init__(self, mean, std, channels):
+        super(NormalizePreProc, self).__init__()
+        self.features = Sequential()
+        scaling = ToTensor()
+        self.features.add_module("scaling", scaling)
+        normalize = Normalize(mean, std, channels)
+        self.features.add_module("normalize", normalize)
+
+    def forward(self, x):
+        return self.features(x)
+
+
+class BrevitasDebugHook:
+    def __init__(self):
+        self.outputs = {}
+
+    def __call__(self, module, module_in, module_out):
+        tensor = module_out
+        if isinstance(module_out, QuantTensor):
+            tensor = module_out[0]
+        self.outputs[module.export_debug_name] = tensor.detach().numpy()
+
+    def clear(self):
+        self.outputs = {}
diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb7bb5a16a76e37275ab267c7bf90a4409a8769d
--- /dev/null
+++ b/tests/brevitas/test_brevitas_debug.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from pkgutil import get_data
+
+import os
+import brevitas.onnx as bo
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
+import torch
+
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.general import RemoveStaticGraphInputs
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.test import get_test_model_trained
+from finn.util.pytorch import BrevitasDebugHook
+
+
+def test_brevitas_debug():
+    finn_onnx = "test_brevitas_debug.onnx"
+    fc = get_test_model_trained("TFC", 2, 2)
+    dbg_hook = BrevitasDebugHook()
+    bo.enable_debug(fc, dbg_hook)
+    bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx)
+    model = ModelWrapper(finn_onnx)
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(RemoveStaticGraphInputs())
+    assert len(model.graph.input) == 1
+    assert len(model.graph.output) == 1
+    # load one of the test vectors
+    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
+    input_tensor = onnx.load_tensor_from_string(raw_i)
+    # run using FINN-based execution
+    input_dict = {"0": nph.to_array(input_tensor)}
+    output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True)
+    produced = output_dict[model.graph.output[0].name]
+    # run using PyTorch/Brevitas
+    input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
+    assert input_tensor.shape == (1, 1, 28, 28)
+    # do forward pass in PyTorch/Brevitas
+    expected = fc.forward(input_tensor).detach().numpy()
+    assert np.isclose(produced, expected, atol=1e-3).all()
+    # check all tensors at debug markers
+    names_brevitas = set(dbg_hook.outputs.keys())
+    names_finn = set(output_dict.keys())
+    names_common = names_brevitas.intersection(names_finn)
+    assert len(names_common) == 8
+    for dbg_name in names_common:
+        assert (dbg_hook.outputs[dbg_name] == output_dict[dbg_name]).all()
+    os.remove(finn_onnx)
diff --git a/tests/end2end/test_vitis_end2end_cnv_w1a1.py b/tests/end2end/test_vitis_end2end_cnv_w1a1.py
new file mode 100644
index 0000000000000000000000000000000000000000..106591aca21363f1246c43b9b24461d838293993
--- /dev/null
+++ b/tests/end2end/test_vitis_end2end_cnv_w1a1.py
@@ -0,0 +1,269 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import pytest
+import numpy as np
+
+# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
+# import pytorch before onnx, so we make sure to import onnx first
+import onnx  # NOQA
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+from finn.core.onnx_exec import execute_onnx
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from finn.transformation.fold_constants import FoldConstants
+
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.transformation.general import (
+    RemoveUnusedTensors,
+    RemoveStaticGraphInputs,
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+)
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline import Streamline
+from finn.util.basic import alveo_part_map, alveo_default_platform
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
+from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy
+import pkg_resources as pk
+from finn.transformation.double_to_single_float import DoubleToSingleFloat
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+import warnings
+
+build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
+test_alveo_board = os.getenv("ALVEO_BOARD", default="U250")
+test_fpga_part = alveo_part_map[test_alveo_board]
+test_platform = alveo_default_platform[test_alveo_board]
+target_clk_ns = 10
+mem_mode = "decoupled"
+
+
+def test_end2end_vitis_cnv_w1a1_export():
+    import brevitas.onnx as bo
+
+    tfc = get_test_model_trained("CNV", 1, 1)
+    bo.export_finn_onnx(
+        tfc, (1, 3, 32, 32), build_dir + "/end2end_vitis_cnv_w1a1_export.onnx"
+    )
+
+
+def test_end2end_vitis_cnv_w1a1_import_and_tidy():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_export.onnx"
+    )
+    model = model.transform(DoubleToSingleFloat())
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(RemoveStaticGraphInputs())
+    model.save(build_dir + "/end2end_vitis_cnv_w1a1_tidy.onnx")
+
+
+def test_end2end_vitis_cnv_w1a1_streamline():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_tidy.onnx"
+    )
+    model = model.transform(Streamline())
+    model = model.transform(LowerConvsToMatMul())
+    model = model.transform(MakeMaxPoolNHWC())
+    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
+    model = model.transform(Streamline())
+    model = model.transform(RemoveUnusedTensors())
+    model.save(build_dir + "/end2end_vitis_cnv_w1a1_streamlined.onnx")
+
+
+def test_end2end_vitis_cnv_w1a1_convert_to_hls_layers():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_streamlined.onnx"
+    )
+    model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
+    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
+    model = model.transform(to_hls.InferConvInpGen())
+    model = model.transform(to_hls.InferStreamingMaxPool())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(RemoveCNVtoFCFlatten())
+    model = model.transform(InferDataLayouts())
+    model.save(build_dir + "/end2end_vitis_cnv_w1a1_hls_layers.onnx")
+
+
+def test_end2end_vitis_cnv_w1a1_create_dataflow_partition():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_hls_layers.onnx"
+    )
+    parent_model = model.transform(CreateDataflowPartition())
+    parent_model.save(build_dir + "/end2end_vitis_cnv_w1a1_dataflow_parent.onnx")
+    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    sdp_node = getCustomOp(sdp_node)
+    dataflow_model_filename = sdp_node.get_nodeattr("model")
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
+    dataflow_model.save(build_dir + "/end2end_vitis_cnv_w1a1_dataflow_model.onnx")
+
+
+def test_end2end_vitis_cnv_w1a1_fold():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_dataflow_model.onnx"
+    )
+    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
+    # each tuple is (PE, SIMD, in_fifo_depth) for a layer
+    folding = [
+        (16, 3, 256),
+        (32, 32, 256),
+        (16, 32, 256),
+        (16, 32, 256),
+        (4, 32, 214),
+        (1, 32, 2),
+        (1, 4, 126),
+        (1, 8, 62),
+        (5, 1, 6),
+    ]
+    for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding):
+        fcl_inst = getCustomOp(fcl)
+        fcl_inst.set_nodeattr("PE", pe)
+        fcl_inst.set_nodeattr("SIMD", simd)
+        fcl_inst.set_nodeattr("inFIFODepth", ififodepth)
+
+    swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
+    swg_idepth = [2, 51, 9, 106, 2, 2]
+    for i in range(len(swg_layers)):
+        swg_inst = getCustomOp(swg_layers[i])
+        simd = folding[i][1]
+        swg_inst.set_nodeattr("SIMD", simd)
+        swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i])
+    model = model.transform(AnnotateResources("estimate"))
+    model = model.transform(AnnotateCycles())
+    model.save(build_dir + "/end2end_vitis_cnv_w1a1_folded.onnx")
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_vitis_cnv_w1a1_build():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_folded.onnx"
+    )
+    model = model.transform(
+        VitisBuild(
+            test_fpga_part,
+            target_clk_ns,
+            test_platform,
+            strategy=VitisOptStrategy.BUILD_SPEED,
+        )
+    )
+    model.save(build_dir + "/end2end_vitis_cnv_w1a1_build.onnx")
+
+
+def test_end2end_vitis_cnv_w1a1_annotate_resources():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_build.onnx"
+    )
+    model = model.transform(AnnotateResources("synth"))
+    warnings.warn(
+        "Post-synthesis resources (excluding shell): "
+        + model.get_metadata_prop("res_total_synth")
+    )
+    warnings.warn(
+        "Post-synthesis resources (all inclusive): "
+        + model.get_metadata_prop("res_total_top_synth")
+    )
+    model.save(build_dir + "/end2end_vitis_cnv_w1a1_annotate_resources.onnx")
+
+
+def test_end2end_vitis_cnv_w1a1_deploy_on_pynq():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_build.onnx"
+    )
+    try:
+        ip = os.environ["ALVEO_IP"]  # no default for this one; skip if not defined
+        if ip == "":
+            pytest.skip("Alveo host IP address not specified")
+        username = os.getenv("ALVEO_USERNAME", "xilinx")
+        password = os.getenv("ALVEO_PASSWORD", "xilinx")
+        port = os.getenv("ALVEO_PORT", 22)
+        target_dir = os.getenv("ALVEO_TARGET_DIR", "/home/xilinx/finn")
+        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
+        # save the model to be able to link it to the parent
+        model.save(build_dir + "/end2end_vitis_cnv_w1a1_pynq_deploy.onnx")
+    except KeyError:
+        pytest.skip("Alveo host IP address not specified")
+
+
+def test_end2end_vitis_cnv_w1a1_run_on_pynq():
+    # use the streamlined model as the "golden" model for right answers
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_streamlined.onnx"
+    )
+    iname = golden.graph.input[0].name
+    oname = golden.graph.output[0].name
+    # load one of the test vectors
+    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
+    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
+    input_tensor = input_tensor / 255
+    assert input_tensor.shape == (1, 3, 32, 32)
+    x = input_tensor
+    # x = np.zeros(ishape, dtype=np.float32)
+    # run using FINN-based execution
+    ret_golden = execute_onnx(golden, {iname: x}, True)
+    y_golden = ret_golden[oname]
+    # set up parent+child graph to test
+    # we'll use models from the previous step as the child model
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_cnv_w1a1_dataflow_parent.onnx"
+    )
+    iname = parent_model.graph.input[0].name
+    oname = parent_model.graph.output[0].name
+    try:
+        ip = os.environ["ALVEO_IP"]  # NOQA
+        if ip == "":
+            pytest.skip("Alveo host IP address not specified")
+        # produce results with remote execution on the Alveo board
+        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+        sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(
+            build_dir + "/end2end_vitis_cnv_w1a1_pynq_deploy.onnx"
+        )
+        sdp_node.set_nodeattr(
+            "model", build_dir + "/end2end_vitis_cnv_w1a1_pynq_deploy.onnx"
+        )
+        ret = execute_onnx(parent_model, {iname: x}, True)
+        y = ret[oname]
+        assert np.isclose(y, y_golden).all()
+        assert np.argmax(y) == 3
+
+    except KeyError:
+        pytest.skip("Alveo host IP address not specified")
diff --git a/tests/end2end/test_vitis_end2end_tfc_w1a1.py b/tests/end2end/test_vitis_end2end_tfc_w1a1.py
new file mode 100644
index 0000000000000000000000000000000000000000..1eef964d4de81ce490809808fcdb9687a0f69b2f
--- /dev/null
+++ b/tests/end2end/test_vitis_end2end_tfc_w1a1.py
@@ -0,0 +1,252 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+
+import pytest
+import numpy as np
+
+# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
+# import pytorch before onnx, so we make sure to import onnx first
+import onnx  # NOQA
+import onnx.numpy_helper as nph
+
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+from finn.transformation.fold_constants import FoldConstants
+
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+
+from finn.transformation.general import (
+    RemoveUnusedTensors,
+    RemoveStaticGraphInputs,
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+)
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
+from finn.util.basic import alveo_part_map, alveo_default_platform
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
+from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from pkgutil import get_data
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+from finn.core.onnx_exec import execute_onnx
+import warnings
+
+build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
+test_alveo_board = os.getenv("ALVEO_BOARD", default="U250")
+test_fpga_part = alveo_part_map[test_alveo_board]
+test_platform = alveo_default_platform[test_alveo_board]
+target_clk_ns = 10
+mem_mode = "decoupled"
+
+
+def test_end2end_vitis_tfc_w1a1_export():
+    import brevitas.onnx as bo
+
+    tfc = get_test_model_trained("TFC", 1, 1)
+    bo.export_finn_onnx(
+        tfc, (1, 1, 28, 28), build_dir + "/end2end_vitis_tfc_w1a1_export.onnx"
+    )
+
+
+def test_end2end_vitis_tfc_w1a1_import_and_tidy():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_export.onnx"
+    )
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferDataTypes())
+    model = model.transform(RemoveStaticGraphInputs())
+    model.save(build_dir + "/end2end_vitis_tfc_w1a1_tidy.onnx")
+
+
+def test_end2end_vitis_tfc_w1a1_streamline():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_tidy.onnx"
+    )
+    model = model.transform(Streamline())
+    model = model.transform(RemoveUnusedTensors())
+    model.save(build_dir + "/end2end_vitis_tfc_w1a1_streamlined.onnx")
+
+
+def test_end2end_vitis_tfc_w1a1_convert_to_hls_layers():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_streamlined.onnx"
+    )
+    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
+    model = model.transform(absorb.AbsorbAddIntoMultiThreshold())
+    model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
+    model = model.transform(RoundAndClipThresholds())
+    model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
+    model = model.transform(InferDataLayouts())
+    model.save(build_dir + "/end2end_vitis_tfc_w1a1_hls_layers.onnx")
+
+
+def test_end2end_vitis_tfc_w1a1_create_dataflow_partition():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_hls_layers.onnx"
+    )
+    parent_model = model.transform(CreateDataflowPartition())
+    parent_model.save(build_dir + "/end2end_vitis_tfc_w1a1_dataflow_parent.onnx")
+    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    sdp_node = getCustomOp(sdp_node)
+    dataflow_model_filename = sdp_node.get_nodeattr("model")
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
+    dataflow_model.save(build_dir + "/end2end_vitis_tfc_w1a1_dataflow_model.onnx")
+
+
+def test_end2end_vitis_tfc_w1a1_fold():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_dataflow_model.onnx"
+    )
+    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
+    # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
+    config = [
+        (16, 49, 16, 64, "block"),
+        (8, 8, 64, 64, "auto"),
+        (8, 8, 64, 64, "auto"),
+        (10, 8, 64, 10, "distributed"),
+    ]
+    for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
+        fcl_inst = getCustomOp(fcl)
+        fcl_inst.set_nodeattr("PE", pe)
+        fcl_inst.set_nodeattr("SIMD", simd)
+        fcl_inst.set_nodeattr("inFIFODepth", ififo)
+        fcl_inst.set_nodeattr("outFIFODepth", ofifo)
+        fcl_inst.set_nodeattr("ram_style", ramstyle)
+
+    model.save(build_dir + "/end2end_vitis_tfc_w1a1_folded.onnx")
+
+
+@pytest.mark.slow
+@pytest.mark.vitis
+def test_end2end_vitis_tfc_w1a1_build():
+    if "VITIS_PATH" not in os.environ:
+        pytest.skip("VITIS_PATH not set")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_folded.onnx"
+    )
+    model = model.transform(
+        VitisBuild(
+            test_fpga_part,
+            target_clk_ns,
+            test_platform,
+            strategy=VitisOptStrategy.BUILD_SPEED,
+        )
+    )
+    model.save(build_dir + "/end2end_vitis_tfc_w1a1_build.onnx")
+
+
+def test_end2end_vitis_tfc_w1a1_annotate_resources():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_build.onnx"
+    )
+    model = model.transform(AnnotateResources("synth"))
+    warnings.warn(
+        "Post-synthesis resources (excluding shell): "
+        + model.get_metadata_prop("res_total_synth")
+    )
+    warnings.warn(
+        "Post-synthesis resources (all inclusive): "
+        + model.get_metadata_prop("res_total_top_synth")
+    )
+    model.save(build_dir + "/end2end_vitis_tfc_w1a1_annotate_resources.onnx")
+
+
+def test_end2end_vitis_tfc_w1a1_deploy_on_pynq():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_build.onnx"
+    )
+    try:
+        ip = os.environ["ALVEO_IP"]  # no default for this one; skip if not defined
+        if ip == "":
+            pytest.skip("Alveo host IP address not specified")
+        username = os.getenv("ALVEO_USERNAME", "xilinx")
+        password = os.getenv("ALVEO_PASSWORD", "xilinx")
+        port = os.getenv("ALVEO_PORT", 22)
+        target_dir = os.getenv("ALVEO_TARGET_DIR", "/home/xilinx/finn")
+        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
+        # save the model to be able to link it to the parent
+        model.save(build_dir + "/end2end_vitis_tfc_w1a1_pynq_deploy.onnx")
+    except KeyError:
+        pytest.skip("Alveo host IP address not specified")
+
+
+def test_end2end_vitis_tfc_w1a1_run_on_pynq():
+    # use the streamlined model as the "golden" model for right answers
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_streamlined.onnx"
+    )
+    iname = golden.graph.input[0].name
+    oname = golden.graph.output[0].name
+    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
+    input_tensor = onnx.load_tensor_from_string(raw_i)
+    x = nph.to_array(input_tensor)
+    # x = np.zeros(ishape, dtype=np.float32)
+    # run using FINN-based execution
+    ret_golden = execute_onnx(golden, {iname: x}, True)
+    y_golden = ret_golden[oname]
+    # set up parent+child graph to test
+    # we'll use models from the previous step as the child model
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_vitis_tfc_w1a1_dataflow_parent.onnx"
+    )
+    iname = parent_model.graph.input[0].name
+    oname = parent_model.graph.output[0].name
+    try:
+        ip = os.environ["ALVEO_IP"]  # NOQA
+        if ip == "":
+            pytest.skip("Alveo host IP address not specified")
+        # produce results with remote execution on the Alveo board
+        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+        sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(
+            build_dir + "/end2end_vitis_tfc_w1a1_pynq_deploy.onnx"
+        )
+        sdp_node.set_nodeattr(
+            "model", build_dir + "/end2end_vitis_tfc_w1a1_pynq_deploy.onnx"
+        )
+        ret = execute_onnx(parent_model, {iname: x}, True)
+        y = ret[oname]
+        assert np.isclose(y, y_golden).all()
+
+    except KeyError:
+        pytest.skip("Alveo host IP address not specified")
diff --git a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
index a272fadc12f095034693e555e4d791e9e73262ab..84394eff86c35d7132991173a77c65cfceac37c4 100644
--- a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
+++ b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
@@ -183,6 +183,10 @@ def test_end2end_zynqbuild_cnv_w1a1_build():
         "Post-synthesis resources (excluding shell): "
         + model.get_metadata_prop("res_total_synth")
     )
+    warnings.warn(
+        "Post-synthesis resources (all inclusive): "
+        + model.get_metadata_prop("res_total_top_synth")
+    )
     model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_build.onnx")
 
 
diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py
index 8b298d5644d6d6cda038e8ca1757be7538ba9804..e73393c71e0b0e168ad51c27dc415ef2c755bcab 100644
--- a/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py
+++ b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py
@@ -165,6 +165,10 @@ def test_end2end_zynqbuild_tfc_w1a1_build():
         "Post-synthesis resources (excluding shell): "
         + model.get_metadata_prop("res_total_synth")
     )
+    warnings.warn(
+        "Post-synthesis resources (all inclusive): "
+        + model.get_metadata_prop("res_total_top_synth")
+    )
     model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_build.onnx")
 
 
diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
index bdb24d82dd639abe52aac9688b0b98430f72cabd..294235719d66e1b3d15e6e72504209736f278457 100644
--- a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
+++ b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py
@@ -154,6 +154,10 @@ def test_end2end_zynqbuild_tfc_w2a2_build():
         "Post-synthesis resources (excluding shell): "
         + model.get_metadata_prop("res_total_synth")
     )
+    warnings.warn(
+        "Post-synthesis resources (all inclusive): "
+        + model.get_metadata_prop("res_total_top_synth")
+    )
     model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_build.onnx")
 
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py
index a0881e2c95a491c79bb86b9817fb81735eb63d81..8f6f5e2e5fc380d8dc4e5d4413d1503687ca67ae 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fifo.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py
@@ -26,7 +26,7 @@ from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
 )
 from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject
 from finn.util.basic import pynq_part_map
-from finn.core.throughput_test import throughput_test
+from finn.core.throughput_test import throughput_test_remote
 
 
 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
@@ -113,7 +113,7 @@ def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
         port = os.getenv("PYNQ_PORT", 22)
         target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
         model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
-        res = throughput_test(model)
+        res = throughput_test_remote(model)
         expected_dict = {}
         expected_dict["runtime[ms]"] = []
         expected_dict["throughput[images/s]"] = []
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index 66b0ef921453e9e6fee9eb9be18cc556b2612f23..75a67f3508f1de69cebf466589d195a99f08dd94 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -333,7 +333,7 @@ def test_fpgadataflow_ipstitch_pynq_synth():
         ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx"
     )
     model = model.transform(SynthPYNQProject())
-    bitfile = model.get_metadata_prop("vivado_pynq_bitfile")
+    bitfile = model.get_metadata_prop("bitfile")
     assert bitfile is not None
     assert os.path.isfile(bitfile)
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_synth.onnx")
@@ -463,7 +463,7 @@ def test_fpgadataflow_ipstitch_zynqbuild(board):
     model = model.transform(ZynqBuild(board, 10))
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_customzynq.onnx")
 
-    bitfile_name = model.get_metadata_prop("vivado_pynq_bitfile")
+    bitfile_name = model.get_metadata_prop("bitfile")
     assert bitfile_name is not None
     assert os.path.isfile(bitfile_name)
     # deployment
diff --git a/tests/pynq/test_pynq_performance_end2end.py b/tests/pynq/test_pynq_performance_end2end.py
index 3b6ea86741b8adefce4faaa65b791f1d213cf3ae..ea04ed9c402944730bf35374f46636ec433fe4c2 100644
--- a/tests/pynq/test_pynq_performance_end2end.py
+++ b/tests/pynq/test_pynq_performance_end2end.py
@@ -5,12 +5,22 @@ import numpy as np
 from scipy.stats import linregress
 import warnings
 from finn.util.test import load_test_checkpoint_or_skip
-from finn.core.throughput_test import throughput_test
+from finn.core.throughput_test import throughput_test_remote
 
 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
 
 
-@pytest.mark.parametrize("end2end_example", ["tfc_w1a1", "cnv_w1a1", "cnv_w2a2"])
+@pytest.mark.parametrize(
+    "end2end_example",
+    [
+        "zynqbuild_tfc_w1a1",
+        "zynqbuild_cnv_w1a1",
+        "vitis_tfc_w1a1",
+        "tfc_w1a1",
+        "cnv_w1a1",
+        "cnv_w2a2",
+    ],
+)
 @pytest.mark.slow
 def test_pynq_performance_end2end(end2end_example):
     model = load_test_checkpoint_or_skip(
@@ -27,7 +37,7 @@ def test_pynq_performance_end2end(end2end_example):
         bsize_range_in = [2 ** i for i in range(16)]
         bsize_range = []
         for bsize in bsize_range_in:
-            res = throughput_test(model, bsize)
+            res = throughput_test_remote(model, bsize)
             if res is not None:
                 ret[bsize] = res
                 bsize_range.append(bsize)
diff --git a/tests/pynq/test_pynq_performance_fifo.py b/tests/pynq/test_pynq_performance_fifo.py
index 1a438f79e09925cab57866c83a3cc9c8a1896351..b1f69527d98694b325f910cdffd21c3914aeff7e 100644
--- a/tests/pynq/test_pynq_performance_fifo.py
+++ b/tests/pynq/test_pynq_performance_fifo.py
@@ -18,7 +18,7 @@ from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject
 import finn.transformation.fpgadataflow.replace_verilog_relpaths as rvp
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import pynq_part_map, pynq_native_port_width
-from finn.core.throughput_test import throughput_test
+from finn.core.throughput_test import throughput_test_remote
 from scipy.stats import linregress
 import warnings
 
@@ -90,7 +90,7 @@ def test_pynq_performance_fifo():
         bsize_range_in = [2 ** i for i in range(20)]
         bsize_range = []
         for bsize in bsize_range_in:
-            res = throughput_test(model, bsize)
+            res = throughput_test_remote(model, bsize)
             if res is not None:
                 ret[bsize] = res
                 bsize_range.append(bsize)