From 520e5d2693ef83f8f9b4590e545ae5d897925795 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <yamanu@xilinx.com>
Date: Wed, 1 Jun 2022 19:08:27 +0200
Subject: [PATCH] [Refactor] move files over from finn-base

---
 src/finn/core/onnx_exec.py | 120 +++++++++
 src/finn/core/remote_exec.py | 119 +++++++++
 src/finn/core/rtlsim_exec.py | 156 +++++++++++
 src/finn/util/basic.py | 211 +++++++++++++++
 src/finn/util/data_packing.py | 455 ++++++++++++++++++++++++++++++++
 src/finn/util/fpgadataflow.py | 43 +++
 src/finn/util/hls.py | 74 ++++++
 src/finn/util/platforms.py | 480 ++++++++++++++++++++++++++++++++++
 src/finn/util/vivado.py | 83 ++++++
 9 files changed, 1741 insertions(+)
 create mode 100644 src/finn/core/onnx_exec.py
 create mode 100644 src/finn/core/remote_exec.py
 create mode 100644 src/finn/core/rtlsim_exec.py
 create mode 100644 src/finn/util/basic.py
 create mode 100644 src/finn/util/data_packing.py
 create mode 100644 src/finn/util/fpgadataflow.py
 create mode 100644 src/finn/util/hls.py
 create mode 100644 src/finn/util/platforms.py
 create mode 100644 src/finn/util/vivado.py

diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py
new file mode 100644
index 000000000..d305ac9a6
--- /dev/null
+++ b/src/finn/core/onnx_exec.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2022, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import qonnx.analysis.topology as ta
+from qonnx.core.onnx_exec import execute_onnx as execute_onnx_base
+
+from finn.core.remote_exec import remote_exec
+from finn.core.rtlsim_exec import rtlsim_exec
+
+
+def execute_onnx(
+    model, input_dict, return_full_exec_context=False, start_node=None, end_node=None
+):
+    """Executes the given ONNX ModelWrapper with the given named inputs.
+    If return_full_exec_context is False, a dict of named outputs is returned
+    as indicated by model.graph.output.
+    If return_full_exec_context is True, the full set of tensors used by
+    the execution (including inputs, weights, activations and final outputs)
+    will be returned as a dict.
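+
+    Whether execution is delegated to the QONNX reference implementation,
+    to a remote PYNQ board or to rtlsim is determined by the model's
+    "exec_mode" metadata property.
+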
+    When start_node and end_node are set to None, the whole graph is executed.
+    If they are set to particular ONNX nodes, only the subgraph between (and
+    including) those nodes is executed.
+    """
+
+    # check if model has an execution mode set
+    # if None, execute model using the QONNX-provided execute_onnx impl
+    # if set to "remote_pynq" execute model on PYNQ board
+    # if set to "rtlsim" execute model using pyverilator
+    model_exec_mode = model.get_metadata_prop("exec_mode")
+    if (model_exec_mode is None) or (model_exec_mode == ""):
+        return execute_onnx_base(
+            model, input_dict, return_full_exec_context, start_node, end_node
+        )
+
+    if not model.check_all_tensor_shapes_specified():
+        raise Exception("Found unspecified tensor shapes, try infer_shapes")
+    ret = model.analysis(ta.nodes_topologically_sorted)
+    assert (
+        ret["nodes_topologically_sorted"] is True
+    ), """Nodes must be
+    topologically sorted."""
+
+    graph = model.graph
+    # first, we need to make sure that every variable required by the graph has
+    # some buffer associated with it. this includes graph inputs (which includes
+    # the input data as well as the trained parameters) and the graph ValueInfo
+    # (intermediate tensors between layers)
+    # this is provided by the execution_context, which is a dict of np.ndarray
+    execution_context = model.make_empty_exec_context()
+    # fill in any inputs provided to this function
+    for inp_name in input_dict.keys():
+        if inp_name in execution_context:
+            if execution_context[inp_name].shape == input_dict[inp_name].shape:
+                execution_context[inp_name] = input_dict[inp_name]
+            else:
+                raise Exception(
+                    "Shape mismatch for provided input %s: expected %s, got %s"
+                    % (
+                        inp_name,
+                        str(execution_context[inp_name].shape),
+                        str(input_dict[inp_name].shape),
+                    )
+                )
+        # else:
+        # raise Exception("Provided input not found in graph context: %s" % inp_name)
+
+    # dispatch to the right executor for the exec_mode checked above:
+    # "remote_pynq" executes the model on a remote PYNQ board,
+    # "rtlsim" executes the stitched-IP model using pyverilator
+    if model_exec_mode == "remote_pynq":
+        # use remote exec metadata built into model to execute on a remote PYNQ
+        remote_exec(model, execution_context)
+    elif model_exec_mode == "rtlsim":
+        # use stitched IP for rtlsim
+        rtlsim_exec(model, execution_context)
+    else:
+        raise Exception(
+            """Metadata property "exec_mode" is set to an unknown value.
+        Can be left unset or has to be set to "remote_pynq" for remote execution
+        on PYNQ board or "rtlsim" for execution using pyverilator!"""
+        )
+
+    if return_full_exec_context:
+        return execution_context
+    else:
+        # provide outputs as dict
+        output_dict = dict()
+        for out_tensor in graph.output:
+            out_name = out_tensor.name
+            output_dict[out_name] = execution_context[out_name]
+        return output_dict
diff --git a/src/finn/core/remote_exec.py b/src/finn/core/remote_exec.py
new file mode 100644
index 000000000..f487b48f8
--- /dev/null
+++ b/src/finn/core/remote_exec.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os +import subprocess +import warnings + + +def remote_exec(model, execution_context): + """Executes the given model remotely on the pynq board. The metadata properties + related to the pynq board have to be set. The execution context contains the + input values.""" + # TODO fix for multi input-output + pynq_ip = model.get_metadata_prop("pynq_ip") + pynq_port = int(model.get_metadata_prop("pynq_port")) + pynq_username = model.get_metadata_prop("pynq_username") + pynq_password = model.get_metadata_prop("pynq_password") + pynq_target_dir = model.get_metadata_prop("pynq_target_dir") + deployment_dir = model.get_metadata_prop("pynq_deploy_dir") + platform = model.get_metadata_prop("platform") + assert platform in ["alveo", "zynq-iodma"] + bitfile = model.get_metadata_prop("bitfile") + bitfile = os.path.basename(bitfile) + if pynq_password == "": + if "zynq" in platform: + raise Exception("PYNQ board remote exec needs password for sudo") + else: + local_prefix = "" # assume we are using an ssh key + warnings.warn("Empty password, make sure you've set up an ssh key") + else: + local_prefix = "sshpass -p %s " % pynq_password + + if platform == "alveo": + # Alveo can run without sudo + remote_prefix = "" + elif "zynq" in platform: + # PYNQ Zynq boards need to execute with sudo + remote_prefix = "echo %s | sudo -S " % pynq_password + + inp = execution_context[model.graph.input[0].name] + # make copy of array before saving it + inp = inp.copy() + batchsize = inp.shape[0] + np.save(os.path.join(deployment_dir, "input.npy"), inp) + # extracting last folder of absolute path (deployment_dir) + deployment_folder = os.path.basename(os.path.normpath(deployment_dir)) + # copy input to PYNQ board + cmd = local_prefix + "scp -P{} -r {}/input.npy {}@{}:{}/{}".format( + pynq_port, + deployment_dir, + pynq_username, + pynq_ip, + pynq_target_dir, + deployment_folder, + ) + bash_command = ["/bin/bash", "-c", cmd] + process_scp_in = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_scp_in.communicate() + + # use platform attribute for correct remote execution + if platform == "alveo": + remote_cmd = "bash -ic 'bash alveo_run.sh execute %d' \"" % batchsize + else: + remote_cmd = ( + "python3.6 driver.py --exec_mode=execute --batchsize={} " + "--bitfile={} --inputfile=input.npy --outputfile=output.npy 
" + '--platform={} "' + ).format(batchsize, bitfile, platform) + cmd = ( + local_prefix + 'ssh {}@{} -p {} "cd {}/{}; ' + remote_prefix + remote_cmd + ).format(pynq_username, pynq_ip, pynq_port, pynq_target_dir, deployment_folder) + bash_command = ["/bin/bash", "-c", cmd] + process_exec_accel = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_exec_accel.communicate() + # remove stale output file from local dir, if any + try: + os.remove("{}/output.npy".format(deployment_dir)) + except FileNotFoundError: + pass + # copy generated output to local + cmd = local_prefix + "scp -P{} {}@{}:{}/{}/output.npy {}".format( + pynq_port, + pynq_username, + pynq_ip, + pynq_target_dir, + deployment_folder, + deployment_dir, + ) + bash_command = ["/bin/bash", "-c", cmd] + process_scp_out = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_scp_out.communicate() + outp = np.load("{}/output.npy".format(deployment_dir)) + execution_context[model.graph.output[0].name] = outp diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py new file mode 100644 index 000000000..231a5ea7c --- /dev/null +++ b/src/finn/core/rtlsim_exec.py @@ -0,0 +1,156 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +from pyverilator.util.axi_utils import ( + pyverilate_stitched_ip, + reset_rtlsim, + rtlsim_multi_io, +) +from qonnx.custom_op.registry import getCustomOp + +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + +try: + from pyverilator import PyVerilator +except ModuleNotFoundError: + PyVerilator = None + + +def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None): + """Use PyVerilator to execute given model with stitched IP. The execution + context contains the input values. 
Hook functions can be optionally + specified to observe/alter the state of the circuit, receiving the + PyVerilator sim object as their first argument: + - pre_hook : hook function to be called before sim start (after reset) + - post_hook : hook function to be called after sim end + """ + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + # ensure stitched ip project already exists + assert os.path.isfile( + model.get_metadata_prop("wrapper_filename") + ), """The + file name from metadata property "wrapper_filename" doesn't exist.""" + assert os.path.isdir( + model.get_metadata_prop("vivado_stitch_proj") + ), """The + directory from metadata property "vivado_stitch_proj" doesn't exist""" + trace_file = model.get_metadata_prop("rtlsim_trace") + if trace_file is None: + trace_file = "" + extra_verilator_args = model.get_metadata_prop("extra_verilator_args") + if extra_verilator_args is None: + extra_verilator_args = [] + else: + extra_verilator_args = eval(extra_verilator_args) + + # extract i/o info to prepare io_dict + io_dict = {"inputs": {}, "outputs": {}} + if_dict = eval(model.get_metadata_prop("vivado_stitch_ifnames")) + # go over and prepare inputs + for i, i_vi in enumerate(model.graph.input): + i_name = i_vi.name + i_tensor = execution_context[i_name] + i_dt = model.get_tensor_datatype(i_name) + first_node_onnx = model.find_consumer(i_name) + first_node = getCustomOp(first_node_onnx) + node_inp_ind = list(first_node_onnx.input).index(i_name) + if node_inp_ind == 0: + # default node input (input 0) + i_stream_w = first_node.get_instream_width() + i_folded_shape = first_node.get_folded_input_shape() + else: + # not input 0; node must support specifying inp index + # for these functions + i_stream_w = first_node.get_instream_width(node_inp_ind) + i_folded_shape = first_node.get_folded_input_shape(node_inp_ind) + batchsize = i_tensor.shape[0] + # override batch size for input + i_folded_shape = list(i_folded_shape) + i_folded_shape[0] = batchsize + i_folded_shape = tuple(i_folded_shape) + # TODO any other layout transformations need to happen here! 
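+        # reshape the input into its folded form: the innermost dimension of
+        # i_folded_shape holds exactly the elements that are packed into one
+        # stream word of width i_stream_w below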
+ i_tensor = i_tensor.reshape(i_folded_shape) + # pack input for rtlsim + packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w) + # add to io_dict + if_name = if_dict["s_axis"][i][0] + io_dict["inputs"][if_name] = packed_input + # go over outputs to determine how many values will be produced + num_out_values = 0 + o_tensor_info = [] + for o, o_vi in enumerate(model.graph.output): + # output in io_dict just needs an empty list + if_name = if_dict["m_axis"][o][0] + io_dict["outputs"][if_name] = [] + # extract output shape + o_name = o_vi.name + o_shape = model.get_tensor_shape(o_name) + o_dt = model.get_tensor_datatype(o_name) + last_node = getCustomOp(model.find_producer(o_name)) + o_folded_shape = last_node.get_folded_output_shape() + # override batch size from actual input + o_shape = list(o_shape) + o_shape[0] = batchsize + o_shape = tuple(o_shape) + o_folded_shape = list(o_folded_shape) + o_folded_shape[0] = batchsize + o_folded_shape = tuple(o_folded_shape) + o_stream_w = last_node.get_outstream_width() + o_tensor_info.append((o_stream_w, o_dt, o_folded_shape, o_shape)) + num_out_values += batchsize * last_node.get_number_output_values() + + # prepare pyverilator model + rtlsim_so = model.get_metadata_prop("rtlsim_so") + if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)): + sim = pyverilate_stitched_ip(model, extra_verilator_args=extra_verilator_args) + model.set_metadata_prop("rtlsim_so", sim.lib._name) + else: + sim = PyVerilator(rtlsim_so, auto_eval=False) + + # reset and call rtlsim, including any pre/post hooks + reset_rtlsim(sim) + if pre_hook is not None: + pre_hook(sim) + n_cycles = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file, sname="_") + if post_hook is not None: + post_hook(sim) + + # unpack outputs and put back into execution context + for o, o_vi in enumerate(model.graph.output): + o_name = o_vi.name + if_name = if_dict["m_axis"][o][0] + o_stream_w, o_dt, o_folded_shape, o_shape = o_tensor_info[o] + packed_output = io_dict["outputs"][if_name] + o_folded_tensor = rtlsim_output_to_npy( + packed_output, None, o_dt, o_folded_shape, o_stream_w, o_dt.bitwidth() + ) + execution_context[o_name] = o_folded_tensor.reshape(o_shape) + + model.set_metadata_prop("cycles_rtlsim", str(n_cycles)) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py new file mode 100644 index 000000000..2cf806fe1 --- /dev/null +++ b/src/finn/util/basic.py @@ -0,0 +1,211 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+import sys
+import tempfile
+
+# mapping from PYNQ board names to FPGA part names
+pynq_part_map = dict()
+pynq_part_map["Ultra96"] = "xczu3eg-sbva484-1-e"
+pynq_part_map["Pynq-Z1"] = "xc7z020clg400-1"
+pynq_part_map["Pynq-Z2"] = "xc7z020clg400-1"
+pynq_part_map["ZCU102"] = "xczu9eg-ffvb1156-2-e"
+pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e"
+pynq_part_map["ZCU111"] = "xczu28dr-ffvg1517-2-e"
+pynq_part_map["RFSoC2x2"] = "xczu28dr-ffvg1517-2-e"
+
+# native AXI HP port width (in bits) for PYNQ boards
+pynq_native_port_width = dict()
+pynq_native_port_width["Pynq-Z1"] = 64
+pynq_native_port_width["Pynq-Z2"] = 64
+pynq_native_port_width["Ultra96"] = 128
+pynq_native_port_width["ZCU102"] = 128
+pynq_native_port_width["ZCU104"] = 128
+pynq_native_port_width["ZCU111"] = 128
+pynq_native_port_width["RFSoC2x2"] = 128
+
+# Alveo device and platform mappings
+alveo_part_map = dict()
+alveo_part_map["U50"] = "xcu50-fsvh2104-2L-e"
+alveo_part_map["U200"] = "xcu200-fsgd2104-2-e"
+alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
+alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
+
+alveo_default_platform = dict()
+alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_201920_3"
+alveo_default_platform["U200"] = "xilinx_u200_xdma_201830_2"
+alveo_default_platform["U250"] = "xilinx_u250_xdma_201830_2"
+alveo_default_platform["U280"] = "xilinx_u280_xdma_201920_3"
+
+
+def get_rtlsim_trace_depth():
+    """Return the trace depth for rtlsim via PyVerilator. Controllable
+    via the RTLSIM_TRACE_DEPTH environment variable. If the env.var. is
+    undefined, the default value of 1 is returned. A trace depth of 1
+    will only show top-level signals and yield smaller .vcd files.
+
+    The following depth values are of interest for whole-network stitched IP
+    rtlsim:
+    - level 1 shows top-level input/output streams
+    - level 2 shows per-layer input/output streams
+    - level 3 shows full per-layer I/O including FIFO count signals
+    """
+
+    try:
+        return int(os.environ["RTLSIM_TRACE_DEPTH"])
+    except KeyError:
+        return 1
+
+
+def get_remote_vivado():
+    """Return the address of the remote Vivado synthesis server, as set by the
+    REMOTE_VIVADO environment variable; otherwise return None."""
+
+    try:
+        return os.environ["REMOTE_VIVADO"]
+    except KeyError:
+        return None
+
+
+def get_finn_root():
+    "Return the root directory that FINN is cloned into."
+
+    try:
+        return os.environ["FINN_ROOT"]
+    except KeyError:
+        raise Exception(
+            """Environment variable FINN_ROOT must be set
+        correctly. Please ensure you have launched the Docker container correctly.
+        """
+        )
+
+
+def make_build_dir(prefix=""):
+    """Creates a folder with given prefix to be used as a build dir.
+    Use this function instead of tempfile.mkdtemp to ensure any generated files
+    will survive on the host after the FINN Docker container exits."""
+    try:
+        tmpdir = tempfile.mkdtemp(prefix=prefix)
+        newdir = tmpdir.replace("/tmp", os.environ["FINN_BUILD_DIR"])
+        os.makedirs(newdir)
+        return newdir
+    except KeyError:
+        raise Exception(
+            """Environment variable FINN_BUILD_DIR must be set
+        correctly. Please ensure you have launched the Docker container correctly.
+        """
+        )
+
+
+class CppBuilder:
+    """Builds the g++ compiler command to produce the executable of the C++ code
+    in code_gen_dir, which is passed to the function build() of this class."""
+
+    def __init__(self):
+        self.include_paths = []
+        self.cpp_files = []
+        self.executable_path = ""
+        self.code_gen_dir = ""
+        self.compile_components = []
+        self.compile_script = ""
+
+    def append_includes(self, library_path):
+        """Adds given library path to include_paths list."""
+        self.include_paths.append(library_path)
+
+    def append_sources(self, cpp_file):
+        """Adds given c++ file to cpp_files list."""
+        self.cpp_files.append(cpp_file)
+
+    def set_executable_path(self, path):
+        """Sets member variable "executable_path" to given path."""
+        self.executable_path = path
+
+    def build(self, code_gen_dir):
+        """Builds the g++ compiler command according to entries in include_paths
+        and cpp_files lists. Saves it as a bash script in the given folder and
+        executes it."""
+        self.code_gen_dir = code_gen_dir
+        self.compile_components.append("g++ -o " + str(self.executable_path))
+        for cpp_file in self.cpp_files:
+            self.compile_components.append(cpp_file)
+        for lib in self.include_paths:
+            self.compile_components.append(lib)
+        bash_compile = ""
+        for component in self.compile_components:
+            bash_compile += str(component) + " "
+        self.compile_script = str(self.code_gen_dir) + "/compile.sh"
+        with open(self.compile_script, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write(bash_compile + "\n")
+        bash_command = ["bash", self.compile_script]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+
+
+def launch_process_helper(args, proc_env=None, cwd=None):
+    """Helper function to launch a process in a way that facilitates logging
+    stdout/stderr with Python loggers.
+    Returns (cmd_out, cmd_err)."""
+    if proc_env is None:
+        proc_env = os.environ.copy()
+    with subprocess.Popen(
+        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=proc_env, cwd=cwd
+    ) as proc:
+        (cmd_out, cmd_err) = proc.communicate()
+    if cmd_out is not None:
+        cmd_out = cmd_out.decode("utf-8")
+        sys.stdout.write(cmd_out)
+    if cmd_err is not None:
+        cmd_err = cmd_err.decode("utf-8")
+        sys.stderr.write(cmd_err)
+    return (cmd_out, cmd_err)
+
+
+def which(program):
+    "Python equivalent of the shell cmd 'which'."
+
+    # source:
+    # https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
+    def is_exe(fpath):
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+    fpath, fname = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        for path in os.environ["PATH"].split(os.pathsep):
+            exe_file = os.path.join(path, program)
+            if is_exe(exe_file):
+                return exe_file
+
+    return None
diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py
new file mode 100644
index 000000000..de8aca405
--- /dev/null
+++ b/src/finn/util/data_packing.py
@@ -0,0 +1,455 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import binascii +import numpy as np +import os +import sys +from bitstring import BitArray +from qonnx.core.datatype import DataType +from qonnx.util.basic import roundup_to_integer_multiple + + +def array2hexstring(array, dtype, pad_to_nbits, prefix="0x", reverse=False): + """ + Pack given one-dimensional NumPy array with FINN DataType dtype into a hex + string. + Any BIPOLAR values will be converted to a single bit with a 0 representing + -1. + pad_to_nbits is used to prepend leading zeros to ensure packed strings of + fixed width. The minimum value for pad_to_nbits is 4, since a single hex + digit is four bits. reverse can be used to reverse the array prior to + packing. + + Examples: + + array2hexstring([1, 1, 1, 0], DataType["BINARY"], 4) = "0xe" + + array2hexstring([1, 1, 1, 0], DataType["BINARY"], 8) = "0x0e" + + array2hexstring([1, 1, 0, 1], DataType["BINARY"], 4, reverse=True) = "0xb" + + array2hexstring([1, 1, 1, 0], DataType["BINARY"], 8, reverse=True) = "0x07" + """ + if pad_to_nbits < 4: + pad_to_nbits = 4 + # ensure input is a numpy array with float values + if type(array) != np.ndarray or array.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + array = np.asarray(array, dtype=np.float32) + # ensure one-dimensional array to pack + assert array.ndim == 1, "The given array is not one-dimensional." + if dtype == DataType["BIPOLAR"]: + # convert bipolar values to binary + array = (array + 1) / 2 + dtype = DataType["BINARY"] + # reverse prior to packing, if desired + if reverse: + array = np.flip(array, -1) + lineval = BitArray(length=0) + bw = dtype.bitwidth() + # special handling for fixed point: rescale, then pack as integers + if dtype.is_fixed_point(): + sf = dtype.scale_factor() + array = array / sf + # replace dtype with signed integer equivalent + dtype = DataType["INT" + str(bw)] + for val in array: + # ensure that this value is permitted by chosen dtype + assert dtype.allowed(val), "This value is not permitted by chosen dtype." 
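+        # append val as a single bw-bit field; signed integers, unsigned
+        # integers and floats each use the matching BitArray initializer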
+ if dtype.is_integer(): + if dtype.signed(): + lineval.append(BitArray(int=int(val), length=bw)) + else: + lineval.append(BitArray(uint=int(val), length=bw)) + else: + lineval.append(BitArray(float=val, length=bw)) + if pad_to_nbits >= lineval.len: + # extend to the desired output width (a minimum of 4 bits) + lineval.prepend(BitArray(length=pad_to_nbits - lineval.len)) + else: + raise Exception("Number of bits is greater than pad_to_nbits") + # represent as hex + return prefix + lineval.hex + + +def hexstring2npbytearray(hexstring, remove_prefix="0x"): + """Convert a hex string into a NumPy array of dtype uint8. + + Example: + + hexstring2npbytearray("0f01") = array([15, 1], dtype=uint8) + """ + # remove prefix if found + if hexstring.startswith(remove_prefix): + lrp = len(remove_prefix) + hexstring = hexstring[lrp:] + # use Python's built-in bytearray + return np.asarray(bytearray.fromhex(hexstring), dtype=np.uint8) + + +def npbytearray2hexstring(npbytearray, prefix="0x"): + """Convert a NumPy array of uint8 dtype into a hex string. + + Example: + + npbytearray2hexstring(array([15, 1], dtype=uint8)) = "0x0f01" + """ + return prefix + binascii.hexlify(bytearray(npbytearray)).decode("utf-8") + + +def pack_innermost_dim_as_hex_string( + ndarray, dtype, pad_to_nbits, reverse_inner=False, prefix="0x" +): + """Pack the innermost dimension of the given numpy ndarray into hex + strings using array2hexstring. + + Examples: + + A = [[1, 1, 1, 0], [0, 1, 1, 0]] + + eA = ["0e", "06"] + + pack_innermost_dim_as_hex_string(A, DataType["BINARY"], 8) == eA + + B = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]] + + eB = [[ "0f", "0f"], ["07", "0d"]] + + pack_innermost_dim_as_hex_string(B, DataType["UINT2"], 8) == eB + """ + + if type(ndarray) != np.ndarray or ndarray.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + ndarray = np.asarray(ndarray, dtype=np.float32) + + def fun(x): + return array2hexstring( + x, dtype, pad_to_nbits, reverse=reverse_inner, prefix=prefix + ) + + return np.apply_along_axis(fun, ndarray.ndim - 1, ndarray) + + +def unpack_innermost_dim_from_hex_string( + ndarray, dtype, out_shape, packedBits, reverse_inner=False +): + """Convert a NumPy array of hex strings into a FINN NumPy array by unpacking + the hex strings into the specified data type. out_shape can be specified + such that any padding in the packing dimension is removed. 
If reverse_inner + is set, the innermost unpacked dimension will be reversed.""" + + if type(ndarray) != np.ndarray: + raise Exception( + """unpack_innermost_dim_from_hex_string needs ndarray + as input""" + ) + if ndarray.dtype.kind not in {"U", "S"}: + raise Exception( + """unpack_innermost_dim_from_hex_string needs ndarray of + hex strings as input""" + ) + # convert ndarray into flattened list + data = ndarray.flatten().tolist() + targetBits = dtype.bitwidth() + # calculate outer and inner dim shapes + outer_dim_elems = 1 + for dim in range(len(out_shape) - 1): + outer_dim_elems = outer_dim_elems * out_shape[dim] + inner_dim_elems = out_shape[-1] + + array = [] + if dtype.is_fixed_point(): + # convert fixed point as signed integer + conv_dtype = DataType["INT" + str(targetBits)] + else: + conv_dtype = dtype + for outer_elem in range(outer_dim_elems): + ar_list = [] + ar_elem = data[0] + data.pop(0) + ar_elem = ar_elem.split("x") + ar_elem_bin = bin(int(ar_elem[1], 16))[2:].zfill(packedBits) + ar_elem_bin = [int(x) for x in ar_elem_bin] + + ar_elem_bin.reverse() + for i in range(inner_dim_elems): + upper_limit = (i + 1) * targetBits + lower_limit = i * targetBits + elem = ar_elem_bin[lower_limit:upper_limit] + elem.reverse() + elem_str = "".join(map(str, elem)) + if conv_dtype == DataType["FLOAT32"]: + ar_list.append(BitArray(bin=elem_str).float) + elif conv_dtype.is_integer(): + ar_list.append(int(elem_str, 2)) + else: + raise Exception("Not implemented for conv_dtype " + conv_dtype.name) + # reverse inner dimension back to "normal" positions + if reverse_inner is False: + ar_list.reverse() + + # interpret output values correctly + + # interpret values as bipolar + if conv_dtype == DataType["BIPOLAR"]: + ar_list = [2 * x - 1 for x in ar_list] + # interpret values as signed values + elif conv_dtype.name.startswith("INT"): + mask = 2 ** (conv_dtype.bitwidth() - 1) + ar_list = [-(x & mask) + (x & ~mask) for x in ar_list] + + array.append(ar_list) + array = np.asarray(array, dtype=np.float32).reshape(out_shape) + if dtype.is_fixed_point(): + # convert signed integer to fixed point by applying scale + array = array * dtype.scale_factor() + return array + + +def numpy_to_hls_code( + ndarray, dtype, hls_var_name, pack_innermost_dim=True, no_decl=False +): + """Return C++ code representation of a numpy ndarray with FINN DataType + dtype, using hls_var_name as the resulting C++ variable name. If + pack_innermost_dim is specified, the innermost dimension of the ndarray + will be packed into a hex string using array2hexstring. If no_decl is + set to True, no variable name and type will be generated as part of the + emitted string. + """ + hls_dtype = dtype.get_hls_datatype_str() + if type(ndarray) != np.ndarray or ndarray.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + ndarray = np.asarray(ndarray, dtype=np.float32) + if pack_innermost_dim: + idimlen = ndarray.shape[-1] + idimbits = idimlen * dtype.bitwidth() + idimbits = roundup_to_integer_multiple(idimbits, 4) + ndarray = pack_innermost_dim_as_hex_string(ndarray, dtype, idimbits) + hls_dtype = "ap_uint<%d>" % idimbits + ndims = ndarray.ndim + # add type string and variable name + # e.g. 
"const ap_uint<64>" "weightMem0" + ret = "%s %s" % (hls_dtype, hls_var_name) + # add dimensions + for d in range(ndims): + ret += "[%d]" % ndarray.shape[d] + orig_printops = np.get_printoptions() + np.set_printoptions(threshold=sys.maxsize) + + # define a function to convert a single element into a C++ init string + # a single element can be a hex string if we are using packing + def elem2str(x): + if type(x) == str or type(x) == np.str_ or type(x) == np.str: + return '%s("%s", 16)' % (hls_dtype, x) + elif type(x) == np.float32: + if dtype.is_integer(): + return str(int(x)) + else: + return str(x) + else: + raise Exception("Unsupported type for numpy_to_hls_code") + + strarr = np.array2string(ndarray, separator=", ", formatter={"all": elem2str}) + np.set_printoptions(**orig_printops) + strarr = strarr.replace("[", "{").replace("]", "}") + if no_decl: + ret = strarr + ";" + else: + ret = ret + " = \n" + strarr + ";" + return ret + + +def npy_to_rtlsim_input(input_file, input_dtype, pad_to_nbits, reverse_inner=True): + """Convert the multidimensional NumPy array of integers (stored as floats) + from input_file into a flattened sequence of Python arbitrary-precision + integers, packing the innermost dimension. See + finn.util.basic.pack_innermost_dim_as_hex_string() for more info on how the + packing works. If reverse_inner is set, the innermost dimension will be + reversed prior to packing.""" + pad_to_nbits = roundup_to_integer_multiple(pad_to_nbits, 4) + if issubclass(type(input_file), np.ndarray): + inp = input_file + elif os.path.isfile(input_file): + inp = np.load(input_file) + else: + raise Exception("input_file must be ndarray or filename for .npy") + if inp.shape[-1] == 1 and input_dtype.is_integer(): + packed_data = inp.flatten().astype(input_dtype.to_numpy_dt()) + else: + packed_data = pack_innermost_dim_as_hex_string( + inp, input_dtype, pad_to_nbits, reverse_inner=reverse_inner + ) + packed_data = packed_data.flatten() + packed_data = [int(x[2:], 16) for x in packed_data] + return packed_data + + +def rtlsim_output_to_npy( + output, path, dtype, shape, packedBits, targetBits, reverse_inner=True +): + """Convert a flattened sequence of Python arbitrary-precision integers + output into a NumPy array, saved as npy file at path. Each arbitrary-precision + integer is assumed to be a packed array of targetBits-bit elements, which + will be unpacked as the innermost dimension of the NumPy array. If path is + not None it will also be saved as a npy file.""" + + # TODO should have its own testbench? + output = np.asarray([hex(int(x)) for x in output]) + out_array = unpack_innermost_dim_from_hex_string( + output, dtype, shape, packedBits=packedBits, reverse_inner=reverse_inner + ) + # make copy before saving the array + out_array = out_array.copy() + if path is not None: + np.save(path, out_array) + return out_array + + +def finnpy_to_packed_bytearray( + ndarray, dtype, reverse_inner=False, reverse_endian=False, fast_mode=False +): + """Given a numpy ndarray with FINN DataType dtype, pack the innermost + dimension and return the packed representation as an ndarray of uint8. + The packed innermost dimension will be padded to the nearest multiple + of 8 bits. The returned ndarray has the same number of dimensions as the + input. + + If fast_mode is enabled, will attempt to use shortcuts to save + on runtime for certain cases: + * 8-bit ndarray -> 8-bit + * ndarray -> 1-bit and total bits % 8 == 0 + This mode is currently not well-tested, use at your own risk! 
+ """ + + # handle fast_mode cases (currently only called from driver): + if issubclass(type(ndarray), np.ndarray) and fast_mode: + inp_is_byte = ndarray.dtype in [np.uint8, np.int8] + out_is_byte = dtype.bitwidth() == 8 + double_reverse = reverse_inner and reverse_endian + # fast mode case: byte -> byte: cast + if inp_is_byte and out_is_byte and double_reverse: + return ndarray.view(np.uint8) + # fast mode case: xxx -> bit with nbits % 8 == 0: np.packbits + out_is_bit = dtype.bitwidth() == 1 + bits = dtype.bitwidth() * ndarray.shape[-1] + bits_padded = roundup_to_integer_multiple(bits, 8) + no_pad = bits_padded == bits + if out_is_bit and no_pad and double_reverse: + in_as_int8 = ndarray.astype(np.int8) + # bipolar -> binary if needed + if dtype == DataType["BIPOLAR"]: + in_as_int8 = (in_as_int8 + 1) // 2 + # reverse inner + in_as_int8 = np.flip(in_as_int8, axis=-1) + # pack with numpy + packed_data = np.packbits(in_as_int8, axis=-1) + # reverse endianness and return + return np.flip(packed_data, axis=-1) + + if (not issubclass(type(ndarray), np.ndarray)) or ndarray.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + ndarray = np.asarray(ndarray, dtype=np.float32) + # pack innermost dim to hex strings padded to 8 bits + bits = dtype.bitwidth() * ndarray.shape[-1] + bits_padded = roundup_to_integer_multiple(bits, 8) + packed_hexstring = pack_innermost_dim_as_hex_string( + ndarray, dtype, bits_padded, reverse_inner=reverse_inner + ) + + def fn(x): + return np.asarray(list(map(hexstring2npbytearray, x))) + + if packed_hexstring.ndim == 0: + # scalar, call hexstring2npbytearray directly + ret = hexstring2npbytearray(np.asscalar(packed_hexstring)) + else: + # convert ndarray of hex strings to byte array + ret = np.apply_along_axis(fn, packed_hexstring.ndim - 1, packed_hexstring) + if reverse_endian: + # reverse the endianness of packing dimension + ret = np.flip(ret, axis=-1) + return ret + + +def packed_bytearray_to_finnpy( + packed_bytearray, + dtype, + output_shape=None, + reverse_inner=False, + reverse_endian=False, + fast_mode=False, +): + """Given a packed numpy uint8 ndarray, unpack it into a FINN array of + given DataType. + + output_shape can be specified to remove padding from the + packed dimension, or set to None to be inferred from the input. + + If fast_mode is enabled, will attempt to use shortcuts (casting) to save + on runtime for certain cases. + This mode is currently not well-tested, use at your own risk. 
+ + """ + + if ( + not issubclass(type(packed_bytearray), np.ndarray) + ) or packed_bytearray.dtype != np.uint8: + raise Exception("packed_bytearray_to_finnpy needs NumPy uint8 arrays") + if packed_bytearray.ndim == 0: + raise Exception("packed_bytearray_to_finnpy expects at least 1D ndarray") + packed_dim = packed_bytearray.ndim - 1 + packed_bits = packed_bytearray.shape[packed_dim] * 8 + target_bits = dtype.bitwidth() + if output_shape is None: + # determine output shape from input shape + assert ( + packed_bits % target_bits == 0 + ), """packed_bits are not divisable by + target_bits.""" + n_target_elems = packed_bits // target_bits + output_shape = packed_bytearray.shape[:-1] + (n_target_elems,) + # handle no-packing cases (if fast_mode) via casting to save on compute + out_is_byte = target_bits in [8, 16] + double_reverse = reverse_inner and reverse_endian + if out_is_byte and double_reverse and fast_mode: + no_unpad = np.prod(packed_bytearray.shape) == np.prod(output_shape) + if no_unpad: + as_np_type = packed_bytearray.view(dtype.to_numpy_dt()) + return as_np_type.reshape(output_shape).astype(np.float32) + if reverse_endian: + packed_bytearray = np.flip(packed_bytearray, axis=-1) + # convert innermost dim of byte array to hex strings + packed_hexstring = np.apply_along_axis( + npbytearray2hexstring, packed_dim, packed_bytearray + ) + ret = unpack_innermost_dim_from_hex_string( + packed_hexstring, dtype, output_shape, packed_bits, reverse_inner + ) + + return ret diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py new file mode 100644 index 000000000..769ddb946 --- /dev/null +++ b/src/finn/util/fpgadataflow.py @@ -0,0 +1,43 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from qonnx.util.basic import get_by_name, is_finn_op + + +def is_fpgadataflow_node(node): + """Returns True if given node is fpgadataflow node. 
Otherwise False.""" + is_node = False + if node is not None: + if is_finn_op(node.domain): + n_backend = get_by_name(node.attribute, "backend") + if n_backend is not None: + backend_value = n_backend.s.decode("UTF-8") + if backend_value == "fpgadataflow": + is_node = True + + return is_node diff --git a/src/finn/util/hls.py b/src/finn/util/hls.py new file mode 100644 index 000000000..fb23af046 --- /dev/null +++ b/src/finn/util/hls.py @@ -0,0 +1,74 @@ +# Copyright (c) 2021 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import os +import subprocess + +from finn.util.basic import which + + +class CallHLS: + """Call either vivado_hls or vitis_hls to run HLS build tcl scripts.""" + + def __init__(self, backend="vivado_hls"): + self.tcl_script = "" + self.ipgen_path = "" + self.code_gen_dir = "" + self.ipgen_script = "" + assert backend in [ + "vivado_hls", + "vitis_hls", + ], "Unrecognized backend for CallHLS" + self.backend = backend + + def append_tcl(self, tcl_script): + """Sets the tcl script to be executed.""" + self.tcl_script = tcl_script + + def set_ipgen_path(self, path): + """Sets member variable ipgen_path to given path.""" + self.ipgen_path = path + + def build(self, code_gen_dir): + """Builds the bash script with given parameters and saves it in given folder. 
+ To guarantee the generation in the correct folder the bash script contains a + cd command.""" + assert which(self.backend) is not None, "%s not found in PATH" % self.backend + self.code_gen_dir = code_gen_dir + self.ipgen_script = str(self.code_gen_dir) + "/ipgen.sh" + working_dir = os.environ["PWD"] + f = open(self.ipgen_script, "w") + f.write("#!/bin/bash \n") + f.write("cd {}\n".format(code_gen_dir)) + f.write("%s %s\n" % (self.backend, self.tcl_script)) + f.write("cd {}\n".format(working_dir)) + f.close() + bash_command = ["bash", self.ipgen_script] + process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_compile.communicate() diff --git a/src/finn/util/platforms.py b/src/finn/util/platforms.py new file mode 100644 index 000000000..8212cb571 --- /dev/null +++ b/src/finn/util/platforms.py @@ -0,0 +1,480 @@ +# Copyright (c) 2021, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import numpy as np +from abc import abstractmethod + +# contains the amount of available FPGA resources for several +# Xilinx platforms, as well as certain resource limit guidelines +# for creating designs that can achieve timing closure + +# explicit value for res types/costs we don't care about +DONT_CARE = -1 +# recommended resource limits from Xilinx for timing closure +# respectively for LUT, FF, BRAM_18K, URAM, DSP res types +DEFAULT_RES_LIMITS = np.array([0.7, 0.5, 0.80, 0.80, 0.80]) +DEFAULT_AVG_CONSTRAINTS = [((2, 3, 4), 0.7)] # + +# resources required to instantiate certain infrastructure components +# such as memory controllers and network interfaces +DDR_RESOURCE_REQUIREMENTS = { + "LUT": 33256, + "FF": 44889, + "BRAM_18K": 199, + "URAM": 0, + "DSP": 3, +} +HBM_RESOURCE_REQUIREMENTS = { + "LUT": 10718, + "FF": 21793, + "BRAM_18K": 8, + "URAM": 0, + "DSP": 0, +} + +# we assume use of VNx Alveo UDP stack +# see: https://gitenterprise.xilinx.com/mruiznog/vitis_network_layer +ETH_RESOURCE_REQUIREMENTS = { + "LUT": 35219, + "FF": 86269, + "BRAM_18K": 183, + "URAM": 0, + "DSP": 0, +} + + +class Platform: + def __init__( + self, + nslr=1, + ndevices=1, + sll_count=[], + hbm_slr=-1, + ddr_slr=[0], + eth_slr=0, + eth_gbps=0, + limits=DEFAULT_RES_LIMITS, + avg_constraints=DEFAULT_AVG_CONSTRAINTS, + ): + self.nslr = nslr + self.sll_count = sll_count + self.eth_slr = eth_slr + self.eth_gbps = eth_gbps + self.ndevices = ndevices + self.hbm_slr = hbm_slr + self.ddr_slr = ddr_slr + # limits must be a np.array either of + # the same shape as compute_resources + # or broadcastable to it + self.res_limits = limits + # list of tuples of the form ( tuple of resource positions to avg, limit ) + self.avg_constraints = avg_constraints + + @property + @abstractmethod + def compute_resources(self): + pass + + @property + def guide_resources(self): + guide = [] + # TODO: assert limits is of correct size + guide_res = ( + np.tile(np.array(self.compute_resources), (self.ndevices, 1)) + ).astype(int) + for i in range(self.nslr * self.ndevices): + # when in multi-FPGA mode, subtract cost of UDP connection from eth_slr + local_slr = i % self.nslr + if self.ndevices > 1 and local_slr == self.eth_slr: + guide_res[i][0] -= ETH_RESOURCE_REQUIREMENTS["LUT"] + guide_res[i][1] -= ETH_RESOURCE_REQUIREMENTS["FF"] + guide_res[i][2] -= ETH_RESOURCE_REQUIREMENTS["BRAM_18K"] + guide_res[i][3] -= ETH_RESOURCE_REQUIREMENTS["URAM"] + guide_res[i][4] -= ETH_RESOURCE_REQUIREMENTS["DSP"] + # subtract the cost of memory controllers + # if we have a choice between DDR and HBM, use HBM + if local_slr == self.hbm_slr: + guide_res[i][0] -= HBM_RESOURCE_REQUIREMENTS["LUT"] + guide_res[i][1] -= HBM_RESOURCE_REQUIREMENTS["FF"] + guide_res[i][2] -= HBM_RESOURCE_REQUIREMENTS["BRAM_18K"] + guide_res[i][3] -= HBM_RESOURCE_REQUIREMENTS["URAM"] + guide_res[i][4] -= HBM_RESOURCE_REQUIREMENTS["DSP"] + elif local_slr in self.ddr_slr: + guide_res[i][0] -= DDR_RESOURCE_REQUIREMENTS["LUT"] + guide_res[i][1] -= DDR_RESOURCE_REQUIREMENTS["FF"] + guide_res[i][2] -= DDR_RESOURCE_REQUIREMENTS["BRAM_18K"] + guide_res[i][3] -= DDR_RESOURCE_REQUIREMENTS["URAM"] + guide_res[i][4] -= DDR_RESOURCE_REQUIREMENTS["DSP"] + guide.append(list(guide_res[i])) + return guide + + @property + def resource_count_dict(self): + res = dict() + for i in range(self.nslr * self.ndevices): + slr_res = dict() + slr_res["LUT"] = self.compute_resources[i % self.nslr][0] + slr_res["FF"] = self.compute_resources[i % self.nslr][1] + slr_res["BRAM_18K"] = self.compute_resources[i 
% self.nslr][2]
+            slr_res["URAM"] = self.compute_resources[i % self.nslr][3]
+            slr_res["DSP"] = self.compute_resources[i % self.nslr][4]
+            res["slr" + str(i)] = slr_res
+        return res
+
+    @property
+    def compute_connection_cost(self):
+        x = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), DONT_CARE)
+        # build connection cost matrix for one device's SLRs
+        xlocal = np.full((self.nslr, self.nslr), DONT_CARE)
+        for i in range(self.nslr):
+            for j in range(self.nslr):
+                if i == j:
+                    xlocal[i][j] = 0
+                elif abs(i - j) == 1:
+                    xlocal[i][j] = 1
+        # tile connection cost matrices for entire system
+        for i in range(self.ndevices):
+            x[
+                i * self.nslr : (i + 1) * self.nslr, i * self.nslr : (i + 1) * self.nslr
+            ] = xlocal
+        # set cost for ethernet connections, assuming daisy-chaining
+        for i in range(self.ndevices - 1):
+            x[i * self.nslr + self.eth_slr][(i + 1) * self.nslr + self.eth_slr] = 10
+            x[(i + 1) * self.nslr + self.eth_slr][i * self.nslr + self.eth_slr] = 10
+        return x
+
+    @property
+    def compute_connection_resource(self):
+        sll = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), 0)
+        # build connection resource matrix for one device's SLRs
+        slllocal = np.full((self.nslr, self.nslr), -1)
+        for i in range(self.nslr):
+            for j in range(self.nslr):
+                if i == j:
+                    # no SLL constraint when going from one SLR to itself
+                    slllocal[i][j] = -1
+                else:
+                    slllocal[i][j] = self.sll_count[i][j]
+        # tile connection cost matrices for entire system
+        for i in range(self.ndevices):
+            sll[
+                i * self.nslr : (i + 1) * self.nslr, i * self.nslr : (i + 1) * self.nslr
+            ] = slllocal
+        # set cost for ethernet connections, assuming daisy-chaining
+        eth = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), 0)
+        # no Eth throughput constraints from one SLR to itself
+        for i in range(self.ndevices * self.nslr):
+            eth[i][i] = -1
+        # apply symmetric ETH throughput constraints between the SLRs that have GTXes
+        for i in range(self.ndevices - 1):
+            eth[i * self.nslr + self.eth_slr][
+                (i + 1) * self.nslr + self.eth_slr
+            ] = self.eth_gbps * (10**9)
+            eth[(i + 1) * self.nslr + self.eth_slr][
+                i * self.nslr + self.eth_slr
+            ] = self.eth_gbps * (10**9)
+        # pack sll and eth info in one list-of-list-of-tuple structure
+        constraints = []
+        for i in range(self.ndevices * self.nslr):
+            constraints_line = []
+            for j in range(self.ndevices * self.nslr):
+                # make sure not to constrain both resources at the same time
+                # constrain for Eth throughput between SLRs on different devices
+                # constrain for SLLs between SLRs on same device
+                is_offchip = i // self.nslr != j // self.nslr
+                constraints_line.append(
+                    (-1 if is_offchip else sll[i][j], eth[i][j] if is_offchip else -1)
+                )
+            constraints.append(constraints_line)
+        return constraints
+
+    def map_device_to_slr(self, idx):
+        """Given a global SLR index, return a (local SLR index, device id) tuple."""
+        assert idx < self.nslr * self.ndevices
+        return (idx % self.nslr, idx // self.nslr)
+
+
+class Zynq7020_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(Zynq7020_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[53200, 2 * 53200, 280, 0, 220] for i in range(1)]
+
+
+class ZU3EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+
+
+class Zynq7020_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(Zynq7020_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[53200, 2 * 53200, 280, 0, 220] for i in range(1)]
+
+
+class ZU3EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU3EG_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[71000, 2 * 71000, 412, 0, 360] for i in range(1)]
+
+
+class ZU7EV_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU7EV_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[230000, 2 * 230000, 610, 92, 1728] for i in range(1)]
+
+
+class ZU9EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU9EG_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[274000, 2 * 274000, 1824, 0, 2520] for i in range(1)]
+
+
+class ZU28DR_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU28DR_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[425000, 2 * 425000, 2160, 80, 4272] for i in range(1)]
+
+
+class Alveo_NxU50_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        # Vivado reports 23040 SLLs SLR0 <-> SLR1; 5000 is used here
+        sll_counts = [[0, 5000], [5000, 0]]
+        super(Alveo_NxU50_Platform, self).__init__(
+            nslr=2,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[],
+            hbm_slr=0,
+            eth_slr=1,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # U50 has identical resource counts on both SLRs
+        # return [[365000,2*365000,2*564, 304, 2580] for i in range(2)]
+        # we observe from Vivado that the resource counts are actually:
+        return [
+            [374400, 2 * 374400, 2 * 564, 304, 2592],
+            [368160, 2 * 368160, 2 * 564, 304, 2760],
+        ]
+
+
+class Alveo_NxU200_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        super(Alveo_NxU200_Platform, self).__init__(
+            nslr=3,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 2],
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # return [[355000, 723000, 2*638, 320, 2265],
+        #         [160000, 331000, 2*326, 160, 1317],
+        #         [355000, 723000, 2*638, 320, 2265]]
+        # we observe from Vivado that the resource counts are actually:
+        return [
+            [385920, 2 * 385920, 2 * 714, 320, 2268],
+            [199680, 2 * 199680, 2 * 420, 160, 1320],
+            [385920, 2 * 385920, 2 * 714, 320, 2268],
+        ]
+
+
+class Alveo_NxU250_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [
+            [0, 5000, 0, 0],
+            [5000, 0, 5000, 0],
+            [0, 5000, 0, 5000],
+            [0, 0, 5000, 0],
+        ]
+        super(Alveo_NxU250_Platform, self).__init__(
+            nslr=4,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 1, 2, 3],
+            eth_slr=3,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # U250 has identical resource counts on all 4 SLRs:
+        # return [[345000,2*345000,2*500, 320, 2877] for i in range(4)]
+        # we observe from Vivado that the resource counts are actually:
+        return [[375000, 2 * 375000, 2 * 576, 320, 2880] for i in range(4)]
+
+
+class Alveo_NxU280_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        super(Alveo_NxU280_Platform, self).__init__(
+            nslr=3,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 1],
+            hbm_slr=0,
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # according to UG1120:
+        # return [[369000, 746000, 2*507, 320, 2733],
+        #         [333000, 675000, 2*468, 320, 2877],
+        #         [367000, 729000, 2*512, 320, 2880]]
+        # observed from Vivado:
+        return [
+            [400800, 2 * 400800, 2 * 600, 320, 2736],
+            [382080, 2 * 382080, 2 * 576, 320, 2880],
+            [380640, 2 * 380640, 2 * 576, 320, 2880],
+        ]
+
+
+platforms = dict()
+platforms["U50"] = Alveo_NxU50_Platform
+platforms["U200"] = Alveo_NxU200_Platform
+platforms["U250"] = Alveo_NxU250_Platform
+platforms["U280"] = Alveo_NxU280_Platform
+platforms["Pynq-Z1"] = Zynq7020_Platform
+platforms["Pynq-Z2"] = Zynq7020_Platform
+platforms["Ultra96"] = ZU3EG_Platform
+platforms["ZCU104"] = ZU7EV_Platform
+platforms["ZCU102"] = ZU9EG_Platform
+platforms["ZCU111"] = ZU28DR_Platform
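+
+# Minimal usage sketch (illustrative): look up a board by name and query its
+# per-SLR resource guides; "U250" is one of the keys defined above.
+#
+#   platform = platforms["U250"]()
+#   per_slr_guide = platform.guide_resources
+#   per_slr_counts = platform.resource_count_dict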
diff --git a/src/finn/util/vivado.py b/src/finn/util/vivado.py
new file mode 100644
index 000000000..bc8ca40d8
--- /dev/null
+++ b/src/finn/util/vivado.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from finn.util.basic import launch_process_helper, which
+
+
+def out_of_context_synth(
+    verilog_dir,
+    top_name,
+    fpga_part="xczu3eg-sbva484-1-e",
+    clk_name="ap_clk_0",
+    clk_period_ns=5.0,
+):
+    """Run out-of-context Vivado synthesis; return a dict with the parsed
+    resource counts, worst negative slack (WNS) and estimated fmax_mhz."""
+
+    # ensure that the OHMYXILINX envvar is set
+    if "OHMYXILINX" not in os.environ:
+        raise Exception("The environment variable OHMYXILINX is not defined.")
+    # ensure that vivado is in PATH: source $VIVADO_PATH/settings64.sh
+    if which("vivado") is None:
+        raise Exception("vivado is not in PATH, ensure settings64.sh is sourced.")
+    omx_path = os.environ["OHMYXILINX"]
+    script = "vivadocompile.sh"
+    # vivadocompile.sh <top-level-entity> <clock-name (optional)>
+    #                  <fpga-part (optional)> <clk-period-ns (optional)>
+    call_omx = "zsh %s/%s %s %s %s %f" % (
+        omx_path,
+        script,
+        top_name,
+        clk_name,
+        fpga_part,
+        float(clk_period_ns),
+    )
+    call_omx = call_omx.split()
+    launch_process_helper(call_omx, proc_env=os.environ.copy(), cwd=verilog_dir)
+
+    vivado_proj_folder = "%s/results_%s" % (verilog_dir, top_name)
+    res_counts_path = vivado_proj_folder + "/res.txt"
+
+    with open(res_counts_path, "r") as myfile:
+        res_data = myfile.read().split("\n")
+    ret = {}
+    ret["vivado_proj_folder"] = vivado_proj_folder
+    # parse lines of the form <resource>=<value>; malformed or
+    # empty lines yield a value of 0
+    for res_line in res_data:
+        res_fields = res_line.split("=")
+        try:
+            ret[res_fields[0]] = float(res_fields[1])
+        except (ValueError, IndexError):
+            ret[res_fields[0]] = 0
+    if ret["WNS"] == 0:
+        ret["fmax_mhz"] = 0
+    else:
+        ret["fmax_mhz"] = 1000.0 / (clk_period_ns - ret["WNS"])
+    return ret
-- 
GitLab