Merge remote-tracking branch 'origin/dev' into optimize_streamer

5d1b1740 · Lucian Petrica · f78176a6 · dda60a00 · 5d1b1740 · 5d1b1740
Commit 5d1b1740 authored 4 years ago by Lucian Petrica
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,6 @@ MANIFEST

 # SSH key dir mounted into Docker
 /ssh_keys/
+
+# PYNQ board files
+/board_files/
--- a/docker/Dockerfile.finn_ci
+++ b/docker/Dockerfile.finn_ci
@@ -37,7 +37,7 @@ RUN apt-get update
 RUN apt-get -y upgrade
 RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
 RUN apt-get install -y verilator zsh
-RUN apt-get -y install sshpass
+RUN apt-get -y install sshpass wget unzip
 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config

 # cloning dependency repos

--- a/docker/Dockerfile.finn_dev
+++ b/docker/Dockerfile.finn_dev
@@ -43,7 +43,7 @@ RUN apt-get update
 RUN apt-get -y upgrade
 RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
 RUN apt-get install -y verilator nano zsh rsync
-RUN apt-get -y install sshpass
+RUN apt-get -y install sshpass wget unzip
 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config

 COPY requirements.txt .

--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -57,4 +57,19 @@ if [ ! -z "$VITIS_PATH" ];then
  export XILINX_VITIS=$VITIS_PATH
  source $VITIS_PATH/settings64.sh
 fi
+
+# download PYNQ board files if not already there
+if [ ! -d "/workspace/finn/board_files" ]; then
+    gecho "Downloading PYNQ board files for Vivado"
+    wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip
+    wget -q https://d2m32eurp10079.cloudfront.net/Download/pynq-z2.zip
+    unzip -q pynq-z1.zip
+    unzip -q pynq-z2.zip
+    mkdir /workspace/finn/board_files
+    mv pynq-z1/ board_files/
+    mv pynq-z2/ board_files/
+    rm pynq-z1.zip
+    rm pynq-z2.zip
+fi
+
 exec "$@"
--- a/notebooks/end2end_example/cnv_end2end_example.ipynb
+++ b/notebooks/end2end_example/cnv_end2end_example.ipynb
@@ -574,7 +574,7 @@
    "target_dir = os.getenv(\"PYNQ_TARGET_DIR\", \"/home/xilinx/finn\")\n",
    "\n",
    "model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_synth.onnx\")\n",
-    "model = model.transform(MakePYNQDriver())\n",
+    "model = model.transform(MakePYNQDriver(platform="zynq"))\n",
    "model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))\n",
    "model.save(build_dir + \"/end2end_cnv_w1a1_pynq_deploy.onnx\")"
   ]

--- a/notebooks/end2end_example/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/tfc_end2end_example.ipynb
@@ -730,7 +730,7 @@
       " 'ip_path': ('s', False, ''),\n",
       " 'ip_vlnv': ('s', False, ''),\n",
       " 'exec_mode': ('s', False, ''),\n",
-       " 'sim_cycles': ('i', False, 0),\n",
+       " 'cycles_rtlsim': ('i', False, 0),\n",
       " 'rtlsim_trace': ('s', False, ''),\n",
       " 'res_estimate': ('s', False, ''),\n",
       " 'res_hls': ('s', False, ''),\n",
@@ -1422,7 +1422,7 @@
   "source": [
    "from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver\n",
    "model = ModelWrapper(build_dir + \"/tfc_w1_a1_post_synthesis.onnx\")\n",
-    "model = model.transform(MakePYNQDriver())"
+    "model = model.transform(MakePYNQDriver(platform="zynq"))"
   ]
  },
  {

--- a/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py
+++ b/src/finn/analysis/fpgadataflow/exp_cycles_per_layer.py
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import finn.custom_op.registry as registry
+from finn.util.fpgadataflow import is_fpgadataflow_node
+
+
+def exp_cycles_per_layer(model):
+    """Estimates the number of cycles per sample for dataflow layers in the given model.
+    Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
+    transformation) prior to calling this analysis pass to ensure all nodes are
+    visible in the results.
+
+    Returns {node name : cycle estimation}."""
+
+    cycle_dict = {}
+    for node in model.graph.node:
+        if is_fpgadataflow_node(node) is True:
+            op_type = node.op_type
+            inst = registry.custom_op[op_type](node)
+            cycle_dict[node.name] = inst.get_exp_cycles()
+
+    return cycle_dict
--- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
@@ -35,6 +35,9 @@ from finn.util.fpgadataflow import is_fpgadataflow_node

 def hls_synth_res_estimation(model):
    """Extracts the FPGA resource results from the Vivado HLS synthesis estimates.
+    Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
+    transformation) prior to calling this analysis pass to ensure all nodes are
+    visible in the results.

    Returns {node name : resources_dict}."""


--- a/src/finn/analysis/fpgadataflow/post_synth_res.py
+++ b/src/finn/analysis/fpgadataflow/post_synth_res.py
@@ -30,15 +30,23 @@ import os
 import xml.etree.ElementTree as ET

 from finn.transformation.move_reshape import _is_fpgadataflow_node
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp


-def post_synth_res(model):
+def post_synth_res(model, override_synth_report_filename=None):
    """Extracts the FPGA resource results from the Vivado synthesis.
+    Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
+    transformation) prior to calling this analysis pass to ensure all nodes are
+    visible in the results.

    Returns {node name : resources_dict}."""

    res_dict = {}
-    synth_report_filename = model.get_metadata_prop("vivado_synth_rpt")
+    if override_synth_report_filename is not None:
+        synth_report_filename = override_synth_report_filename
+    else:
+        synth_report_filename = model.get_metadata_prop("vivado_synth_rpt")
    if os.path.isfile(synth_report_filename):
        tree = ET.parse(synth_report_filename)
        root = tree.getroot()
@@ -50,7 +58,11 @@ def post_synth_res(model):
        raise Exception("Please run synthesis first")

    for node in model.graph.node:
-        if _is_fpgadataflow_node(node):
+        if node.op_type == "StreamingDataflowPartition":
+            sdp_model = ModelWrapper(getCustomOp(node).get_nodeattr("model"))
+            sdp_res_dict = post_synth_res(sdp_model, synth_report_filename)
+            res_dict.update(sdp_res_dict)
+        elif _is_fpgadataflow_node(node):
            row = root.findall(".//*[@contents='%s']/.." % node.name)
            if row != []:
                node_dict = {}

--- a/src/finn/analysis/fpgadataflow/res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/res_estimation.py
@@ -32,6 +32,9 @@ from finn.util.fpgadataflow import is_fpgadataflow_node

 def res_estimation(model):
    """Estimates the resources needed for the given model.
+    Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
+    transformation) prior to calling this analysis pass to ensure all nodes are
+    visible in the results.

    Returns {node name : resource estimation}."""


--- a/src/finn/core/onnx_exec.py
+++ b/src/finn/core/onnx_exec.py
@@ -51,8 +51,20 @@ def execute_node(node, context, graph):
    if node.op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(node)
        model = ModelWrapper(sdp_node.get_nodeattr("model"))
-        ret = execute_onnx(model, context, True)
-        context.update(ret)
+        inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items()))
+        # input may have been renamed in partition
+        assert len(inp_ctx) == 1
+        old_iname = node.input[0]
+        new_iname = model.graph.input[0].name
+        if old_iname != new_iname:
+            inp_ctx[new_iname] = inp_ctx[old_iname]
+            del inp_ctx[old_iname]
+        ret = execute_onnx(model, inp_ctx, False)
+        # output may have been renamed in partition
+        assert len(ret) == 1
+        node_oname = node.output[0]
+        model_oname = model.graph.output[0].name
+        context[node_oname] = ret[model_oname]
    else:
        if node.domain == "finn":


--- a/src/finn/core/remote_exec.py
+++ b/src/finn/core/remote_exec.py
@@ -62,11 +62,15 @@ def remote_exec(model, execution_context):
    bash_command = ["/bin/bash", "-c", cmd]
    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
    process_compile.communicate()
+    # set platform attribute for correct remote execution
+    platform = model.get_metadata_prop("platform")
+    assert platform in ["alveo", "zynq", "zynq-iodma"]
    cmd = (
        "sshpass -p {} ssh {}@{} -p {} "
        '"cd {}/{}; echo "{}" | '
        'sudo -S python3.6 driver.py --exec_mode="execute" --batchsize=1" '
-        '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy"'
+        '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy" '
+        '--platform="{}" '
    ).format(
        pynq_password,
        pynq_username,
@@ -75,6 +79,7 @@ def remote_exec(model, execution_context):
        pynq_target_dir,
        deployment_folder,
        pynq_password,
+        platform,
    )
    bash_command = ["/bin/bash", "-c", cmd]
    process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)

--- a/src/finn/core/rtlsim_exec.py
+++ b/src/finn/core/rtlsim_exec.py
@@ -102,7 +102,7 @@ def rtlsim_exec(model, execution_context):
        sim = PyVerilator(rtlsim_so, auto_eval=False)
    ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file)
    packed_output = ret[0]
-    model.set_metadata_prop("sim_cycles", str(ret[1]))
+    model.set_metadata_prop("cycles_rtlsim", str(ret[1]))
    # unpack output and put into context
    o_folded_tensor = rtlsim_output_to_npy(
        packed_output, None, o_dt, o_folded_shape, packedBits, targetBits
@@ -171,7 +171,7 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True):
        no_change_count = no_change_count + 1

        if len(outputs) == num_out_values:
-            sim_cycles = observation_count
+            cycles_rtlsim = observation_count
            output_observed = True

        if no_change_count == liveness_threshold:
@@ -191,4 +191,4 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True):
        sim.flush_vcd_trace()
        sim.stop_vcd_trace()

-    return (outputs, sim_cycles)
+    return (outputs, cycles_rtlsim)
--- a/src/finn/core/throughput_test.py
+++ b/src/finn/core/throughput_test.py
@@ -125,7 +125,7 @@ def throughput_test_rtlsim(model, batchsize=100):
    os.environ["LIVENESS_THRESHOLD"] = "-1"
    rtlsim_exec(model, ctx)
    # extract metrics
-    cycles = int(model.get_metadata_prop("sim_cycles"))
+    cycles = int(model.get_metadata_prop("cycles_rtlsim"))
    clk_ns = float(model.get_metadata_prop("clk_ns"))
    fclk_mhz = 1 / (clk_ns * 0.001)
    runtime_s = (cycles * clk_ns) * (10 ** -9)

--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -82,7 +82,8 @@ class HLSCustomOp(CustomOp):
            "ip_path": ("s", False, ""),
            "ip_vlnv": ("s", False, ""),
            "exec_mode": ("s", False, ""),
-            "sim_cycles": ("i", False, 0),
+            "cycles_rtlsim": ("i", False, 0),
+            "cycles_estimate": ("i", False, 0),
            "rtlsim_trace": ("s", False, ""),
            "res_estimate": ("s", False, ""),
            "res_hls": ("s", False, ""),
@@ -209,6 +210,12 @@ class HLSCustomOp(CustomOp):
        HLSCustomOp class but has to be filled by every node"""
        return 0

+    def get_exp_cycles(self):
+        """Function for estimation of expected cycles for set folding,
+        is member function of HLSCustomOp class but has to be filled
+        by every node"""
+        return 0
+
    def code_generation_ipgen(self, model, fpgapart, clk):
        """Generates c++ code and tcl script for ip generation."""
        node = self.onnx_node
@@ -436,7 +443,7 @@ compilation transformations?
            no_change_count = no_change_count + 1

            if len(outputs) == num_out_values:
-                self.set_nodeattr("sim_cycles", observation_count)
+                self.set_nodeattr("cycles_rtlsim", observation_count)
                output_observed = True

            if no_change_count == liveness_threshold:
@@ -465,7 +472,7 @@ compilation transformations?
            trace_file = self.onnx_node.name + ".vcd"
        num_out_values = self.get_number_output_values()
        total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file)
-        self.set_nodeattr("sim_cycles", total_cycle_count)
+        self.set_nodeattr("cycles_rtlsim", total_cycle_count)

    def execute_node(self, context, graph):
        """Executes single node using cppsim or rtlsim."""

--- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py
@@ -170,6 +170,10 @@ class AddStreams_Batch(HLSCustomOp):
    def get_number_output_values(self):
        return np.prod(self.get_folded_output_shape()[:-1])

+    def get_exp_cycles(self):
+        # Channels/PE * batch size * fmdim * fmdim
+        return np.prod(self.get_folded_output_shape()[:-1])
+
    def execute_node(self, context, graph):
        mode = self.get_nodeattr("exec_mode")
        node = self.onnx_node

--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -224,6 +224,10 @@ class ChannelwiseOp_Batch(HLSCustomOp):
        nf = np.prod(self.get_folded_output_shape()[:-1])
        return nf

+    def get_exp_cycles(self):
+        # Channels/PE * batch size * fmdim * fmdim
+        return np.prod(self.get_folded_output_shape()[:-1])
+
    def get_template_param_values(self):
        """Returns the template parameter values according to input, output and weight
        data types."""

--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -177,6 +177,23 @@ class ConvolutionInputGenerator(HLSCustomOp):
        num_output_elems = np.prod(folded_oshape[:-1])
        return num_output_elems

+    def get_exp_cycles(self):
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        k = self.get_nodeattr("ConvKernelDim")
+        ifm_dim = self.get_nodeattr("IFMDim")
+        ofm_dim = self.get_nodeattr("OFMDim")
+        stride = self.get_nodeattr("Stride")
+        # since mmv != 1 is not supported yet, we set mmv for now to 1
+        mmv = 1
+        # see https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h
+        cycles_write_block = (ofm_dim * k * k * (ifm_ch / simd)) / mmv
+        cycles_read_block = stride * ifm_dim * (ifm_ch / simd)
+        max_cycles = max(cycles_write_block, cycles_read_block)
+        exp_cycles = ifm_dim * k * (ifm_ch / simd) + ofm_dim * max_cycles
+
+        return int(exp_cycles)
+
    def execute_node(self, context, graph):
        mode = self.get_nodeattr("exec_mode")
        node = self.onnx_node

--- a/src/finn/custom_op/fpgadataflow/downsampler.py
+++ b/src/finn/custom_op/fpgadataflow/downsampler.py
@@ -36,6 +36,14 @@ class DownSampler(HLSCustomOp):
        stride = self.get_nodeattr("Stride")
        return int(np.floor((idim - 1) / stride) + 1)

+    def get_exp_cycles(self):
+        idim = self.get_nodeattr("ImgDim")
+        channels = self.get_nodeattr("NumChannels")
+        simd = self.get_nodeattr("SIMD")
+        batch_size = self.get_nodeattr("numInputVectors")
+        exp_cycles = channels / simd * batch_size * idim * idim
+        return int(exp_cycles)
+
    def get_normal_input_shape(self):
        idim = self.get_nodeattr("ImgDim")
        num_ch = self.get_nodeattr("NumChannels")

--- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
+++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py
@@ -164,6 +164,10 @@ class DuplicateStreams_Batch(HLSCustomOp):
    def get_number_output_values(self):
        return 2 * np.prod(self.get_folded_output_shape()[1:-1])

+    def get_exp_cycles(self):
+        # Channels/PE * batch size * fmdim * fmdim
+        return np.prod(self.get_folded_output_shape()[:-1])
+
    def execute_node(self, context, graph):
        mode = self.get_nodeattr("exec_mode")
        node = self.onnx_node