Commit deda5d23 authored by auphelia

Merge branch 'dev' into feature/depthwise_convolution

parents 1cf42e6b 8d0f6f13
......@@ -79,6 +79,12 @@ def remote_exec(model, execution_context):
bash_command = ["/bin/bash", "-c", cmd]
process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
process_compile.communicate()
# remove stale output file from local dir, if any
try:
os.remove("{}/output.npy".format(deployment_dir))
except FileNotFoundError:
pass
# copy generated output to local
cmd = "sshpass -p {} scp -P{} {}@{}:{}/{}/output.npy {}".format(
pynq_password,
pynq_port,
......
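The cleanup added in this hunk follows a simple pattern: delete any previous output.npy in the local deployment directory before pulling the fresh one over scp, so a failed remote run cannot be mistaken for a successful one. A minimal standalone sketch of that pattern, assuming placeholder names for the host and credentials (the real code takes them from model metadata):

import os
import subprocess

def fetch_remote_output(deployment_dir, host, user, password, port=22):
    """Copy output.npy back from the board; never leave a stale local copy."""
    local_out = os.path.join(deployment_dir, "output.npy")
    # remove stale output file from local dir, if any
    try:
        os.remove(local_out)
    except FileNotFoundError:
        pass
    # copy generated output to local (mirrors the sshpass/scp call in the hunk)
    cmd = "sshpass -p {} scp -P{} {}@{}:{}/output.npy {}".format(
        password, port, user, host, deployment_dir, deployment_dir
    )
    subprocess.Popen(["/bin/bash", "-c", cmd], stdout=subprocess.PIPE).communicate()
    # a failed remote run now shows up as a missing file instead of old data
    return local_out if os.path.exists(local_out) else None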
......@@ -33,7 +33,8 @@ import subprocess
def throughput_test(model, batchsize=1000):
"""Runs the throughput test for the given model remotely on the pynq board.
The metadata properties related to the pynq board have to be set.
Returns a dictionary with results of the throughput test"""
Returns a dictionary with results of the throughput test. Returns None
if the test fails."""
pynq_ip = model.get_metadata_prop("pynq_ip")
pynq_port = int(model.get_metadata_prop("pynq_port"))
......@@ -62,6 +63,12 @@ def throughput_test(model, batchsize=1000):
process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
process_compile.communicate()
# remove any pre-existing metrics file
try:
os.remove("{}/nw_metrics.txt".format(deployment_dir))
except FileNotFoundError:
pass
cmd = "sshpass -p {} scp -P{} {}@{}:{}/{}/nw_metrics.txt {}".format(
pynq_password,
pynq_port,
......@@ -75,7 +82,9 @@ def throughput_test(model, batchsize=1000):
process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
process_compile.communicate()
with open("{}/nw_metrics.txt".format(deployment_dir), "r") as file:
res = eval(file.read())
return res
try:
with open("{}/nw_metrics.txt".format(deployment_dir), "r") as file:
res = eval(file.read())
return res
except FileNotFoundError:
return None
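After the scp copy, the metrics file is read inside a try/except so that a missing file turns into a None return value instead of an unhandled exception. That read-or-None idiom, isolated as a sketch (the file name matches the hunk; ast.literal_eval is an assumption swapped in for the eval call above, since the generated driver writes a plain dict literal):

import ast
import os

def read_metrics(deployment_dir):
    """Return the parsed metrics dict, or None if the remote run left no file."""
    path = os.path.join(deployment_dir, "nw_metrics.txt")
    try:
        with open(path, "r") as f:
            # the generated driver writes a Python dict literal
            return ast.literal_eval(f.read())
    except FileNotFoundError:
        return None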
......@@ -91,7 +91,7 @@ cd %s
pynq_driver_template = """
import argparse
import os
from pynq import Overlay
import numpy as np
from pynq import allocate
......@@ -207,6 +207,12 @@ if __name__ == "__main__":
# for the remote execution the data from the input npy file has to be loaded,
# packed and copied to the PYNQ buffer
if exec_mode == "execute":
# remove old output file to prevent reusing old output
# in case execution fails
try:
os.remove(outputfile)
except FileNotFoundError:
pass
# load desired input .npy file
ibuf_normal = np.load(inputfile)
ibuf_folded = finnDriver.fold_input(ibuf_normal)
......@@ -217,10 +223,15 @@ if __name__ == "__main__":
# for the throughput test the runtime of the network has to be measured
if exec_mode == "throughput_test":
# measure runtime of network
start = time.time()
# remove old metrics file
try:
os.remove("nw_metrics.txt")
except FileNotFoundError:
pass
# dictionary for results of throughput test
res = {}
# measure runtime of network
start = time.time()
# execute accelerator
finnDriver.execute()
......
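Inside the generated driver template the same idea is applied on the board side: stale result files are removed before the accelerator runs, so an execution that crashes leaves no file behind rather than leaving the previous run's results. A condensed sketch of that control flow, with the untouched parts of the template reduced to comments (the function wrapper and argument defaults are illustrative, not part of the template):

import os
import time
import numpy as np

def run_driver(exec_mode, inputfile="input.npy", outputfile="output.npy"):
    if exec_mode == "execute":
        # remove old output so a failed run cannot be mistaken for a fresh result
        try:
            os.remove(outputfile)
        except FileNotFoundError:
            pass
        ibuf_normal = np.load(inputfile)
        # ... fold, pack and copy ibuf_normal into the PYNQ buffer, then run ...
    elif exec_mode == "throughput_test":
        # remove old metrics file for the same reason
        try:
            os.remove("nw_metrics.txt")
        except FileNotFoundError:
            pass
        res = {}
        start = time.time()
        # ... execute the accelerator, then fill res and write nw_metrics.txt ...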
......@@ -43,6 +43,13 @@ pynq_part_map["Pynq-Z1"] = "xc7z020clg400-1"
pynq_part_map["Pynq-Z2"] = "xc7z020clg400-1"
pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e"
# native AXI HP port width (in bits) for PYNQ boards
pynq_native_port_width = dict()
pynq_native_port_width["Pynq-Z1"] = 64
pynq_native_port_width["Pynq-Z2"] = 64
pynq_native_port_width["Ultra96"] = 128
pynq_native_port_width["ZCU104"] = 128
def get_rtlsim_trace_depth():
"""Return the trace depth for rtlsim via PyVerilator. Controllable
......
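The new pynq_native_port_width map records the native AXI HP port width, in bits, for each supported board, which lets tests size their streams to match the memory interface instead of hard-coding a width. A short usage sketch, following the FIFO performance test further down:

import os
from finn.util.basic import pynq_native_port_width

board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")   # as in the tests below
fifo_width = pynq_native_port_width[board]           # 64 for Z1/Z2, 128 for Ultra96/ZCU104
shape = (1, fifo_width)
folded_shape = (1, 1, fifo_width)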
......@@ -76,7 +76,7 @@ from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
test_fpga_part = pynq_part_map[test_pynq_board]
target_clk_ns = 5
target_clk_ns = 10
mem_mode = "decoupled"
......
......@@ -11,6 +11,7 @@ build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
@pytest.mark.parametrize("end2end_example", ["tfc_w1a1", "cnv_w1a1"])
@pytest.mark.slow
def test_pynq_performance_end2end(end2end_example):
model = load_test_checkpoint_or_skip(
build_dir + "/end2end_%s_pynq_deploy.onnx" % end2end_example
......@@ -21,11 +22,18 @@ def test_pynq_performance_end2end(end2end_example):
if ip == "" or board == "":
pytest.skip("PYNQ board or IP address not specified")
ret = dict()
bsize_range = [1, 10, 100, 1000, 10000]
for bsize in bsize_range:
# try a range of batch sizes, some may fail due to insufficient DMA
# buffers
bsize_range_in = [2 ** i for i in range(16)]
bsize_range = []
for bsize in bsize_range_in:
res = throughput_test(model, bsize)
assert res is not None
ret[bsize] = res
if res is not None:
ret[bsize] = res
bsize_range.append(bsize)
else:
# assume we reached largest possible N
break
y = [ret[key]["runtime[ms]"] for key in bsize_range]
lrret = linregress(bsize_range, y)
......
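The end-to-end test now sweeps batch sizes in powers of two and stops at the first size the board rejects, keeping only the sizes that actually ran for the regression. One way to read the regression result, sketched here as an assumption about how the slope and intercept are interpreted rather than something the test asserts:

from scipy.stats import linregress

y = [ret[b]["runtime[ms]"] for b in bsize_range]
lrret = linregress(bsize_range, y)
ms_per_sample = lrret.slope               # marginal cost of one extra sample
fixed_overhead_ms = lrret.intercept       # per-invocation driver/DMA setup cost
est_throughput = 1000.0 / ms_per_sample   # samples per second in the large-batch limit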
......@@ -17,7 +17,7 @@ from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject
from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject
import finn.transformation.fpgadataflow.replace_verilog_relpaths as rvp
from finn.transformation.general import GiveUniqueNodeNames
from finn.util.basic import pynq_part_map
from finn.util.basic import pynq_part_map, pynq_native_port_width
from finn.core.throughput_test import throughput_test
from scipy.stats import linregress
import warnings
......@@ -60,8 +60,9 @@ def test_pynq_performance_fifo():
board = os.environ["PYNQ_BOARD"] # NOQA
if ip == "" or board == "":
pytest.skip("PYNQ board or IP address not specified")
shape = (1, 128)
folded_shape = (1, 1, 128)
fifo_width = pynq_native_port_width[board]
shape = (1, fifo_width)
folded_shape = (1, 1, fifo_width)
depth = 16
clk_ns = 10
dtype = DataType.BIPOLAR
......@@ -84,11 +85,18 @@ def test_pynq_performance_fifo():
model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
ret = dict()
bsize_range = [1, 10, 100, 1000, 10000, 100000]
for bsize in bsize_range:
# try a range of batch sizes, some may fail due to insufficient DMA
# buffers
bsize_range_in = [2 ** i for i in range(20)]
bsize_range = []
for bsize in bsize_range_in:
res = throughput_test(model, bsize)
assert res is not None
ret[bsize] = res
if res is not None:
ret[bsize] = res
bsize_range.append(bsize)
else:
# assume we reached largest possible N
break
y = [ret[key]["runtime[ms]"] for key in bsize_range]
lrret = linregress(bsize_range, y)
......
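Because the FIFO test now breaks out of the loop on the first failed batch size instead of asserting on every result, nothing guarantees that any batch size succeeded before linregress is called. A small guard like the following could make that explicit; it is a hypothetical addition, not part of this commit:

# the regression is only meaningful with at least two successful batch sizes
assert len(bsize_range) >= 2, "throughput_test failed for (almost) all batch sizes"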