From 1c7d81f39849d16f104effbba96e1fa5ee7495b6 Mon Sep 17 00:00:00 2001
From: Hugo LE BLEVEC <hlebleve@amd.com>
Date: Wed, 26 Oct 2022 17:49:16 +0100
Subject: [PATCH] [set_fifo_depths] Adding a new transformation to split fifos
 larger than the max allowed depth

---
 src/finn/builder/build_dataflow_config.py     |   4 +
 src/finn/builder/build_dataflow_steps.py      |   3 +
 .../fpgadataflow/set_fifo_depths.py           |  75 ++++++++++++-
 tests/fpgadataflow/test_split_large_fifos.py  | 104 ++++++++++++++++++
 4 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 tests/fpgadataflow/test_split_large_fifos.py

diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index d3c4156d9..2068d83f6 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -253,6 +253,10 @@ class DataflowBuildConfig:
     #: for each FIFO.
     auto_fifo_depths: Optional[bool] = True
 
+    #: Whether FIFO nodes with depth larger than 32768 will be split.
+    #: Allows configuring very large FIFOs in the folding_config_file.
+    split_large_fifos: Optional[bool] = False
+
     #: When `auto_fifo_depths = True`, select which method will be used for
     #: setting the FIFO sizes.
     auto_fifo_strategy: Optional[
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 5da608c27..72f3fb225 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -98,6 +98,7 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.fpgadataflow.set_fifo_depths import (
     InsertAndSetFIFODepths,
     RemoveShallowFIFOs,
+    SplitLargeFifos,
 )
 from finn.transformation.fpgadataflow.set_folding import SetFolding
 from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
@@ -551,6 +552,8 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
         model = model.transform(GiveReadableTensorNames())
         if cfg.folding_config_file is not None:
             model = model.transform(ApplyConfig(cfg.folding_config_file))
+        if cfg.split_large_fifos:
+            model = model.transform(SplitLargeFifos())
         # remove any shallow FIFOs
         model = model.transform(RemoveShallowFIFOs())
 
diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
index f715aaeff..3e841bf58 100644
--- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py
+++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
@@ -29,10 +29,17 @@
 import math
 import numpy as np
 import warnings
+from onnx import TensorProto, helper
 from pyverilator.util.axi_utils import reset_rtlsim, toggle_clk
+from qonnx.core.datatype import DataType
 from qonnx.custom_op.registry import getCustomOp
 from qonnx.transformation.base import Transformation
-from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.general import (
+    GiveReadableTensorNames,
+    GiveUniqueNodeNames,
+    SortGraph,
+)
+from qonnx.util.basic import get_by_name
 
 from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
 from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
@@ -414,3 +421,81 @@ class InsertAndSetFIFODepths(Transformation):
         model = model.transform(RemoveShallowFIFOs())
 
         return (model, False)
+
+
+class SplitLargeFifos(Transformation):
+    """Split FIFOs with a depth larger than the maximum allowed depth
+    (32768) into a chain of smaller FIFOs, so that each part can be
+    generated correctly. The original node is replaced by
+    ceil(depth / 32768) FIFOs of equal depth wired back-to-back."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type == "StreamingFIFO":
+                depth = get_by_name(n.attribute, "depth")
+                if depth.i > 32768:
+                    n0 = getCustomOp(n)
+                    # inherit all relevant attributes from the split node
+                    fld_shape = n0.get_folded_output_shape()
+                    dtype = n0.get_nodeattr("dataType")
+                    impl_style = n0.get_nodeattr("impl_style")
+                    ram_style = n0.get_nodeattr("ram_style")
+                    shape = model.get_tensor_shape(n.input[0])
+                    # minimum number of FIFOs of (roughly) equal depth such
+                    # that every part stays within the 32768 limit
+                    split_n = math.ceil(depth.i / 32768)
+                    fifo_depth = math.ceil(depth.i / split_n)
+                    for i in range(split_n):
+                        # chain the new FIFOs: the first keeps the original
+                        # input, the last keeps the original output, fresh
+                        # tensors connect the parts in between
+                        if i == 0:
+                            inp = n.input[0]
+                        else:
+                            inp = n.name + "_" + str(i - 1) + "_out"
+                        if i == split_n - 1:
+                            outp = n.output[0]
+                        else:
+                            outp = n.name + "_" + str(i) + "_out"
+                            out_tensor = helper.make_tensor_value_info(
+                                outp, TensorProto.FLOAT, shape
+                            )
+                            graph.value_info.append(out_tensor)
+                            model.set_tensor_datatype(out_tensor.name, DataType[dtype])
+                        fifo_node = helper.make_node(
+                            "StreamingFIFO",
+                            [inp],
+                            [outp],
+                            domain="finn.custom_op.fpgadataflow",
+                            backend="fpgadataflow",
+                            depth=fifo_depth,
+                            folded_shape=fld_shape,
+                            dataType=dtype,
+                            impl_style=impl_style,
+                            ram_style=ram_style,
+                        )
+                        graph.node.insert(node_ind + i, fifo_node)
+
+                    graph.node.remove(n)
+                    # update FIFO depth hints on the neighbors to match the
+                    # split depth: the consumer sees the new FIFO at its
+                    # input, the producer at its output
+                    if n.output[0] != "global_out":
+                        consumer = model.find_consumer(n.output[0])
+                        n1 = getCustomOp(consumer)
+                        n1.set_nodeattr("inFIFODepth", fifo_depth)
+                    if n.input[0] != "global_in":
+                        producer = model.find_producer(n.input[0])
+                        n2 = getCustomOp(producer)
+                        n2.set_nodeattr("outFIFODepth", fifo_depth)
+                    graph_modified = True
+        if graph_modified:
+            # restore topological order and canonical names after rewiring
+            model = model.transform(SortGraph())
+            model = model.transform(GiveUniqueNodeNames())
+            model = model.transform(GiveReadableTensorNames())
+        return (model, graph_modified)
diff --git a/tests/fpgadataflow/test_split_large_fifos.py b/tests/fpgadataflow/test_split_large_fifos.py
new file mode 100644
index 000000000..ab9230ad3
--- /dev/null
+++ b/tests/fpgadataflow/test_split_large_fifos.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2022 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import pytest
+
+import json
+import shutil
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from math import ceil
+
+import finn.builder.build_dataflow as build
+import finn.builder.build_dataflow_config as build_cfg
+from finn.util.basic import make_build_dir
+from finn.util.test import get_trained_network_and_ishape
+
+
+def fetch_test_model(topology, wbits=2, abits=2):
+    """Export a small trained network to ONNX and return its build dir."""
+    tmp_output_dir = make_build_dir("build_fifosizing_%s_" % topology)
+    (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
+    chkpt_name = tmp_output_dir + "/model.onnx"
+    BrevitasONNXManager.export(model, ishape, chkpt_name)
+    return tmp_output_dir
+
+
+def get_folding_cfg(depth=65536):
+    """Return a folding config that sets the given depth on the first
+    three StreamingFIFO nodes."""
+    cfg = dict()
+    cfg["Defaults"] = dict()
+    for i in range(3):
+        key = "StreamingFIFO_" + str(i)
+        cfg[key] = {"depth": depth, "ram_style": "auto", "impl_style": "rtl"}
+    return cfg
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+@pytest.mark.fpgadataflow
+# 16384 needs no split, 65536 splits evenly, 45000 splits with a remainder
+@pytest.mark.parametrize("depth", [16384, 65536, 45000])
+def test_split_large_fifos(depth):
+    tmp_output_dir = fetch_test_model("tfc")
+    folding_cfg = get_folding_cfg(depth)
+    with open(tmp_output_dir + "/folding_config.json", "w") as f:
+        json.dump(folding_cfg, f, indent=2)
+    cfg = build_cfg.DataflowBuildConfig(
+        output_dir=tmp_output_dir,
+        auto_fifo_depths=False,
+        split_large_fifos=True,
+        folding_config_file=tmp_output_dir + "/folding_config.json",
+        target_fps=10000,
+        synth_clk_period_ns=10.0,
+        board="Pynq-Z1",
+        rtlsim_batch_size=100,
+        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
+        generate_outputs=[
+            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
+            build_cfg.DataflowOutputType.STITCHED_IP,
+            build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
+        ],
+        default_mem_mode=build_cfg.ComputeEngineMemMode.DECOUPLED,
+    )
+    build.build_dataflow_cfg(tmp_output_dir + "/model.onnx", cfg)
+    # splitting must not hurt rtlsim throughput vs the analytical estimate
+    with open(tmp_output_dir + "/report/estimate_network_performance.json") as f:
+        est_data = json.load(f)
+    with open(tmp_output_dir + "/report/rtlsim_performance.json") as f:
+        sim_data = json.load(f)
+    assert (
+        float(sim_data["throughput[images/s]"])
+        / float(est_data["estimated_throughput_fps"])
+        > 0.9
+    )
+    # each configured FIFO must end up split into ceil(depth / 32768) parts;
+    # the +1 accounts for one additional FIFO in the final model
+    with open(tmp_output_dir + "/final_hw_config.json") as f:
+        hw_config = json.load(f)
+    n = sum(1 for key in hw_config if "StreamingFIFO" in key)
+    assert n == 3 * ceil(depth / 32768) + 1
+
+    shutil.rmtree(tmp_output_dir)
-- 
GitLab