From 1c7d81f39849d16f104effbba96e1fa5ee7495b6 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC <hlebleve@amd.com> Date: Wed, 26 Oct 2022 17:49:16 +0100 Subject: [PATCH] [set_fifo_depths] Adding a new transformation to split fifos larger than the max allowed depth --- src/finn/builder/build_dataflow_config.py | 4 + src/finn/builder/build_dataflow_steps.py | 3 + .../fpgadataflow/set_fifo_depths.py | 75 ++++++++++++- tests/fpgadataflow/test_split_large_fifos.py | 104 ++++++++++++++++++ 4 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 tests/fpgadataflow/test_split_large_fifos.py diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index d3c4156d9..2068d83f6 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -253,6 +253,10 @@ class DataflowBuildConfig: #: for each FIFO. auto_fifo_depths: Optional[bool] = True + #: Whether FIFO nodes with depth larger than 32768 will be split. + #: Allows very large FIFOs to be configured in the folding_config_file. + split_large_fifos: Optional[bool] = False + #: When `auto_fifo_depths = True`, select which method will be used for #: setting the FIFO sizes. 
auto_fifo_strategy: Optional[ diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 5da608c27..72f3fb225 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -98,6 +98,7 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.set_fifo_depths import ( InsertAndSetFIFODepths, RemoveShallowFIFOs, + SplitLargeFifos, ) from finn.transformation.fpgadataflow.set_folding import SetFolding from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext @@ -551,6 +552,8 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(GiveReadableTensorNames()) if cfg.folding_config_file is not None: model = model.transform(ApplyConfig(cfg.folding_config_file)) + if cfg.split_large_fifos: + model = model.transform(SplitLargeFifos()) # remove any shallow FIFOs model = model.transform(RemoveShallowFIFOs()) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index f715aaeff..3e841bf58 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -29,10 +29,17 @@ import math import numpy as np import warnings +from onnx import TensorProto, helper from pyverilator.util.axi_utils import reset_rtlsim, toggle_clk +from qonnx.core.datatype import DataType from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.base import Transformation -from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from qonnx.transformation.general import ( + GiveReadableTensorNames, + GiveUniqueNodeNames, + SortGraph, +) +from qonnx.util.basic import get_by_name from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles 
@@ -414,3 +421,69 @@ class InsertAndSetFIFODepths(Transformation): model = model.transform(RemoveShallowFIFOs()) return (model, False) + + +class SplitLargeFifos(Transformation): + """Split FIFOs with a depth larger than 32768 into smaller ones + to ensure that they can be correctly generated.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + node_ind += 1 + if n.op_type == "StreamingFIFO": + depth = get_by_name(n.attribute, "depth") + if depth.i > 32768: + n0 = getCustomOp(n) + fld_shape = n0.get_folded_output_shape() + dtype = n0.get_nodeattr("dataType") + impl_style = n0.get_nodeattr("impl_style") + ram_style = n0.get_nodeattr("ram_style") + shape = model.get_tensor_shape(n.input[0]) + split_n = math.ceil(depth.i / 32768) + fifo_depth = math.ceil(depth.i / split_n) + for i in range(split_n): + if i == 0: + inp = n.input[0] + else: + inp = n.name + "_" + str(i - 1) + "_out" + if i == split_n - 1: + outp = n.output[0] + else: + outp = n.name + "_" + str(i) + "_out" + out_tensor = helper.make_tensor_value_info( + outp, TensorProto.FLOAT, shape + ) + graph.value_info.append(out_tensor) + model.set_tensor_datatype(out_tensor.name, DataType[dtype]) + fifo_node = helper.make_node( + "StreamingFIFO", + [inp], + [outp], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + depth=fifo_depth, + folded_shape=fld_shape, + dataType=dtype, + impl_style=impl_style, + ram_style=ram_style, + ) + graph.node.insert(node_ind + i, fifo_node) + + graph.node.remove(n) + if n.output[0] != "global_out": + consumer = model.find_consumer(n.output[0]) + n1 = getCustomOp(consumer) + n1.set_nodeattr("outFIFODepth", fifo_depth) + if n.input[0] != "global_in": + producer = model.find_producer(n.input[0]) + n2 = getCustomOp(producer) + n2.set_nodeattr("inFIFODepth", fifo_depth) + graph_modified = True + if graph_modified: + model = model.transform(SortGraph()) + model = model.transform(GiveUniqueNodeNames()) + 
model = model.transform(GiveReadableTensorNames()) + return (model, graph_modified) diff --git a/tests/fpgadataflow/test_split_large_fifos.py b/tests/fpgadataflow/test_split_large_fifos.py new file mode 100644 index 000000000..ab9230ad3 --- /dev/null +++ b/tests/fpgadataflow/test_split_large_fifos.py @@ -0,0 +1,104 @@ +# Copyright (c) 2022 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +import pytest + +import json +import shutil +from brevitas.export.onnx.generic.manager import BrevitasONNXManager +from math import ceil + +import finn.builder.build_dataflow as build +import finn.builder.build_dataflow_config as build_cfg +from finn.util.basic import make_build_dir +from finn.util.test import get_trained_network_and_ishape + + +def fetch_test_model(topology, wbits=2, abits=2): + tmp_output_dir = make_build_dir("build_fifosizing_%s_" % topology) + (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits) + chkpt_name = tmp_output_dir + "/model.onnx" + BrevitasONNXManager.export(model, ishape, chkpt_name) + return tmp_output_dir + + +def get_folding_cfg(depth=65536): + cfg = dict() + cfg["Defaults"] = dict() + for i in range(3): + key = "StreamingFIFO_" + str(i) + cfg[key] = {"depth": depth, "ram_style": "auto", "impl_style": "rtl"} + return cfg + + +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.fpgadataflow +@pytest.mark.parametrize("depth", [16384, 65536, 45000]) +def test_split_large_fifos(depth): + tmp_output_dir = fetch_test_model("tfc") + folding_cfg = get_folding_cfg(depth) + with open(tmp_output_dir + "/folding_config.json", "w") as f: + json.dump(folding_cfg, f, indent=2) + cfg = build_cfg.DataflowBuildConfig( + output_dir=tmp_output_dir, + auto_fifo_depths=False, + split_large_fifos=True, + folding_config_file=tmp_output_dir + "/folding_config.json", + target_fps=10000, + synth_clk_period_ns=10.0, + board="Pynq-Z1", + rtlsim_batch_size=100, + shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, + generate_outputs=[ + build_cfg.DataflowOutputType.ESTIMATE_REPORTS, + build_cfg.DataflowOutputType.STITCHED_IP, + build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE, + ], + default_mem_mode=build_cfg.ComputeEngineMemMode.DECOUPLED, + ) + build.build_dataflow_cfg(tmp_output_dir + "/model.onnx", cfg) + with open(tmp_output_dir + "/report/estimate_network_performance.json") as f: + est_data = json.load(f) + with 
open(tmp_output_dir + "/report/rtlsim_performance.json") as f: + sim_data = json.load(f) + assert ( + float(sim_data["throughput[images/s]"]) + / float(est_data["estimated_throughput_fps"]) + > 0.9 + ) + with open(tmp_output_dir + "/final_hw_config.json") as f: + hw_config = json.load(f) + n = 0 + for key in hw_config: + if "StreamingFIFO" in key: + n += 1 + assert n == 3 * ceil(depth / 32768) + 1 + + shutil.rmtree(tmp_output_dir) -- GitLab