diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index f61af878ffadb6b4fd7a527110b105b19cee0ded..5f03379bbc37ab913f712571c630035dbad84cce 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -16,7 +16,7 @@ jobs: uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: '3.8' diff --git a/fetch-repos.sh b/fetch-repos.sh index ee1923e3a442f92d4d95b61f2a6e7410a0517d81..16960c71e31671b042dcfb4c31208aaaf8e29906 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -36,6 +36,7 @@ HLSLIB_COMMIT="d27f6b6c5d8f1bb208db395659389603f63ad4be" OMX_COMMIT="d1065a788219ca0eb54d5e57600b1f9d7f67d4cc" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" +KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="30eecc497c31050bd46d10ea20eba232" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" @@ -47,6 +48,7 @@ HLSLIB_URL="https://github.com/Xilinx/finn-hlslib.git" OMX_URL="https://github.com/maltanar/oh-my-xilinx.git" AVNET_BDF_URL="https://github.com/Avnet/bdf.git" XIL_BDF_URL="https://github.com/Xilinx/XilinxBoardStore.git" +KV260_BDF_URL="https://github.com/Xilinx/XilinxBoardStore.git" QONNX_DIR="qonnx" FINN_EXP_DIR="finn-experimental" @@ -57,6 +59,7 @@ HLSLIB_DIR="finn-hlslib" OMX_DIR="oh-my-xilinx" AVNET_BDF_DIR="avnet-bdf" XIL_BDF_DIR="xil-bdf" +KV260_SOM_BDF_DIR="kv260-som-bdf" # absolute path to this script, e.g. 
/home/user/bin/foo.sh SCRIPT=$(readlink -f "$0") @@ -104,6 +107,7 @@ fetch_board_files() { unzip -q pynq-z2.zip cp -r $SCRIPTPATH/deps/$AVNET_BDF_DIR/* $SCRIPTPATH/deps/board_files/ cp -r $SCRIPTPATH/deps/$XIL_BDF_DIR/boards/Xilinx/rfsoc2x2 $SCRIPTPATH/deps/board_files/; + cp -r $SCRIPTPATH/deps/$KV260_SOM_BDF_DIR/boards/Xilinx/kv260_som $SCRIPTPATH/deps/board_files/; cd $OLD_PWD } @@ -116,6 +120,7 @@ fetch_repo $HLSLIB_URL $HLSLIB_COMMIT $HLSLIB_DIR fetch_repo $OMX_URL $OMX_COMMIT $OMX_DIR fetch_repo $AVNET_BDF_URL $AVNET_BDF_COMMIT $AVNET_BDF_DIR fetch_repo $XIL_BDF_URL $XIL_BDF_COMMIT $XIL_BDF_DIR +fetch_repo $KV260_BDF_URL $KV260_BDF_COMMIT $KV260_SOM_BDF_DIR # download extra Pynq board files and extract if needed if [ ! -d "$SCRIPTPATH/deps/board_files" ]; then diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 80934d812fd7a165fdaba9c3fb17dc37e5a82d49..a38cb6e572d683871a924330742a1859b6fbe75d 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -253,6 +253,10 @@ class DataflowBuildConfig: #: for each FIFO. auto_fifo_depths: Optional[bool] = True + #: Whether FIFO nodes with depth larger than 32768 will be split. + #: Allow to configure very large FIFOs in the folding_config_file. + split_large_fifos: Optional[bool] = False + #: When `auto_fifo_depths = True`, select which method will be used for #: setting the FIFO sizes. 
auto_fifo_strategy: Optional[ diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index ce0ffb4771bad4a9818842f3f744c7a2b4207b8d..b0f7b6ec6cada69d402af9089c66636248150b19 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -99,6 +99,7 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.set_fifo_depths import ( InsertAndSetFIFODepths, RemoveShallowFIFOs, + SplitLargeFIFOs, ) from finn.transformation.fpgadataflow.set_folding import SetFolding from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext @@ -566,8 +567,6 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(GiveReadableTensorNames()) if cfg.folding_config_file is not None: model = model.transform(ApplyConfig(cfg.folding_config_file)) - # remove any shallow FIFOs - model = model.transform(RemoveShallowFIFOs()) # extract the final configuration and save it as json hw_attrs = [ @@ -586,6 +585,13 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model, cfg.output_dir + "/final_hw_config.json", hw_attrs ) + # perform FIFO splitting and shallow FIFO removal only after the final config + # json file has been written. otherwise, since these transforms may add/remove + # FIFOs, we get name mismatch problems when trying to reuse the final config. + if cfg.split_large_fifos: + model = model.transform(SplitLargeFIFOs()) + model = model.transform(RemoveShallowFIFOs()) + # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again # this will only run for the new nodes (e.g. 
FIFOs and DWCs) model = model.transform( diff --git a/src/finn/custom_op/fpgadataflow/eltwise.py b/src/finn/custom_op/fpgadataflow/eltwise.py index d6284750c73026c09fb7986ffc2517ed9ae3b153..68ed6546c741277bd8e962b6e80eda083cedba9c 100644 --- a/src/finn/custom_op/fpgadataflow/eltwise.py +++ b/src/finn/custom_op/fpgadataflow/eltwise.py @@ -398,7 +398,7 @@ class StreamingEltwise(HLSCustomOp): "StreamingEltwise", self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), - self.get_number_output_values(), + int(np.prod(self.get_folded_output_shape()[:-2])), slice_in0, slice_in1, slice_out, diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index 9f34eb15153219255ff2659fb01c8fcdb24c1b44..72128fda4cfe23db4858fe3ffe80a755733954cc 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -576,6 +576,10 @@ class MatrixVectorActivation(HLSCustomOp): def minimize_accumulator_width(self, model): weights = model.get_initializer(self.onnx_node.input[1]) + # since in the calculation the values of the weight matrix are used, + # for the bipolar case they need to be converted to bipolar + if self.get_nodeattr("binaryXnorMode"): + weights = 2 * weights - 1 if len(self.onnx_node.input) > 2: thresholds = model.get_initializer(self.onnx_node.input[2]) else: diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py index 1bb37c0fde0bfb17d47d6dd69721639281c5e66b..d9745acf63c4685b3369ac379abde0a6c5a3f157 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py @@ -602,13 +602,17 @@ class Thresholding_Batch(HLSCustomOp): # TODO check and add whatever missing def defines(self, var): + numReps = 1 numInputVectors = list(self.get_nodeattr("numInputVectors")) - numReps = int(np.prod(numInputVectors)) + 
total_spatial_size = int(np.prod(numInputVectors)) + self.code_gen_dict["$DEFINES$"] = [ - """#define NumChannels1 {}\n #define PE1 {}\n #define numReps {}""".format( + """#define NumChannels1 {}\n #define PE1 {}\n #define numReps {}\n + #define ImgDim1 {}""".format( self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), numReps, + total_spatial_size, ) ] if self.get_nodeattr("mem_mode") == "decoupled": @@ -649,7 +653,7 @@ class Thresholding_Batch(HLSCustomOp): npy_in = "%s/thresholds.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", weights, false, numReps);' + 'npy2apintstream<%s, %s, %d, %s>("%s", weights, false, ImgDim1);' % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) ) @@ -671,18 +675,13 @@ class Thresholding_Batch(HLSCustomOp): def docompute(self): tmpl_args = self.get_template_param_values() - # TODO: why put some template parameters into defines and not others? - # should ImgDim be defined or just filled in here like we do now? 
node = self.onnx_node - inp_vecs = self.get_nodeattr("numInputVectors") - total_spatial_size = int(np.prod(inp_vecs)) mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "const": self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, NumChannels1, PE1, {}, {}> + """{}<ImgDim1, NumChannels1, PE1, {}, {}> (in0, out, threshs, numReps);""".format( node.op_type, - total_spatial_size, tmpl_args["TSrcI"], tmpl_args["TDstI"], ) @@ -692,10 +691,9 @@ class Thresholding_Batch(HLSCustomOp): # - for cppsim the repetition comes from the threshold stream reader+input # - for synth the unit runs continuously anyway (ap_ctrl_none) self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> - (in0, out, weights, 1);""".format( + """{}<ImgDim1, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> + (in0, out, weights, numReps);""".format( "Thresholding_Stream_Batch", - total_spatial_size, tmpl_args["TSrcI"], tmpl_args["TDstI"], ) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 9282d399f857fa9da6f685421ce46f1d9cc5e851..2619557edfb92059f0ac0d824f7e9c289b282612 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -29,10 +29,16 @@ import math import numpy as np import warnings +from onnx import TensorProto, helper from pyverilator.util.axi_utils import reset_rtlsim, toggle_clk +from qonnx.core.datatype import DataType from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.base import Transformation -from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from qonnx.transformation.general import ( + GiveReadableTensorNames, + GiveUniqueNodeNames, + SortGraph, +) from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles @@ 
-484,3 +490,126 @@ class InsertAndSetFIFODepths(Transformation):
                 node_inst.set_nodeattr("outFIFODepths", fifodepth_out)
 
         return (model, False)
+
+
+def get_fifo_split_configs(depth, max_qsrl_depth=256, max_vivado_depth=32768):
+    """Break a FIFO depth into a chain of (depth, impl_style) configurations.
+
+    Depths up to max_qsrl_depth are returned unchanged as a single "rtl" FIFO.
+    Larger depths are first cut into chunks of at most max_vivado_depth, then
+    every chunk above max_qsrl_depth is decomposed into powers of two (plus a
+    rest of at most max_qsrl_depth). Each part is tagged "rtl" if it fits into
+    max_qsrl_depth and "vivado" otherwise, e.g. with the default limits
+    1200 -> [(1024, "vivado"), (176, "rtl")].
+    """
+
+    def floor_pow2(x):
+        # largest power of two that does not exceed x (valid for x >= 1)
+        return 1 << (x.bit_length() - 1)
+
+    def decompose_pow2(x):
+        # depths within max_qsrl_depth are kept whole, larger ones are peeled
+        # apart one power of two at a time, largest first
+        if x <= max_qsrl_depth:
+            return [x]
+        r = floor_pow2(x)
+        if x == r:
+            return [x]
+        return [r, *decompose_pow2(x - r)]
+
+    # trivial case: for small FIFOs, return as-is with rtl style
+    if depth <= max_qsrl_depth:
+        return [(depth, "rtl")]
+    # first pass: ensure max depth is respected
+    # (restricted by Vivado AXIS infra IP)
+    n_full, remainder = divmod(depth, max_vivado_depth)
+    chunks = [max_vivado_depth] * n_full
+    if remainder != 0:
+        chunks.append(remainder)
+    # second pass: break non-power-of-2 sized FIFOs into several ones
+    parts = [x for chunk in chunks for x in decompose_pow2(chunk)]
+    # finally, add impl_style to each split FIFO
+    return [(x, "rtl" if x <= max_qsrl_depth else "vivado") for x in parts]
+
+
+class SplitLargeFIFOs(Transformation):
+    """Split large FIFOs before implementation, for two reasons:
+
+    - impl_style="vivado" supports a max depth of 32k. Any larger
+      FIFOs must be implemented as a sequence of smaller FIFOs.
+    - impl_style="vivado" requires power-of-two depths, which is
+      normally handled by rounding up to the nearest power-of-two.
+      So a FIFO of size 8196 normally gets rounded-up to a depth of
+      16384 and wastes a lot of resources. Here, instead, we split
+      this up into two FIFOs of depth 8192 + 4.
+
+    """
+
+    def __init__(self, max_qsrl_depth=256, max_vivado_depth=32768):
+        super().__init__()
+        self.max_qsrl_depth = max_qsrl_depth
+        self.max_vivado_depth = max_vivado_depth
+
+    def apply(self, model):
+        """Replace every StreamingFIFO that needs splitting by a FIFO chain.
+
+        Chain parts inherit folded shape, data type and ram_style of the
+        original FIFO. Returns the tuple (model, False).
+        """
+        graph = model.graph
+        graph_modified = False
+        # nodes gained by graph.node so far, relative to the snapshot below
+        offset = 0
+        # iterate a snapshot, as graph.node is edited whenever a FIFO is split
+        for snap_ind, node in enumerate(list(graph.node)):
+            if node.op_type != "StreamingFIFO":
+                continue
+            n_inst = getCustomOp(node)
+            depth = n_inst.get_nodeattr("depth")
+            cfgs = get_fifo_split_configs(
+                depth, self.max_qsrl_depth, self.max_vivado_depth
+            )
+            if len(cfgs) <= 1:
+                continue
+            fld_shape = n_inst.get_folded_output_shape()
+            dtype = n_inst.get_nodeattr("dataType")
+            ram_style = n_inst.get_nodeattr("ram_style")
+            shape = model.get_tensor_shape(node.input[0])
+            # the chain is inserted right behind the FIFO it replaces
+            node_ind = snap_ind + offset + 1
+            for i, (fifo_depth, impl_style) in enumerate(cfgs):
+                if i == 0:
+                    inp = node.input[0]
+                else:
+                    inp = node.name + "_" + str(i - 1) + "_out"
+                if i == len(cfgs) - 1:
+                    outp = node.output[0]
+                else:
+                    # new tensor linking this part to the next one
+                    outp = node.name + "_" + str(i) + "_out"
+                    out_tensor = helper.make_tensor_value_info(
+                        outp, TensorProto.FLOAT, shape
+                    )
+                    graph.value_info.append(out_tensor)
+                    model.set_tensor_datatype(out_tensor.name, DataType[dtype])
+                fifo_node = helper.make_node(
+                    "StreamingFIFO",
+                    [inp],
+                    [outp],
+                    domain="finn.custom_op.fpgadataflow",
+                    backend="fpgadataflow",
+                    depth=fifo_depth,
+                    folded_shape=fld_shape,
+                    dataType=dtype,
+                    impl_style=impl_style,
+                    ram_style=ram_style,
+                    name=node.name + "_" + str(i),
+                )
+                graph.node.insert(node_ind + i, fifo_node)
+            graph.node.remove(node)
+            offset += len(cfgs) - 1
+            graph_modified = True
+        if graph_modified:
+            model = model.transform(SortGraph())
+            model = model.transform(GiveReadableTensorNames())
+        return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 78bcdea0d701f97e9f80d7c7c489aa01bc93fa52..f52bad0ffb35ae4714acc24aef368d01967db426 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -126,6 +126,9 @@
if {$BOARD == "ZCU104"} { } elseif {$BOARD == "Pynq-Z1"} { set ZYNQ_TYPE "zynq_7000" set_property board_part www.digilentinc.com:pynq-z1:part0:1.0 [current_project] +} elseif {$BOARD == "KV260_SOM"} { + set ZYNQ_TYPE "zynq_us+" + set_property board_part xilinx.com:kv260_som:part0:1.3 [current_project] } else { puts "Unrecognized board" } diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 4aba87216c8999612f748e989a945ceff33da167..3bc5b803db2072f4d0ed3829adab93b4fbd3b98e 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -40,6 +40,8 @@ pynq_part_map["ZCU102"] = "xczu9eg-ffvb1156-2-e" pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e" pynq_part_map["ZCU111"] = "xczu28dr-ffvg1517-2-e" pynq_part_map["RFSoC2x2"] = "xczu28dr-ffvg1517-2-e" +pynq_part_map["KV260_SOM"] = "xck26-sfvc784-2LV-c" + # native AXI HP port width (in bits) for PYNQ boards pynq_native_port_width = dict() @@ -50,6 +52,7 @@ pynq_native_port_width["ZCU102"] = 128 pynq_native_port_width["ZCU104"] = 128 pynq_native_port_width["ZCU111"] = 128 pynq_native_port_width["RFSoC2x2"] = 128 +pynq_native_port_width["KV260_SOM"] = 128 # Alveo device and platform mappings alveo_part_map = dict() diff --git a/src/finn/util/pyverilator.py b/src/finn/util/pyverilator.py index a00899cf784cbe3985b942af6b3d9a4c14cd8706..8d188585694c172d97d73fa6b5820edb7b48a948 100644 --- a/src/finn/util/pyverilator.py +++ b/src/finn/util/pyverilator.py @@ -133,6 +133,7 @@ def verilator_fifosim(model, n_inputs, max_iters=100000000): and throughput measurement.""" vivado_stitch_proj_dir = prepare_stitched_ip_for_verilator(model) + verilog_header_dir = vivado_stitch_proj_dir + "/pyverilator_vh" build_dir = make_build_dir("verilator_fifosim_") fifosim_cpp_fname = pk.resource_filename( "finn.qnn-data", "cpp/verilator_fifosim.cpp" @@ -184,6 +185,19 @@ def verilator_fifosim(model, n_inputs, max_iters=100000000): if which_verilator is None: raise Exception("'verilator' executable not found") + # add defines to make 
certain XPM src files work with Verilator + xpm_args = [] + xpm_args.append("-DDISABLE_XPM_ASSERTIONS") + xpm_args.append("-DOBSOLETE") + xpm_args.append("-DONESPIN") + xpm_args.append("--bbox-unsup") + vivado_path = os.environ["VIVADO_PATH"] + # additional SystemVerilog modules to make XPMs work with Verilator + xpm_memory = f"{vivado_path}/data/ip/xpm/xpm_memory/hdl/xpm_memory.sv" + xpm_cdc = f"{vivado_path}/data/ip/xpm/xpm_cdc/hdl/xpm_cdc.sv" + xpm_fifo = f"{vivado_path}/data/ip/xpm/xpm_fifo/hdl/xpm_fifo.sv" + verilog_file_arg = ["finn_design_wrapper.v", xpm_memory, xpm_cdc, xpm_fifo] + verilator_args = [ "perl", which_verilator, @@ -192,6 +206,8 @@ def verilator_fifosim(model, n_inputs, max_iters=100000000): build_dir, "-y", vivado_stitch_proj_dir, + "-y", + verilog_header_dir, "--CFLAGS", "--std=c++11", "-O3", @@ -201,13 +217,14 @@ def verilator_fifosim(model, n_inputs, max_iters=100000000): "fast", "--noassert", "--cc", - "finn_design_wrapper.v", + *verilog_file_arg, "--top-module", "finn_design_wrapper", "--exe", "verilator_fifosim.cpp", "--threads", "4", + *xpm_args, ] proc_env = os.environ.copy() diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index b1655485a0eea84f7afc2273e42eb28376db6381..f4f2b8dbfff0d720ec4eb901704581b096c0ea40 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -51,6 +51,7 @@ def fetch_test_model(topology, wbits=2, abits=2): @pytest.mark.slow @pytest.mark.vivado +@pytest.mark.fpgadataflow @pytest.mark.parametrize( "method", ["largefifo_rtlsim_python", "largefifo_rtlsim_cpp", "characterize"] ) diff --git a/tests/fpgadataflow/test_split_large_fifos.py b/tests/fpgadataflow/test_split_large_fifos.py new file mode 100644 index 0000000000000000000000000000000000000000..85b4a2bfa8dc0de3cbdd0ca34ec5b1ee68f37acf --- /dev/null +++ b/tests/fpgadataflow/test_split_large_fifos.py @@ -0,0 +1,128 @@ +# Copyright (C) 2022, Advanced Micro Devices, Inc. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+
+import pytest
+
+import json
+import shutil
+from brevitas.export.onnx.generic.manager import BrevitasONNXManager
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+
+import finn.builder.build_dataflow as build
+import finn.builder.build_dataflow_config as build_cfg
+from finn.transformation.fpgadataflow.set_fifo_depths import get_fifo_split_configs
+from finn.util.basic import make_build_dir
+from finn.util.test import get_trained_network_and_ishape
+
+
+def fetch_test_model(topology, wbits=2, abits=2):
+    """Export the trained test network to model.onnx in a new build dir."""
+    tmp_output_dir = make_build_dir("build_fifosizing_%s_" % topology)
+    (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits)
+    BrevitasONNXManager.export(model, ishape, tmp_output_dir + "/model.onnx")
+    return tmp_output_dir
+
+
+def get_folding_cfg(depth=65536):
+    """Build a folding config that sets StreamingFIFO_0..2 to the given depth."""
+    cfg = {"Defaults": dict()}
+    for i in range(3):
+        key = "StreamingFIFO_" + str(i)
+        cfg[key] = {"depth": depth, "ram_style": "auto", "impl_style": "vivado"}
+    return cfg
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+@pytest.mark.fpgadataflow
+@pytest.mark.parametrize("depth", [16384, 65536, 45000])
+# use real booleans here: the strings "True" and "False" are both truthy
+@pytest.mark.parametrize("force_python_rtlsim", [True, False])
+def test_split_large_fifos(depth, force_python_rtlsim):
+    """Build tfc with large FIFO depths, then check throughput and splits."""
+    tmp_output_dir = fetch_test_model("tfc")
+    folding_cfg = get_folding_cfg(depth)
+    with open(tmp_output_dir + "/folding_config.json", "w") as f:
+        json.dump(folding_cfg, f, indent=2)
+    cfg = build_cfg.DataflowBuildConfig(
+        output_dir=tmp_output_dir,
+        auto_fifo_depths=False,
+        split_large_fifos=True,
+        folding_config_file=tmp_output_dir + "/folding_config.json",
+        target_fps=10000,
+        force_python_rtlsim=force_python_rtlsim,
+        synth_clk_period_ns=10.0,
+        board="Pynq-Z1",
+        rtlsim_batch_size=100,
+        shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ,
+        generate_outputs=[
+            build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
+            build_cfg.DataflowOutputType.STITCHED_IP,
+            build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE,
+        ],
+        default_mem_mode=build_cfg.ComputeEngineMemMode.DECOUPLED,
+    )
+    build.build_dataflow_cfg(tmp_output_dir + "/model.onnx", cfg)
+    with open(tmp_output_dir + "/report/estimate_network_performance.json") as f:
+        est_data = json.load(f)
+    with open(tmp_output_dir + "/report/rtlsim_performance.json") as f:
+        sim_data = json.load(f)
+    sim_fps = float(sim_data["throughput[images/s]"])
+    est_fps = float(est_data["estimated_throughput_fps"])
+    assert sim_fps / est_fps > 0.9
+    model = ModelWrapper(
+        tmp_output_dir + "/intermediate_models/step_set_fifo_depths.onnx"
+    )
+    # exclude final FIFO node (output FIFO, not part of test)
+    fifo_nodes = model.get_nodes_by_op_type("StreamingFIFO")[:-1]
+    golden_cfg = get_fifo_split_configs(depth, 256, 32768)
+    for i, fifo_node in enumerate(fifo_nodes):
+        inst = getCustomOp(fifo_node)
+        fifo_depth = inst.get_nodeattr("depth")
+        assert fifo_depth == golden_cfg[i % len(golden_cfg)][0]
+
+    shutil.rmtree(tmp_output_dir)
+
+
+def test_split_large_fifo_configs():
+    """Check get_fifo_split_configs against hand-computed decompositions."""
+    # non-power-of-two depths split into a power of two plus a small rest
+    assert get_fifo_split_configs(513, 256, 32768) == [(512, "vivado"), (1, "rtl")]
+    assert get_fifo_split_configs(1200, 256, 32768) == [(1024, "vivado"), (176, "rtl")]
+    # depths above 32768 additionally get capped to the max vivado depth
+    assert get_fifo_split_configs(45000, 256, 32768) == [
+        (32768, "vivado"),
+        (8192, "vivado"),
+        (2048, "vivado"),
+        (1024, "vivado"),
+        (512, "vivado"),
+        (256, "rtl"),
+        (200, "rtl"),
+    ]