diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index f196ed6331aea04ad0186980528f9a0a90e01d6a..07f971bdbd35faa789bdb19d0af224d749cdeba4 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -29,6 +29,7 @@
 import json
 import numpy as np
 import os
+import warnings
 from copy import deepcopy
 from distutils.dir_util import copy_tree
 from qonnx.core.modelwrapper import ModelWrapper
@@ -112,6 +113,7 @@ from finn.util.basic import (
     get_rtlsim_trace_depth,
     pyverilate_get_liveness_threshold_cycles,
 )
+from finn.util.pyverilator import verilator_fifosim
 from finn.util.test import execute_parent
 
 
@@ -486,12 +488,20 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
             model = model.transform(GiveUniqueNodeNames())
             model = model.transform(GiveReadableTensorNames())
         elif cfg.auto_fifo_strategy == "largefifo_rtlsim":
+            # multi-in/out streams currently not supported in our C++ verilator driver
+            model_multi_io = len(model.graph.input) > 1 or len(model.graph.output) > 1
+            force_python_sim = model_multi_io or cfg.force_python_rtlsim
+            if model_multi_io:
+                warnings.warn(
+                    "Multi-in/out streams currently not supported "
+                    + "in FINN C++ verilator driver, falling back to Python"
+                )
             model = model.transform(
                 InsertAndSetFIFODepths(
                     cfg._resolve_fpga_part(),
                     cfg._resolve_hls_clk_period(),
                     vivado_ram_style=cfg.large_fifo_mem_style,
-                    force_python_sim=cfg.force_python_rtlsim,
+                    force_python_sim=force_python_sim,
                 )
             )
         else:
@@ -588,20 +598,49 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi
         # prepare ip-stitched rtlsim
         rtlsim_model = deepcopy(model)
         rtlsim_model = prepare_for_stitched_ip_rtlsim(rtlsim_model, cfg)
-        # run with single input to get latency
-        orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
+        # multi-in/out streams currently not supported in our C++ verilator driver
+        model_multi_io = (
+            len(rtlsim_model.graph.input) > 1 or len(rtlsim_model.graph.output) > 1
+        )
+        force_python_rtlsim = cfg.force_python_rtlsim or model_multi_io
+        if model_multi_io:
+            warnings.warn(
+                "Multi-in/out streams currently not supported "
+                + "in FINN C++ verilator driver, falling back to Python"
+            )
         rtlsim_bs = int(cfg.rtlsim_batch_size)
         assert rtlsim_bs > 0, "rtlsim batch size must be >0"
-        if cfg.verify_save_rtlsim_waveforms:
-            # set depth to 3 for layer-by-layer visibility
-            os.environ["RTLSIM_TRACE_DEPTH"] = "3"
+        if force_python_rtlsim:
+            # run with single input to get latency
+            orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
+            if cfg.verify_save_rtlsim_waveforms:
+                # set depth to 3 for layer-by-layer visibility
+                os.environ["RTLSIM_TRACE_DEPTH"] = "3"
+                rtlsim_model.set_metadata_prop(
+                    "rtlsim_trace",
+                    "%s/rtlsim_perf_batch_%d.vcd" % (report_dir, rtlsim_bs),
+                )
             rtlsim_model.set_metadata_prop(
-                "rtlsim_trace", "%s/rtlsim_perf_batch_%d.vcd" % (report_dir, rtlsim_bs)
+                "extra_verilator_args", str(["-CFLAGS", "-O3"])
             )
-        rtlsim_model.set_metadata_prop("extra_verilator_args", str(["-CFLAGS", "-O3"]))
-        rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs)
-        rtlsim_latency = rtlsim_perf_dict["cycles"]
-        rtlsim_perf_dict["latency_cycles"] = rtlsim_latency
+            rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs)
+            rtlsim_latency = rtlsim_perf_dict["cycles"]
+            rtlsim_perf_dict["latency_cycles"] = rtlsim_latency
+        else:
+            rtlsim_perf_dict = verilator_fifosim(model, rtlsim_bs)
+            # keep keys consistent between the Python and C++-styles
+            cycles = rtlsim_perf_dict["cycles"]
+            clk_ns = float(model.get_metadata_prop("clk_ns"))
+            fclk_mhz = 1 / (clk_ns * 0.001)
+            runtime_s = (cycles * clk_ns) * (10**-9)
+            rtlsim_perf_dict["runtime[ms]"] = runtime_s * 1000
+            rtlsim_perf_dict["throughput[images/s]"] = rtlsim_bs / runtime_s
+            rtlsim_perf_dict["fclk[mhz]"] = fclk_mhz
+            # snapshot keys; deleting from a dict mid-iteration raises RuntimeError
+            for key in list(rtlsim_perf_dict):
+                if "max_count" in key:
+                    del rtlsim_perf_dict[key]
+
         with open(report_dir + "/rtlsim_performance.json", "w") as f:
             json.dump(rtlsim_perf_dict, f, indent=2)
         if cfg.verify_save_rtlsim_waveforms: