diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 2ee898bc7d50822f962b6a70cf86b2893e0937b7..6e07a541e3d462b159792482dae4777999921a2c 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -668,7 +668,6 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi rtlsim_bs = int(cfg.rtlsim_batch_size) orig_rtlsim_trace_depth = get_rtlsim_trace_depth() if force_python_rtlsim: - # run with single input to get latency assert rtlsim_bs > 0, "rtlsim batch size must be >0" if cfg.verify_save_rtlsim_waveforms: # set depth to 3 for layer-by-layer visibility @@ -680,9 +679,11 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi rtlsim_model.set_metadata_prop( "extra_verilator_args", str(["-CFLAGS", "-O3"]) ) + # run with single input to get latency + rtlsim_latency_dict = throughput_test_rtlsim(rtlsim_model, 1) + # run with batch to get stable-state throughput rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs) - rtlsim_latency = rtlsim_perf_dict["cycles"] - rtlsim_perf_dict["latency_cycles"] = rtlsim_latency + rtlsim_perf_dict["latency_cycles"] = rtlsim_latency_dict["cycles"] else: rtlsim_perf_dict = verilator_fifosim(model, rtlsim_bs) # keep keys consistent between the Python and C++-styles @@ -696,6 +697,19 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi for (key, val) in rtlsim_perf_dict.items(): if "max_count" in key: del rtlsim_perf_dict[key] + # estimate stable-state throughput based on latency+throughput + if rtlsim_bs == 1: + rtlsim_perf_dict["stable_throughput[images/s]"] = rtlsim_perf_dict[ + "throughput[images/s]" + ] + else: + total_cycles = rtlsim_perf_dict["cycles"] + latency_cycles = rtlsim_perf_dict["latency_cycles"] + stablestate_cycles = total_cycles - latency_cycles + clk_ns = float(model.get_metadata_prop("clk_ns")) + fclk_mhz = 1 / (clk_ns * 0.001) + 
runtime_s = (stablestate_cycles * clk_ns) * (10**-9) + rtlsim_perf_dict["stable_throughput[images/s]"] = rtlsim_bs / runtime_s with open(report_dir + "/rtlsim_performance.json", "w") as f: json.dump(rtlsim_perf_dict, f, indent=2) diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py index 632d1f813b4d2509407930bc9294f7531d4c90af..cff8b602674fec41a1e6fd1d467acdc989b4afe2 100644 --- a/src/finn/transformation/fpgadataflow/insert_dwc.py +++ b/src/finn/transformation/fpgadataflow/insert_dwc.py @@ -81,15 +81,11 @@ class InsertDWC(Transformation): dwc_in_width = n0.get_outstream_width() # determine dwc outwidth dwc_out_width = n1.get_instream_width() - larger_width = max(dwc_in_width, dwc_out_width) - smaller_width = min(dwc_in_width, dwc_out_width) - both_8bit_aligned = (larger_width % 8 == 0) and ( - smaller_width % 8 == 0 - ) - if both_8bit_aligned: - impl_style = "vivado" - else: - impl_style = "hls" + # use hls mode by default since it supports more configs + # vivado mode can be manually enabled by user, but does not + # support e.g. 
node-by-node rtlsim needed for + # characterization-based FIFO sizing + impl_style = "hls" # determine shape for dwc dwc_shape = n0.get_normal_output_shape() diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index f4f2b8dbfff0d720ec4eb901704581b096c0ea40..9399fbe3949a5d0052ba80b24b1a9e0c44c5597c 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -55,7 +55,7 @@ def fetch_test_model(topology, wbits=2, abits=2): @pytest.mark.parametrize( "method", ["largefifo_rtlsim_python", "largefifo_rtlsim_cpp", "characterize"] ) -@pytest.mark.parametrize("topology", ["tfc"]) +@pytest.mark.parametrize("topology", ["tfc", "cnv"]) def test_fifosizing_linear(method, topology): force_python_rtlsim = "python" in method method_key = "largefifo_rtlsim" if "largefifo_rtlsim" in method else "characterize" @@ -68,7 +68,7 @@ def test_fifosizing_linear(method, topology): force_python_rtlsim=force_python_rtlsim, synth_clk_period_ns=10.0, board="Pynq-Z1", - rtlsim_batch_size=100, + rtlsim_batch_size=100 if topology == "tfc" else 2, shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, generate_outputs=[ build_cfg.DataflowOutputType.ESTIMATE_REPORTS, @@ -83,7 +83,7 @@ def test_fifosizing_linear(method, topology): with open(tmp_output_dir + "/report/rtlsim_performance.json") as f: sim_data = json.load(f) assert ( - float(sim_data["throughput[images/s]"]) + float(sim_data["stable_throughput[images/s]"]) / float(est_data["estimated_throughput_fps"]) > 0.9 )