diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index fcd6f9d788d0af1cad6de5259e5e181e76ac96bc..7e13e117859365531f459928b7c664edb3fbf4ce 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -16,7 +16,7 @@ gecho () { BREVITAS_COMMIT=989cdfdba4700fdd900ba0b25a820591d561c21a CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 HLSLIB_COMMIT=13e9b0772a27a3a1efc40c878d8e78ed09efb716 -PYVERILATOR_COMMIT=1d89cb0d4e0c97469cc6352c611f876ec13edfa6 +PYVERILATOR_COMMIT=c97a5ba41bbc7c419d6f25c74cdf3bdc3393174f PYNQSHELL_COMMIT=0c82a61b0ec1a07fa275a14146233824ded7a13d diff --git a/requirements.txt b/requirements.txt index 6b8e4d02c8ca1dcdbe607aabdccd27cec8056332..b15d86ed89f7b0e76b772ce42aba6481937310b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,5 @@ pyverilator scipy sphinx toposort +vcdvcd wget diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index e5e6d29bd8d8ed23f6a4958856ed1ddea3617175..ad44dab578b396c80af35af2ede031baca798150 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -86,9 +86,7 @@ def rtlsim_exec(model, execution_context): sim = pyverilate_stitched_ip(model) model.set_metadata_prop("rtlsim_so", sim.lib._name) else: - sim = PyVerilator(rtlsim_so) - _reset_rtlsim(sim) - _toggle_clk(sim) + sim = PyVerilator(rtlsim_so, auto_eval=False) ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file) packed_output = ret[0] model.set_metadata_prop("sim_cycles", str(ret[1])) @@ -104,18 +102,22 @@ def _reset_rtlsim(sim): """Sets reset input in pyverilator to zero, toggles the clock and set it back to one""" sim.io.ap_rst_n_0 = 0 - sim.io.ap_clk_0 = 1 - sim.io.ap_clk_0 = 0 + _toggle_clk(sim) + _toggle_clk(sim) sim.io.ap_rst_n_0 = 1 + _toggle_clk(sim) + _toggle_clk(sim) def _toggle_clk(sim): """Toggles the clock input in pyverilator once.""" - sim.io.ap_clk_0 = 1 sim.io.ap_clk_0 = 0 + sim.eval() + sim.io.ap_clk_0 = 1 + sim.eval() -def _run_rtlsim(sim, inp, 
num_out_values, trace_file=None): +def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True): """Runs the pyverilator simulation by passing the input values to the simulation, toggle the clock and observing the execution time. Argument num_out_values contains the number of expected output values, so the simulation is closed after all @@ -140,6 +142,8 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None): if trace_file is not None: sim.start_vcd_trace(trace_file) + if reset: + _reset_rtlsim(sim) while not (output_observed): sim.io.in0_V_V_0_tvalid = 1 if len(inputs) > 0 else 0 @@ -148,8 +152,7 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None): inputs = inputs[1:] if sim.io.out_r_0_tvalid == 1 and sim.io.out_r_0_tready == 1: outputs = outputs + [sim.io.out_r_0_tdata] - sim.io.ap_clk_0 = 1 - sim.io.ap_clk_0 = 0 + _toggle_clk(sim) observation_count = observation_count + 1 no_change_count = no_change_count + 1 diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 66190333ce8d71dafba99aaeae4fb2c973d67410..1f734b548f923341687843c538d1887fcc069bee 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -110,6 +110,8 @@ class StreamingFIFO(HLSCustomOp): ] # make instream width a multiple of 8 for axi interface in_width = self.get_instream_width_padded() + count_width = int(self.get_nodeattr("depth") - 1).bit_length() + self.code_gen_dict["$COUNT_RANGE$"] = ["[{}:0]".format(count_width - 1)] self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$OUT_RANGE$"] = ["[{}:0]".format(in_width - 1)] self.code_gen_dict["$WIDTH$"] = [str(in_width)] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 5f526aa2aa1917144c7a048c9d9314aa9288a2d8..1a8216f64bf71b7fb9f1f8becf4732970b5bf451 100644 --- 
a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -408,6 +408,7 @@ strm_fifo_wrapper = """ module $TOPNAME$( ap_clk, ap_rst_n, +count, in0_V_V_TDATA, in0_V_V_TVALID, in0_V_V_TREADY, @@ -418,6 +419,7 @@ out_V_V_TREADY input ap_clk; input ap_rst_n; +output $COUNT_RANGE$ count; input $IN_RANGE$ in0_V_V_TDATA; input in0_V_V_TVALID; output in0_V_V_TREADY; @@ -433,6 +435,7 @@ $LAYER_NAME$ ( .clock(ap_clk), .reset(!ap_rst_n), + .count(count), .i_d(in0_V_V_TDATA), .i_v(in0_V_V_TVALID), .i_r(in0_V_V_TREADY), diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 3880bb9591e27af5fe9d063dba2485d304e4db54..d3bfb73fe239d7194fab3760555663895a209e84 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -56,6 +56,12 @@ def get_rtlsim_trace_depth(): via the RTLSIM_TRACE_DEPTH environment variable. If the env.var. is undefined, the default value of 1 is returned. A trace depth of 1 will only show top-level signals and yield smaller .vcd files. + + The following depth values are of interest for whole-network stitched IP + rtlsim: + - level 1 shows top-level input/output streams + - level 2 shows per-layer input/output streams + - level 3 shows per full-layer I/O including FIFO count signals """ try: diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py index d1669444e55cb0fddb2690e51849c4603d47d32c..3fe747a84985b2702ffb1e5855d9071362efebda 100644 --- a/src/finn/util/fpgadataflow.py +++ b/src/finn/util/fpgadataflow.py @@ -104,6 +104,7 @@ def pyverilate_stitched_ip(model): build_dir=build_dir, trace_depth=get_rtlsim_trace_depth(), top_module_name=top_module_name, + auto_eval=False, ) return sim diff --git a/src/finn/util/vcd.py b/src/finn/util/vcd.py new file mode 100644 index 0000000000000000000000000000000000000000..d9e244422065314ceb790dc6719b57688ff76828 --- /dev/null +++ b/src/finn/util/vcd.py @@ -0,0 +1,184 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import multiprocessing as mp

# NOTE: vcdvcd and finn.util.basic are imported inside _open_vcd() and
# _parallel_map() respectively, so that the pure trace-analysis helpers in this
# module can be imported without either package being available.

# string patterns to search for to find particular interfaces
# streaming interfaces
vname = "TVALID"
rname = "TREADY"
# FIFO count signals
fifo_mod_name = "StreamingFIFO"
fifo_cname = "count"


def _open_vcd(vcd_file, **kwargs):
    "Parse vcd_file with vcdvcd, forwarding kwargs to the VCDVCD constructor."
    from vcdvcd import VCDVCD

    return VCDVCD(vcd_file, **kwargs)


def _parallel_map(worker, jobs):
    """Apply worker to every entry of jobs in a process pool and return the
    results as a list, in the order of jobs. The pool size is taken from
    get_num_default_workers()."""
    from finn.util.basic import get_num_default_workers

    with mp.Pool(get_num_default_workers()) as pool:
        return pool.map(worker, jobs)


def _stream_if_bases(sig_names):
    """Return the base name (signal name without the vname suffix) of every
    signal that ends in vname and has a sibling ending in rname."""
    known = set(sig_names)
    bases = []
    for name in sig_names:
        if not name.endswith(vname):
            continue
        # strip only the suffix; the pattern may also occur inside the
        # hierarchy part of the name and must be left alone there
        base = name[: -len(vname)]
        if base + rname in known:
            bases.append(base)
    return bases


def _fifo_count_names(sig_names):
    "Return the signal names containing both fifo_mod_name and fifo_cname."
    return [n for n in sig_names if fifo_cname in n and fifo_mod_name in n]


def _max_count(events):
    """Return the largest value among (time, binary_string) events, or 0 if
    there are no events."""
    return max((int(ev[1], base=2) for ev in events), default=0)


def _stream_stats(valid_events, ready_events, endtime):
    """Compute the per-state statistics described in get_stream_if_stats from
    the (time, value) change lists of the valid and ready signals."""
    # pyverilator generates 5 time units per sample
    time_units_per_sample = 5
    # pyverilator generates 4 samples per clock period
    samples_per_clock = 4
    state_key = "{{'V': {}, 'R': {}}}"

    # tag every change with its signal and close both signals at endtime so
    # that the final interval of the trace is accounted for
    changes = [("V", ev[0], ev[1]) for ev in valid_events]
    changes.append(("V", endtime, "0"))
    changes += [("R", ev[0], ev[1]) for ev in ready_events]
    changes.append(("R", endtime, "0"))
    # the sort is stable: at equal timestamps valid changes stay ahead of
    # ready changes
    changes.sort(key=lambda ch: ch[1])

    counts = {
        state_key.format(0, 0): 0,
        state_key.format(1, 0): 0,
        state_key.format(0, 1): 0,
        state_key.format(1, 1): 0,
    }
    level = {"V": 0, "R": 0}
    last_sample = 0
    total_rising_clock_edges = 0
    for (sig, timestamp, val) in changes:
        sample = timestamp / time_units_per_sample
        # note that the calculation of n_rising_clock_edges is approximate
        # doing this exactly would require a cycle-by-cycle walkthrough of the
        # trace, which can take very long
        n_rising_clock_edges = int((sample - last_sample) / samples_per_clock)
        # the elapsed interval was spent in the state *before* this change
        counts[state_key.format(level["V"], level["R"])] += n_rising_clock_edges
        total_rising_clock_edges += n_rising_clock_edges
        level[sig] = int(val)
        last_sample = sample

    stats = {}
    for state, n_samples in counts.items():
        # a trace shorter than one clock period has no samples at all;
        # report a fraction of 0.0 instead of dividing by zero
        if total_rising_clock_edges:
            fraction = n_samples / total_rising_clock_edges
        else:
            fraction = 0.0
        stats[state] = (n_samples, fraction)
    return stats


def list_stream_if(vcd_file):
    "Return a list of stream interface names from given vcd trace."

    sig_names = _open_vcd(vcd_file, print_dumps=False, only_sigs=True).get_signals()
    return _stream_if_bases(sig_names)


def list_fifo_count_signals(vcd_file):
    "Return a list of FIFO count signal names from given vcd trace."

    sig_names = _open_vcd(vcd_file, print_dumps=False, only_sigs=True).get_signals()
    return _fifo_count_names(sig_names)


def get_fifo_count_max(vcd_file, fifo_count_signal):
    """Return the maximum value of the given FIFO count signal in vcd trace.
    Fails with an AssertionError if the signal is not present in the trace."""

    d = _open_vcd(vcd_file, signals=[fifo_count_signal], store_tvs=True).get_data()
    assert len(d) != 0, "FIFO count signal not found"
    events = list(d.values())[0]["tv"]
    return _max_count(events)


def _get_fifo_max(x):
    "Pool worker: x is (signal_name, vcd_file); returns (signal_name, max)."
    return (x[0], get_fifo_count_max(x[1], x[0]))


def get_all_fifo_count_max(vcd_file, fifo_count_signals=None):
    """Return a list of (signal_name, max_count) tuples. If fifo_count_signals
    is None, all FIFO count signals will be returned, otherwise treated as a
    list of signal names to return the stats for."""
    if fifo_count_signals is None:
        fifo_count_signals = list_fifo_count_signals(vcd_file)

    jobs = [(sig, vcd_file) for sig in fifo_count_signals]
    return _parallel_map(_get_fifo_max, jobs)


def get_stream_if_stats(vcd_file, if_base_name):
    """Return statistics for given streaming interface in vcd trace in the
    following dict format:

    <stream_state>: (<num_samples>, <fraction_of_time>),

    where <stream_state> is the combination of (V)alid/(R)eady values,
    <num_samples> is the approximate number of rising clock edges spent in
    <state>, and <fraction_of_time> is the fraction of <num_samples> to total
    amount of time recorded by the trace. If the trace is too short to contain
    any sample, every fraction is reported as 0.0.

    Example:
    {"{'V': 0, 'R': 0}": (5, 0.0006060606060606061),
     "{'V': 1, 'R': 0}": (0, 0.0),
     "{'V': 0, 'R': 1}": (7605, 0.9218181818181819),
     "{'V': 1, 'R': 1}": (640, 0.07757575757575758)}

    Here we can see the stream was transmitting values 7.7% of the time,
    and 92.2% of the time there was no incoming data (valid 0, ready 1)

    Fails with an AssertionError if the valid or the ready signal of the
    interface is not present in the trace.
    """
    valid_trace = _open_vcd(vcd_file, signals=[if_base_name + vname], store_tvs=True)
    endtime = valid_trace.get_endtime()
    valid_data = valid_trace.get_data()
    assert len(valid_data) != 0, "Streaming interface not found"
    valid_events = list(valid_data.values())[0]["tv"]

    ready_data = _open_vcd(
        vcd_file, signals=[if_base_name + rname], store_tvs=True
    ).get_data()
    assert len(ready_data) != 0, "Streaming interface not found"
    ready_events = list(ready_data.values())[0]["tv"]

    return _stream_stats(valid_events, ready_events, endtime)


def _get_stats(x):
    "Pool worker: x is (if_base_name, vcd_file); returns (if_base_name, stats)."
    return (x[0], get_stream_if_stats(x[1], x[0]))


def get_all_stream_if_stats(vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}"):
    """Return a list of (interface_name, stats) tuples, sorted in ascending
    order by the fraction of time for the given sort_by key. If stream_ifs is
    None, all streaming interface stats will be returned, otherwise treated as
    a list of interface names to return the stats for."""

    if stream_ifs is None:
        stream_ifs = list_stream_if(vcd_file)

    jobs = [(name, vcd_file) for name in stream_ifs]
    all_stats = _parallel_map(_get_stats, jobs)

    def sort_key(x):
        # x is (interface_name, stats); each stats value is
        # (num_samples, fraction_of_time)
        (samples, percent) = x[1][sort_by]
        return percent

    return sorted(all_stats, key=sort_key)
finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +import finn.util.vcd as vcd build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -197,6 +198,8 @@ def test_end2end_tfc_w1a1_verify_dataflow_part(): res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name] # whole-network (ip-stitched) rtlsim model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_trace", build_dir + "/tfc_w1a1.vcd") + os.environ["RTLSIM_TRACE_DEPTH"] = "3" model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx") ret_rtlsim_whole = execute_onnx(model, inp_dict, True) res_rtlsim_whole = ret_rtlsim_whole[out_name] @@ -204,6 +207,24 @@ def test_end2end_tfc_w1a1_verify_dataflow_part(): assert np.isclose(res_cppsim, res_rtlsim_whole).all() +def test_end2end_tfc_w1a1_verify_fifo_fullness(): + vcdf = build_dir + "/tfc_w1a1.vcd" + if not os.path.isfile(vcdf): + pytest.skip("Cannot find %s, skipping" % vcdf) + stream_ifs = vcd.list_stream_if(vcdf) + fifos = vcd.list_fifo_count_signals(vcdf) + assert len(stream_ifs) == 37 + assert len(fifos) == 6 + fifo_max = vcd.get_all_fifo_count_max(vcdf) + assert fifo_max[0][0] == "TOP.v.finn_design_i.StreamingFIFO_0.count[3:0]" + assert fifo_max[0][1] == 3 + stream_stat = vcd.get_all_stream_if_stats(vcdf) + assert ( + stream_stat[0][0] + == "TOP.v.finn_design_i.StreamingDataWidthConverter_Batch_0_out_V_V_" + ) + + @pytest.mark.vivado def test_end2end_tfc_w1a1_verify_all(): # use the streamlined model as the "golden" model for right answers