diff --git a/notebooks/end2end_example/tfc_end2end_example.ipynb b/notebooks/end2end_example/tfc_end2end_example.ipynb index fd272d7bf1138981f9651e4c2551fa040af17c19..c388feca2340792c3535dba3fb3cf5e7220adf3c 100644 --- a/notebooks/end2end_example/tfc_end2end_example.ipynb +++ b/notebooks/end2end_example/tfc_end2end_example.ipynb @@ -730,7 +730,7 @@ " 'ip_path': ('s', False, ''),\n", " 'ip_vlnv': ('s', False, ''),\n", " 'exec_mode': ('s', False, ''),\n", - " 'sim_cycles': ('i', False, 0),\n", + " 'cycles_rtlsim': ('i', False, 0),\n", " 'rtlsim_trace': ('s', False, ''),\n", " 'res_estimate': ('s', False, ''),\n", " 'res_hls': ('s', False, ''),\n", diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index bb5b3075582b8e01e8eed95f709934302fcadb42..d83bcd3a75dd0d2fc02315c72784e57348901a04 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -102,7 +102,7 @@ def rtlsim_exec(model, execution_context): sim = PyVerilator(rtlsim_so, auto_eval=False) ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file) packed_output = ret[0] - model.set_metadata_prop("sim_cycles", str(ret[1])) + model.set_metadata_prop("cycles_rtlsim", str(ret[1])) # unpack output and put into context o_folded_tensor = rtlsim_output_to_npy( packed_output, None, o_dt, o_folded_shape, packedBits, targetBits @@ -171,7 +171,7 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True): no_change_count = no_change_count + 1 if len(outputs) == num_out_values: - sim_cycles = observation_count + cycles_rtlsim = observation_count output_observed = True if no_change_count == liveness_threshold: @@ -191,4 +191,4 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True): sim.flush_vcd_trace() sim.stop_vcd_trace() - return (outputs, sim_cycles) + return (outputs, cycles_rtlsim) diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py index 
4444e7584f843cd0edb016b520d01d71e659b904..fbfe775e581e063b08e34b3096fd34f412b47d11 100644 --- a/src/finn/core/throughput_test.py +++ b/src/finn/core/throughput_test.py @@ -125,7 +125,7 @@ def throughput_test_rtlsim(model, batchsize=100): os.environ["LIVENESS_THRESHOLD"] = "-1" rtlsim_exec(model, ctx) # extract metrics - cycles = int(model.get_metadata_prop("sim_cycles")) + cycles = int(model.get_metadata_prop("cycles_rtlsim")) clk_ns = float(model.get_metadata_prop("clk_ns")) fclk_mhz = 1 / (clk_ns * 0.001) runtime_s = (cycles * clk_ns) * (10 ** -9) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 97056ac77c5bff8cc287041c9b9bef01db6a66cb..65c898a8c453420ed96ca22715ef2595c5840288 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -82,7 +82,8 @@ class HLSCustomOp(CustomOp): "ip_path": ("s", False, ""), "ip_vlnv": ("s", False, ""), "exec_mode": ("s", False, ""), - "sim_cycles": ("i", False, 0), + "cycles_rtlsim": ("i", False, 0), + "cycles_estimate": ("i", False, 0), "rtlsim_trace": ("s", False, ""), "res_estimate": ("s", False, ""), "res_hls": ("s", False, ""), @@ -442,7 +443,7 @@ compilation transformations? no_change_count = no_change_count + 1 if len(outputs) == num_out_values: - self.set_nodeattr("sim_cycles", observation_count) + self.set_nodeattr("cycles_rtlsim", observation_count) output_observed = True if no_change_count == liveness_threshold: @@ -471,7 +472,7 @@ compilation transformations? 
trace_file = self.onnx_node.name + ".vcd" num_out_values = self.get_number_output_values() total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file) - self.set_nodeattr("sim_cycles", total_cycle_count) + self.set_nodeattr("cycles_rtlsim", total_cycle_count) def execute_node(self, context, graph): """Executes single node using cppsim or rtlsim.""" diff --git a/src/finn/transformation/fpgadataflow/annotate_cycles.py b/src/finn/transformation/fpgadataflow/annotate_cycles.py new file mode 100644 index 0000000000000000000000000000000000000000..521c84952daf25982e574421dfba3ff0f7df91ae --- /dev/null +++ b/src/finn/transformation/fpgadataflow/annotate_cycles.py @@ -0,0 +1,59 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import finn.custom_op.registry as registry
from finn.transformation import Transformation
from finn.transformation.move_reshape import _is_fpgadataflow_node
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp


class AnnotateCycles(Transformation):
    """Annotate the estimate of clock cycles per sample taken by each
    fpgadataflow node as an attribute on the node.

    For every node accepted by ``_is_fpgadataflow_node``, the value returned
    by the custom op's ``get_exp_cycles()`` is written to the node's
    ``cycles_estimate`` attribute. For every ``StreamingDataflowPartition``
    node, the model file named by the node's ``model`` attribute is loaded,
    annotated with this same transformation and saved back to the same path.
    """

    # No __init__ override is needed: the previous one only delegated to
    # super().__init__() without arguments, which the inherited constructor
    # already does.

    def apply(self, model):
        """Annotate the nodes of ``model`` and return ``(model, False)``.

        The node attributes are set on the nodes of ``model.graph`` directly,
        so the passed-in model is modified. The second tuple element is always
        ``False``.
        NOTE(review): presumably this flag tells the caller that the
        transformation does not need to be applied again -- confirm against
        the ``Transformation`` base class.
        """
        for node in model.graph.node:
            if _is_fpgadataflow_node(node):
                op_inst = registry.getCustomOp(node)
                op_inst.set_nodeattr("cycles_estimate", op_inst.get_exp_cycles())
            elif node.op_type == "StreamingDataflowPartition":
                self._annotate_partition(node)
        return (model, False)

    @staticmethod
    def _annotate_partition(node):
        """Annotate the per-layer cycles of the model referenced by ``node``.

        The referenced model file is overwritten with the annotated model.
        """
        sdp_model_filename = getCustomOp(node).get_nodeattr("model")
        sdp_model = ModelWrapper(sdp_model_filename)
        # recurse with a fresh transformation instance, as nested partitions
        # are handled by the same code path
        sdp_model = sdp_model.transform(AnnotateCycles())
        # persist the annotations next to the parent model
        sdp_model.save(sdp_model_filename)
finn.transformation.move_reshape import RemoveCNVtoFCFlatten from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul from finn.transformation.streamline.reorder import MakeMaxPoolNHWC from finn.transformation.infer_data_layouts import InferDataLayouts +from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] @@ -115,6 +116,7 @@ def test_end2end_zynqbuild_cnv_w1a1_convert_to_hls_layers(): model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode)) model = model.transform(to_hls.InferConvInpGen()) model = model.transform(to_hls.InferStreamingMaxPool()) + model = model.transform(GiveUniqueNodeNames()) model = model.transform(RemoveCNVtoFCFlatten()) model = model.transform(InferDataLayouts()) model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_hls_layers.onnx") @@ -164,6 +166,7 @@ def test_end2end_zynqbuild_cnv_w1a1_fold(): swg_inst.set_nodeattr("SIMD", simd) swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i]) model = model.transform(AnnotateResources("estimate")) + model = model.transform(AnnotateCycles()) model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_folded.onnx") diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py index 6d1ff31ab554ef1d3fe8ef1fac66e6bc3406efbb..d69e4c3231a3381a9eecab2a551455714dd26720 100644 --- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py +++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py @@ -115,10 +115,10 @@ def test_convert_to_hls_conv_layer(conv_config, exec_mode): if exec_mode == "rtlsim": node = new_model.get_nodes_by_op_type("DownSampler")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = new_model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=11) + assert np.isclose(exp_cycles, 
cycles_rtlsim, atol=11) assert exp_cycles != 0 if pad == 1: diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py index fbf1e72da266141bd8328cc88c2e8bebff8301fb..86409feffd120b1baeeee471415e93f29d9e655a 100644 --- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py +++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py @@ -215,7 +215,7 @@ def test_convert_to_hls_pool_batch( if exec_mode == "rtlsim": node = new_model.get_nodes_by_op_type("Pool_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = new_model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=10) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py index 7a3df667b7feeafa017e3b03c11d4e55be07b195..81456796a75c6bf6a01c0a1f83c38b0b39bf4c81 100644 --- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py @@ -132,8 +132,8 @@ def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode): if exec_mode == "rtlsim": node = model.get_nodes_by_op_type("AddStreams_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=10) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py index 3cd937287270481911622c118db27d5a2153a823..23ce8314e9c45196d7311ac58cb6bb5ef5267220 100644 --- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py +++ 
b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -159,8 +159,8 @@ def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_m node = model.get_nodes_by_op_type("ChannelwiseOp_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=10) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py index afebcca73f1f2cfdf82061004a7473145b2ff928..020a2a545dadaf32c469789c90d0ea530688812c 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py @@ -190,8 +190,8 @@ def test_fpgadataflow_slidingwindow( if exec_mode == "rtlsim": node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=10) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py index 4255a4dcafadadf3e3de53bf5e7ee9798e74a26d..5066b9709cac922f6bd3670ec7199f3e0f8fd9a2 100644 --- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py @@ -137,8 +137,8 @@ def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode): if exec_mode == "rtlsim": node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0] inst = getCustomOp(node) - 
sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=10) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py index 249f84e9014c4a2f656074062bc53d3f3efd485f..37a1cc81ebd0824cdd8ac2c073298ad39424f57f 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py +++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py @@ -314,10 +314,10 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=15) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) assert exp_cycles != 0 @@ -415,8 +415,8 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim( node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=15) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py index d1142ceacaec00f6b532cfa54ad5397bf5562bf4..ef4f17998dbb09d31cdc9b3c89afafd10653fd28 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py +++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py @@ -129,8 +129,8 @@ 
def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode): if mode == "rtlsim": node = model.get_nodes_by_op_type("FMPadding_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=10) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py index 06a1311ab99fefd88b15ee1896b978c83f495e2b..27f1a32a481f006818fbdd7e879bd9dd92242c80 100644 --- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py +++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py @@ -127,13 +127,13 @@ def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode): if exec_mode == "rtlsim": node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] # commented out, needs performance debug: # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4] # assert False where False = # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103)) - # assert np.isclose(exp_cycles, sim_cycles, atol=0.1 * sim_cycles) + # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim) assert exp_cycles != 0 - assert sim_cycles != 0 + assert cycles_rtlsim != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index 218c9e61ee5d5ef561bc7c720c2a408c858967af..1715bcad0dd29799cdc99497179ce8635058f3be 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ 
b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -157,8 +157,8 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode): node = model.get_nodes_by_op_type("Thresholding_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=10) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py index 0b021a4c48047a321b0a7be88d034d6043207984..d61edc86dd6b5669c334e6b7f78ea9a8550cae93 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py @@ -161,8 +161,8 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode): if exec_mode == "rtlsim": node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0] inst = getCustomOp(node) - sim_cycles = inst.get_nodeattr("sim_cycles") + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] - assert np.isclose(exp_cycles, sim_cycles, atol=15) + assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) assert exp_cycles != 0