diff --git a/notebooks/end2end_example/tfc_end2end_example.ipynb b/notebooks/end2end_example/tfc_end2end_example.ipynb
index fd272d7bf1138981f9651e4c2551fa040af17c19..c388feca2340792c3535dba3fb3cf5e7220adf3c 100644
--- a/notebooks/end2end_example/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/tfc_end2end_example.ipynb
@@ -730,7 +730,7 @@
        " 'ip_path': ('s', False, ''),\n",
        " 'ip_vlnv': ('s', False, ''),\n",
        " 'exec_mode': ('s', False, ''),\n",
-       " 'sim_cycles': ('i', False, 0),\n",
+       " 'cycles_rtlsim': ('i', False, 0),\n",
        " 'rtlsim_trace': ('s', False, ''),\n",
        " 'res_estimate': ('s', False, ''),\n",
        " 'res_hls': ('s', False, ''),\n",
diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py
index bb5b3075582b8e01e8eed95f709934302fcadb42..d83bcd3a75dd0d2fc02315c72784e57348901a04 100644
--- a/src/finn/core/rtlsim_exec.py
+++ b/src/finn/core/rtlsim_exec.py
@@ -102,7 +102,7 @@ def rtlsim_exec(model, execution_context):
         sim = PyVerilator(rtlsim_so, auto_eval=False)
     ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file)
     packed_output = ret[0]
-    model.set_metadata_prop("sim_cycles", str(ret[1]))
+    model.set_metadata_prop("cycles_rtlsim", str(ret[1]))
     # unpack output and put into context
     o_folded_tensor = rtlsim_output_to_npy(
         packed_output, None, o_dt, o_folded_shape, packedBits, targetBits
@@ -171,7 +171,7 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True):
         no_change_count = no_change_count + 1
 
         if len(outputs) == num_out_values:
-            sim_cycles = observation_count
+            cycles_rtlsim = observation_count
             output_observed = True
 
         if no_change_count == liveness_threshold:
@@ -191,4 +191,4 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True):
         sim.flush_vcd_trace()
         sim.stop_vcd_trace()
 
-    return (outputs, sim_cycles)
+    return (outputs, cycles_rtlsim)
diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py
index 4444e7584f843cd0edb016b520d01d71e659b904..fbfe775e581e063b08e34b3096fd34f412b47d11 100644
--- a/src/finn/core/throughput_test.py
+++ b/src/finn/core/throughput_test.py
@@ -125,7 +125,7 @@ def throughput_test_rtlsim(model, batchsize=100):
     os.environ["LIVENESS_THRESHOLD"] = "-1"
     rtlsim_exec(model, ctx)
     # extract metrics
-    cycles = int(model.get_metadata_prop("sim_cycles"))
+    cycles = int(model.get_metadata_prop("cycles_rtlsim"))
     clk_ns = float(model.get_metadata_prop("clk_ns"))
     fclk_mhz = 1 / (clk_ns * 0.001)
     runtime_s = (cycles * clk_ns) * (10 ** -9)
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 97056ac77c5bff8cc287041c9b9bef01db6a66cb..65c898a8c453420ed96ca22715ef2595c5840288 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -82,7 +82,8 @@ class HLSCustomOp(CustomOp):
             "ip_path": ("s", False, ""),
             "ip_vlnv": ("s", False, ""),
             "exec_mode": ("s", False, ""),
-            "sim_cycles": ("i", False, 0),
+            "cycles_rtlsim": ("i", False, 0),
+            "cycles_estimate": ("i", False, 0),
             "rtlsim_trace": ("s", False, ""),
             "res_estimate": ("s", False, ""),
             "res_hls": ("s", False, ""),
@@ -442,7 +443,7 @@ compilation transformations?
             no_change_count = no_change_count + 1
 
             if len(outputs) == num_out_values:
-                self.set_nodeattr("sim_cycles", observation_count)
+                self.set_nodeattr("cycles_rtlsim", observation_count)
                 output_observed = True
 
             if no_change_count == liveness_threshold:
@@ -471,7 +472,7 @@ compilation transformations?
             trace_file = self.onnx_node.name + ".vcd"
         num_out_values = self.get_number_output_values()
         total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file)
-        self.set_nodeattr("sim_cycles", total_cycle_count)
+        self.set_nodeattr("cycles_rtlsim", total_cycle_count)
 
     def execute_node(self, context, graph):
         """Executes single node using cppsim or rtlsim."""
diff --git a/src/finn/transformation/fpgadataflow/annotate_cycles.py b/src/finn/transformation/fpgadataflow/annotate_cycles.py
new file mode 100644
index 0000000000000000000000000000000000000000..521c84952daf25982e574421dfba3ff0f7df91ae
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/annotate_cycles.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import finn.custom_op.registry as registry
+from finn.transformation import Transformation
+from finn.transformation.move_reshape import _is_fpgadataflow_node
+from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.registry import getCustomOp
+
+
+class AnnotateCycles(Transformation):
+    """Annotate each fpgadataflow node with its expected cycle count, as returned
+    by get_exp_cycles(), in the "cycles_estimate" node attribute.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def apply(self, model):
+        graph = model.graph
+        # store the expected cycle count on every fpgadataflow node of this graph
+        for node in graph.node:
+            if _is_fpgadataflow_node(node):
+                op_inst = registry.getCustomOp(node)
+                cycles = op_inst.get_exp_cycles()
+                op_inst.set_nodeattr("cycles_estimate", cycles)
+            elif node.op_type == "StreamingDataflowPartition":
+                # load the model named by the "model" attribute and annotate it too
+                sdp_model_filename = getCustomOp(node).get_nodeattr("model")
+                sdp_model = ModelWrapper(sdp_model_filename)
+                sdp_model = sdp_model.transform(AnnotateCycles())
+                # write the annotated model back to the file it was loaded from
+                sdp_model.save(sdp_model_filename)
+        return (model, False)  # presumably False = no re-apply needed; confirm
diff --git a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
index cefe4d038f3e346f39f3c2aa995708123a2e26da..25cafcfd4c552fb368cbaca2d1d2714cf2d14011 100644
--- a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
+++ b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py
@@ -62,6 +62,7 @@ from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
 from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
 
 
 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
@@ -115,6 +116,7 @@ def test_end2end_zynqbuild_cnv_w1a1_convert_to_hls_layers():
     model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
     model = model.transform(to_hls.InferConvInpGen())
     model = model.transform(to_hls.InferStreamingMaxPool())
+    model = model.transform(GiveUniqueNodeNames())
     model = model.transform(RemoveCNVtoFCFlatten())
     model = model.transform(InferDataLayouts())
     model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_hls_layers.onnx")
@@ -164,6 +166,7 @@ def test_end2end_zynqbuild_cnv_w1a1_fold():
         swg_inst.set_nodeattr("SIMD", simd)
         swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i])
     model = model.transform(AnnotateResources("estimate"))
+    model = model.transform(AnnotateCycles())
     model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_folded.onnx")
 
 
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
index 6d1ff31ab554ef1d3fe8ef1fac66e6bc3406efbb..d69e4c3231a3381a9eecab2a551455714dd26720 100644
--- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -115,10 +115,10 @@ def test_convert_to_hls_conv_layer(conv_config, exec_mode):
         if exec_mode == "rtlsim":
             node = new_model.get_nodes_by_op_type("DownSampler")[0]
             inst = getCustomOp(node)
-            sim_cycles = inst.get_nodeattr("sim_cycles")
+            cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
             exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
             exp_cycles = exp_cycles_dict[node.name]
-            assert np.isclose(exp_cycles, sim_cycles, atol=11)
+            assert np.isclose(exp_cycles, cycles_rtlsim, atol=11)
             assert exp_cycles != 0
 
     if pad == 1:
diff --git a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
index fbf1e72da266141bd8328cc88c2e8bebff8301fb..86409feffd120b1baeeee471415e93f29d9e655a 100644
--- a/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
+++ b/tests/fpgadataflow/test_convert_to_hls_pool_batch.py
@@ -215,7 +215,7 @@ def test_convert_to_hls_pool_batch(
     if exec_mode == "rtlsim":
         node = new_model.get_nodes_by_op_type("Pool_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=10)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
index 7a3df667b7feeafa017e3b03c11d4e55be07b195..81456796a75c6bf6a01c0a1f83c38b0b39bf4c81 100644
--- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py
@@ -132,8 +132,8 @@ def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode):
     if exec_mode == "rtlsim":
         node = model.get_nodes_by_op_type("AddStreams_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=10)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
         assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
index 3cd937287270481911622c118db27d5a2153a823..23ce8314e9c45196d7311ac58cb6bb5ef5267220 100644
--- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
+++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
@@ -159,8 +159,8 @@ def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_m
 
         node = model.get_nodes_by_op_type("ChannelwiseOp_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=10)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
         assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index afebcca73f1f2cfdf82061004a7473145b2ff928..020a2a545dadaf32c469789c90d0ea530688812c 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -190,8 +190,8 @@ def test_fpgadataflow_slidingwindow(
     if exec_mode == "rtlsim":
         node = model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=10)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
         assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
index 4255a4dcafadadf3e3de53bf5e7ee9798e74a26d..5066b9709cac922f6bd3670ec7199f3e0f8fd9a2 100644
--- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
+++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py
@@ -137,8 +137,8 @@ def test_fpgadataflow_duplicatestreams(idt, ch, fold, imdim, exec_mode):
     if exec_mode == "rtlsim":
         node = model.get_nodes_by_op_type("DuplicateStreams_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=10)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
         assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 249f84e9014c4a2f656074062bc53d3f3efd485f..37a1cc81ebd0824cdd8ac2c073298ad39424f57f 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -314,10 +314,10 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
 
     node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
     inst = getCustomOp(node)
-    sim_cycles = inst.get_nodeattr("sim_cycles")
+    cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
     exp_cycles_dict = model.analysis(exp_cycles_per_layer)
     exp_cycles = exp_cycles_dict[node.name]
-    assert np.isclose(exp_cycles, sim_cycles, atol=15)
+    assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
     assert exp_cycles != 0
 
 
@@ -415,8 +415,8 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode_rtlsim(
 
     node = model.get_nodes_by_op_type("StreamingFCLayer_Batch")[0]
     inst = getCustomOp(node)
-    sim_cycles = inst.get_nodeattr("sim_cycles")
+    cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
     exp_cycles_dict = model.analysis(exp_cycles_per_layer)
     exp_cycles = exp_cycles_dict[node.name]
-    assert np.isclose(exp_cycles, sim_cycles, atol=15)
+    assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
     assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
index d1142ceacaec00f6b532cfa54ad5397bf5562bf4..ef4f17998dbb09d31cdc9b3c89afafd10653fd28 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
@@ -129,8 +129,8 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, pad_style, idt, mode):
     if mode == "rtlsim":
         node = model.get_nodes_by_op_type("FMPadding_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=10)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
         assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
index 06a1311ab99fefd88b15ee1896b978c83f495e2b..27f1a32a481f006818fbdd7e879bd9dd92242c80 100644
--- a/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
+++ b/tests/fpgadataflow/test_fpgadataflow_globalaccpool.py
@@ -127,13 +127,13 @@ def test_fpgadataflow_globalaccpool(idt, ch, fold, imdim, exec_mode):
     if exec_mode == "rtlsim":
         node = model.get_nodes_by_op_type("GlobalAccPool_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
         # commented out, needs performance debug:
         # test_fpgadataflow_globalaccpool[rtlsim-7-1-64-DataType.UINT4]
         # assert False where False =
         # <function isclose at 0x7eff26d5ca60>(50, 103, atol=(0.1 * 103))
-        # assert np.isclose(exp_cycles, sim_cycles, atol=0.1 * sim_cycles)
+        # assert np.isclose(exp_cycles, cycles_rtlsim, atol=0.1 * cycles_rtlsim)
         assert exp_cycles != 0
-        assert sim_cycles != 0
+        assert cycles_rtlsim != 0
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
index 218c9e61ee5d5ef561bc7c720c2a408c858967af..1715bcad0dd29799cdc99497179ce8635058f3be 100644
--- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py
+++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py
@@ -157,8 +157,8 @@ def test_fpgadataflow_thresholding(idt, act, nf, ich, exec_mode):
 
         node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=10)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=10)
         assert exp_cycles != 0
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index 0b021a4c48047a321b0a7be88d034d6043207984..d61edc86dd6b5669c334e6b7f78ea9a8550cae93 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -161,8 +161,8 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
     if exec_mode == "rtlsim":
         node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0]
         inst = getCustomOp(node)
-        sim_cycles = inst.get_nodeattr("sim_cycles")
+        cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
         exp_cycles_dict = model.analysis(exp_cycles_per_layer)
         exp_cycles = exp_cycles_dict[node.name]
-        assert np.isclose(exp_cycles, sim_cycles, atol=15)
+        assert np.isclose(exp_cycles, cycles_rtlsim, atol=15)
         assert exp_cycles != 0