diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index ac73831ab492eb983055bfbae64d3b385b48d09f..7f82874716d8e646ee7aeb0850466a5d356ba1ac 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -87,7 +87,7 @@ from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles
 from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance
 from finn.core.modelwrapper import ModelWrapper
 from scipy.stats import linregress
-from finn.core.throughput_test import throughput_test_remote
+from finn.core.throughput_test import throughput_test_remote, throughput_test_rtlsim
 
 build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
 target_clk_ns = 10
@@ -342,6 +342,27 @@ class TestEnd2End:
         warnings.warn("Estimated & rtlsim performance: " + str(perf))
         assert np.isclose(y, output_tensor_npy).all()
 
+    @pytest.mark.slow
+    @pytest.mark.vivado
+    def test_throughput_rtlsim(self, topology, wbits, abits):
+        prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "ipstitch_rtlsim")
+        model = load_test_checkpoint_or_skip(prev_chkpt_name)
+        n_nodes = len(model.graph.node)
+        perf_est = model.analysis(dataflow_performance)
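+        # single-sample latency measured during the earlier rtlsim run of this checkpoint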
+        latency = int(model.get_metadata_prop("cycles_rtlsim"))
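+        # steady-state cycles per sample, i.e. the estimated cycles of the slowest node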
+        cycles_per_sample_est = perf_est["max_cycles"]
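+        # batch large enough for the dataflow pipeline to reach steady state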
+        batchsize = 2 * n_nodes
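+        # measure the actual cycle count with an rtlsim-based throughput test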
+        ret = throughput_test_rtlsim(model, batchsize=batchsize)
+        res_cycles = ret["cycles"]
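+        # expected total: pipeline fill latency plus cycles per sample over the batch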
+        est_cycles = latency + cycles_per_sample_est * batchsize
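+        # measured and estimated cycle counts should agree to within 15%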
+        assert (abs(res_cycles - est_cycles) / res_cycles) < 0.15
+
     @pytest.mark.slow
     @pytest.mark.vivado
     @pytest.mark.vitis