diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 4aaf5531065abbb2aa7edc8056d679d1dc7be902..035bba3b53d85a8457eff1e7c1a23e0efff60caa 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -17,7 +17,7 @@ BREVITAS_COMMIT=215cf44c76d562339fca368c8c3afee3110033e8
 BREVITAS_EXAMPLES_COMMIT=2059f96bd576bf71f32c757e7f92617a70190c90
 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
 HLSLIB_COMMIT=6b88db826bb023937506913a23d964775a7606af
-PYVERILATOR_COMMIT=307fc5c82db748620836307a2002fdc9fe170226
+PYVERILATOR_COMMIT=fb1afefa5b207acf6fec28f8abb72a862f2ca1d2
 PYNQSHELL_COMMIT=0c82a61b0ec1a07fa275a14146233824ded7a13d
 
 
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 4ef6f0212afa7b07f958942c072c27caeda3f48b..8430a56bc2688627f82da6ae92140f5cff82cb60 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -31,7 +31,12 @@ import numpy as np
 import os
 import subprocess
 from finn.custom_op import CustomOp
-from finn.util.basic import CppBuilder, make_build_dir, roundup_to_integer_multiple
+from finn.util.basic import (
+    CppBuilder,
+    make_build_dir,
+    roundup_to_integer_multiple,
+    get_rtlsim_trace_depth,
+)
 from finn.util.fpgadataflow import (
     IPGenBuilder,
     pyverilate_get_liveness_threshold_cycles,
@@ -128,6 +133,7 @@ class HLSCustomOp(CustomOp):
                     code_gen_dir, self.onnx_node.name
                 )
             ],
+            trace_depth=get_rtlsim_trace_depth(),
         )
         # save generated lib filename in attribute
         self.set_nodeattr("rtlsim_so", sim.lib._name)
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 4eb0e6cb874f80620e3cb25017abcc29368b261b..bc413bf665e96be1d58a5de13b0744fd6a80f855 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -44,6 +44,19 @@ pynq_part_map["Pynq-Z2"] = "xc7z020clg400-1"
 pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e"
 
 
+def get_rtlsim_trace_depth():
+    """Return the trace depth for rtlsim via PyVerilator. Controllable
+    via the RTLSIM_TRACE_DEPTH environment variable. If the env.var. is
+    undefined, the default value of 1 is returned. A trace depth of 1
+    will only show top-level signals and yield smaller .vcd files.
+    """
+
+    try:
+        return int(os.environ["RTLSIM_TRACE_DEPTH"])
+    except KeyError:
+        return 1
+
+
 def get_num_default_workers():
     """Return the number of workers for parallel transformations. Controllable
     via the NUM_DEFAULT_WORKERS environment variable. If the env.var. is
diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py
index e84532d8d24909cc5add09fbc623a13c955ffb72..7a404cd53e0fcbc758a960fa2d31792fb6263a18 100644
--- a/src/finn/util/fpgadataflow.py
+++ b/src/finn/util/fpgadataflow.py
@@ -33,7 +33,7 @@ try:
     from pyverilator import PyVerilator
 except ModuleNotFoundError:
     PyVerilator = None
-from finn.util.basic import get_by_name, make_build_dir
+from finn.util.basic import get_by_name, make_build_dir, get_rtlsim_trace_depth
 
 
 class IPGenBuilder:
@@ -87,7 +87,10 @@ def pyverilate_stitched_ip(model):
     top_verilog = model.get_metadata_prop("wrapper_filename")
     build_dir = make_build_dir("pyverilator_ipstitched_")
     sim = PyVerilator.build(
-        top_verilog, verilog_path=all_verilog_dirs, build_dir=build_dir
+        top_verilog,
+        verilog_path=all_verilog_dirs,
+        build_dir=build_dir,
+        trace_depth=get_rtlsim_trace_depth(),
     )
     return sim
 
diff --git a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
index 6d72d9983d7a99d495b4e03e5ff0b5b633ee16ae..ded0bd107ab9f15a72018137c79eac640e09d3a2 100644
--- a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
+++ b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
@@ -206,15 +206,11 @@ def test_end2end_tfc_w1a1_verify_dataflow_part():
     # node-by-node rtlsim
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(PrepareRTLSim())
-    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
-    for fcl in fc_layers:
-        getCustomOp(fcl).set_nodeattr("rtlsim_trace", "default")
     model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_nodebynode_rtlsim.onnx")
     ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
     res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
     # whole-network (ip-stitched) rtlsim
     model.set_metadata_prop("exec_mode", "rtlsim")
-    model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd")
     model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx")
     ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
     res_rtlsim_whole = ret_rtlsim_whole[out_name]
diff --git a/tests/end2end/test_end2end_tfc_w1a2.py b/tests/end2end/test_end2end_tfc_w1a2.py
index f4b5de21f501b0867af6cfbedd8a14beb14d150e..52771e6d149810d70f908ac2af07e1d81f8f46ec 100644
--- a/tests/end2end/test_end2end_tfc_w1a2.py
+++ b/tests/end2end/test_end2end_tfc_w1a2.py
@@ -175,15 +175,11 @@ def test_end2end_tfc_w1a2_verify_dataflow_part():
     # node-by-node rtlsim
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(PrepareRTLSim())
-    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
-    for fcl in fc_layers:
-        getCustomOp(fcl).set_nodeattr("rtlsim_trace", "default")
     model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx")
     ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
     res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
     # whole-network (ip-stitched) rtlsim
     model.set_metadata_prop("exec_mode", "rtlsim")
-    model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd")
     model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
     ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
     res_rtlsim_whole = ret_rtlsim_whole[out_name]
diff --git a/tests/end2end/test_end2end_tfc_w2a2.py b/tests/end2end/test_end2end_tfc_w2a2.py
index 10d4bcdf77884fd48e1b5092633fd7ce0ac22566..67111da400d475311cc29b45bb24573128981958 100644
--- a/tests/end2end/test_end2end_tfc_w2a2.py
+++ b/tests/end2end/test_end2end_tfc_w2a2.py
@@ -175,15 +175,11 @@ def test_end2end_tfc_w2a2_verify_dataflow_part():
     # node-by-node rtlsim
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(PrepareRTLSim())
-    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
-    for fcl in fc_layers:
-        getCustomOp(fcl).set_nodeattr("rtlsim_trace", "default")
     model.save(build_dir + "/end2end_tfc_w2a2_ipstitch_nodebynode_rtlsim.onnx")
     ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
     res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
     # whole-network (ip-stitched) rtlsim
     model.set_metadata_prop("exec_mode", "rtlsim")
-    model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd")
     model.save(build_dir + "/end2end_tfc_w2a2_ipstitch_whole_rtlsim.onnx")
     ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
     res_rtlsim_whole = ret_rtlsim_whole[out_name]