diff --git a/finn-rtllib/memstream/hdl/Q_srl.v b/finn-rtllib/memstream/hdl/Q_srl.v
index b4e89628a44bb1f55c3445ee8e6866beada23585..3c884770e026ec90b16dfd562e1861d132e714bd 100644
--- a/finn-rtllib/memstream/hdl/Q_srl.v
+++ b/finn-rtllib/memstream/hdl/Q_srl.v
@@ -69,7 +69,7 @@
 `define Q_srl
 
 
-module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count);
+module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count, maxcount);
 
    parameter depth = 16;   // - greatest #items in queue  (2 <= depth <= 256)
    parameter width = 16;   // - width of data (i_d, o_d)
@@ -90,7 +90,9 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count);
    wire               o_b;	// - output stream back-pressure
 
    output [addrwidth:0] count;  // - output number of elems in queue
+   output [addrwidth:0] maxcount;  // - maximum observed count since reset
 
+   reg [addrwidth:0] maxcount_reg;  // - maximum count seen until now
    reg    [addrwidth-1:0] addr, addr_, a_;		// - SRL16 address
 							//     for data output
    reg 			  shift_en_;			// - SRL16 shift enable
@@ -124,6 +126,7 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count);
    assign o_d = srlo;				// - output data from queue
    assign o_v = o_v_reg;			// - output valid if non-empty
    assign i_b = i_b_reg;			// - input bp if full
+   assign maxcount = maxcount_reg;		// - max count observed since reset
 
    assign i_r = !i_b;
    assign o_b = !o_r;
@@ -140,6 +143,7 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count);
          addr_full <= 0;
 	 o_v_reg   <= 0;
 	 i_b_reg   <= 1;
+	 maxcount_reg <= 0;	// - clear recorded maximum on reset
       end
       else begin
 	 state     <= state_;
@@ -147,6 +151,7 @@ module Q_srl (clock, reset, i_d, i_v, i_r, o_d, o_v, o_r, count);
          addr_full <= addr_full_;
 	 o_v_reg   <= o_v_reg_;
 	 i_b_reg   <= i_b_reg_;
+	 maxcount_reg <= (count > maxcount_reg ? count : maxcount_reg);	// - keep running max of count
       end
    end // always @ (posedge clock)
 
diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py
index a7c3cd0be59db4ba8665f8fba5be72282339b8c8..a0346f50bf6b7e88a79ba5ef4700039eb39c32ef 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfifo.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py
@@ -68,6 +68,8 @@ class StreamingFIFO(HLSCustomOp):
                 "auto",
                 {"auto", "block", "distributed", "ultra"},
             ),
+            # set to 1 to expose the maxcount output (only honored for impl_style=rtl)
+            "depth_monitor": ("i", False, 0),
         }
         my_attrs.update(super().get_nodeattr_types())
 
@@ -97,6 +99,16 @@ class StreamingFIFO(HLSCustomOp):
     def verify_node(self):
         pass
 
+    def get_verilog_top_module_intf_names(self):
+        "Return the top module interface names, plus maxcount if monitored."
+        intf_names = super().get_verilog_top_module_intf_names()
+        # maxcount is only listed for impl_style=rtl with depth_monitor=1
+        uses_rtl = self.get_nodeattr("impl_style") == "rtl"
+        monitor_on = self.get_nodeattr("depth_monitor") == 1
+        if uses_rtl and monitor_on:
+            intf_names["ap_none"] = ["maxcount"]
+        return intf_names
+
     def get_verilog_top_module_name(self):
         "Return the Verilog top module name for this node."
 
diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py
index e73fa9bb2872d4a5023afb0c4e6953b4e6866b8d..c7bbc3f139b64f57943b2b099083a9611951e9c4 100644
--- a/src/finn/custom_op/fpgadataflow/templates.py
+++ b/src/finn/custom_op/fpgadataflow/templates.py
@@ -319,6 +319,7 @@ module $TOPNAME$(
 ap_clk,
 ap_rst_n,
 count,
+maxcount,
 in0_$HLS_SNAME$_TDATA,
 in0_$HLS_SNAME$_TVALID,
 in0_$HLS_SNAME$_TREADY,
@@ -330,6 +331,7 @@ out_$HLS_SNAME$_TREADY
 input   ap_clk;
 input   ap_rst_n;
 output $COUNT_RANGE$ count;
+output $COUNT_RANGE$ maxcount;
 input  $IN_RANGE$ in0_$HLS_SNAME$_TDATA;
 input   in0_$HLS_SNAME$_TVALID;
 output   in0_$HLS_SNAME$_TREADY;
@@ -346,6 +348,7 @@ $LAYER_NAME$
  .clock(ap_clk),
  .reset(!ap_rst_n),
  .count(count),
+ .maxcount(maxcount),
  .i_d(in0_$HLS_SNAME$_TDATA),
  .i_v(in0_$HLS_SNAME$_TVALID),
  .i_r(in0_$HLS_SNAME$_TREADY),