diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 13946c9d1e3780f5e45ee2ca863190d36849ea4f..51e751610108289c04fa0ae72fe593cbb0c16a86 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -257,7 +257,7 @@ class DataflowBuildConfig:
     #: setting the FIFO sizes.
     auto_fifo_strategy: Optional[
         AutoFIFOSizingMethod
-    ] = AutoFIFOSizingMethod.CHARACTERIZE
+    ] = AutoFIFOSizingMethod.LARGEFIFO_RTLSIM
 
     #: Memory resource type for large FIFOs
     #: Only relevant when `auto_fifo_depths = True`
diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index 3ed76db2982e411b711be5bd78e39dd866332714..9d08a2432834f0786695d6300c8bcd68c18452d5 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -103,8 +103,8 @@ class ChannelwiseOp_Batch(HLSCustomOp):
             "paramDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
             # input and output FIFO depths
-            "inFIFODepth": ("i", False, 0),
-            "outFIFODepth": ("i", False, 0),
+            "inFIFODepths": ("ints", False, [0]),
+            "outFIFODepths": ("ints", False, [0]),
             # number of input vectors, examples:
             # [1] is a single vector (like a FC layer with batch=1)
             # [4] is four vectors (like a FC layer with batch=4)
diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py
index 79ae6957564ce07c18b76552089f64107fe51356..7d322dc372ba91d1613fd5cf9601ff2fc0d023ea 100644
--- a/src/finn/custom_op/fpgadataflow/hlscustomop.py
+++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py
@@ -108,12 +108,9 @@ class HLSCustomOp(CustomOp):
             # ID of FPGA device to which this Op is allocated, in
             # a multi-FPGA setting
             "device_id": ("i", False, 0),
-            # input and output FIFO depths
-            "inFIFODepth": ("i", False, 2),
-            "outFIFODepth": ("i", False, 2),
             # input and output FIFO depths for multi-I/O nodes
-            "inFIFODepths": ("ints", False, []),
-            "outFIFODepths": ("ints", False, []),
+            "inFIFODepths": ("ints", False, [2]),
+            "outFIFODepths": ("ints", False, [2]),
             "output_hook": ("s", False, ""),
             # accumulated characteristic function over two periods
             "io_chrc_in": ("t", False, np.asarray([], dtype=np.int32)),
diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index d5dbc86c4ec2469f8588b82e76d0295bafb1328e..e78a918e81385a5b6e837a384c6c1a238073a80d 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -1227,8 +1227,11 @@ class MatrixVectorActivation(HLSCustomOp):
         self.code_gen_dict["$PRAGMAS$"].append(
             "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname()
         )
-        in_fifo_depth = self.get_nodeattr("inFIFODepth")
-        out_fifo_depth = self.get_nodeattr("outFIFODepth")
+        # TODO: can this be removed entirely? It looks like legacy code that
+        # serves no real purpose: FIFO sizes are typically not yet allocated
+        # at this point, so at best they hold the default value of 2.
+        in_fifo_depth = 2
+        out_fifo_depth = 2
         # insert depth pragmas only if specified
         if in_fifo_depth != 0:
             self.code_gen_dict["$PRAGMAS$"].append(
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 828ddd97375a8466079a44f297d9c4a0e2a59ea1..110e456cbd3885e1bf434743ee422a425c2d7b99 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -76,8 +76,8 @@ class Thresholding_Batch(HLSCustomOp):
             "weightDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
             # input and output FIFO depths
-            "inFIFODepth": ("i", False, 0),
-            "outFIFODepth": ("i", False, 0),
+            "inFIFODepths": ("ints", False, [0]),
+            "outFIFODepths": ("ints", False, [0]),
             # number of input vectors, examples:
             # [1] is a single vector (like a FC layer with batch=1)
             # [4] is four vectors (like a FC layer with batch=4)
diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index abcb1c756d6f2caad3a68e32c0409e5930929c6a..6391f27bbb80aa0d8cc8c30e752d8fe4ff142fae 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -578,8 +578,11 @@ class VectorVectorActivation(HLSCustomOp):
         self.code_gen_dict["$PRAGMAS$"].append(
             "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname()
         )
-        in_fifo_depth = self.get_nodeattr("inFIFODepth")
-        out_fifo_depth = self.get_nodeattr("outFIFODepth")
+        # TODO: can this be removed entirely? It looks like legacy code that
+        # serves no real purpose: FIFO sizes are typically not yet allocated
+        # at this point, so at best they hold the default value of 2.
+        in_fifo_depth = 2
+        out_fifo_depth = 2
         # insert depth pragmas only if specified
         if in_fifo_depth != 0:
             self.code_gen_dict["$PRAGMAS$"].append(
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 429bc34ffc59b5d98bb559f36ac557de4dbba92f..753dbb0f87a877f69a24bb520d09a173925cafd4 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -1234,6 +1234,7 @@ class InferDuplicateStreamsLayer(Transformation):
                     inputDataType=dt.name,
                     numInputVectors=vecs,
                     NumOutputStreams=n_outputs,
+                    outFIFODepths=[2] * n_outputs,
                     name="DuplicateStreams_Batch_" + node.name,
                 )
 
diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py
index b9222cf3eef1a8566e698b4f58a85d100f824243..e75d1880cb1b7c5c07d3c7a4e155216e4adc740d 100644
--- a/src/finn/transformation/fpgadataflow/insert_fifo.py
+++ b/src/finn/transformation/fpgadataflow/insert_fifo.py
@@ -138,14 +138,8 @@ class InsertFIFO(Transformation):
 
                         # check if outFIFOdepth attribute of first node
                         # and inFIFOdepth attribute of consumer node is equal
-                        if idx_out == 0:
-                            n0_depth = n0.get_nodeattr("outFIFODepth")
-                        else:
-                            n0_depth = n0.get_nodeattr("outFIFODepths")[idx_out]
-                        if idx_inp == 0:
-                            n1_depth = n1.get_nodeattr("inFIFODepth")
-                        else:
-                            n1_depth = n1.get_nodeattr("inFIFODepths")[idx_inp]
+                        n0_depth = n0.get_nodeattr("outFIFODepths")[idx_out]
+                        n1_depth = n1.get_nodeattr("inFIFODepths")[idx_inp]
 
                         if n0_depth == n1_depth:
                             fifo_depth = n0_depth
@@ -187,18 +181,12 @@ class InsertFIFO(Transformation):
                                 if inp == output_name:
                                     consumer.input[idx] = fifo_output_tensor.name
                             # ensure created FIFO depth is reflected on both sides
-                            if idx_out == 0:
-                                n0.set_nodeattr("outFIFODepth", fifo_depth)
-                            else:
-                                odepths = n0.get_nodeattr("outFIFODepths")
-                                odepths[idx_out] = fifo_depth
-                                n0.set_nodeattr("outFIFODepths", odepths)
-                            if idx_inp == 0:
-                                n1.set_nodeattr("inFIFODepth", fifo_depth)
-                            else:
-                                idepths = n1.get_nodeattr("inFIFODepths")
-                                idepths[idx_inp] = fifo_depth
-                                n1.set_nodeattr("inFIFODepths", idepths)
+                            odepths = n0.get_nodeattr("outFIFODepths")
+                            odepths[idx_out] = fifo_depth
+                            n0.set_nodeattr("outFIFODepths", odepths)
+                            idepths = n1.get_nodeattr("inFIFODepths")
+                            idepths[idx_inp] = fifo_depth
+                            n1.set_nodeattr("inFIFODepths", idepths)
 
                             graph_modified = True
 
@@ -221,7 +209,7 @@ class InsertFIFO(Transformation):
                     else:
                         fld_shape = n0.get_folded_input_shape(inp_ind)
                         dtype = n0.get_input_datatype(inp_ind)
-                    fifo_depth = n0.get_nodeattr("inFIFODepth")
+                    fifo_depth = n0.get_nodeattr("inFIFODepths")[inp_ind]
 
                     if fifo_depth <= 2:
                         warnings.warn("Overriding input FIFO depth to 32")
diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
index 90ea853b6072b145df64a8a73ee93c65989fe447..f715aaeffb6d4d00f2e14c5fb25ec931443d5d97 100644
--- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py
+++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py
@@ -250,14 +250,21 @@ class InsertAndSetFIFODepths(Transformation):
             )
             assert node.op_type != "StreamingFIFO", "Found existing StreamingFIFO node"
             node = getCustomOp(node)
+            ifd = node.get_nodeattr("inFIFODepths")
+            ofd = node.get_nodeattr("outFIFODepths")
             if self.max_depth is not None:
-                node.set_nodeattr("inFIFODepth", self.max_depth)
-                node.set_nodeattr("outFIFODepth", self.max_depth)
+                ifd = [self.max_depth] * len(ifd)
+                ofd = [self.max_depth] * len(ofd)
             else:
-                i_depth = np.prod(node.get_folded_input_shape()[:-1])
-                o_depth = np.prod(node.get_folded_output_shape()[:-1])
-                node.set_nodeattr("inFIFODepth", i_depth)
-                node.set_nodeattr("outFIFODepth", o_depth)
+                # set each FIFO depth to the size of its folded tensor,
+                # excluding the last (stream width) dimension, hence the [:-1]
+                for i in range(len(ifd)):
+                    ifd[i] = np.prod(node.get_folded_input_shape(i)[:-1])
+                for o in range(len(ofd)):
+                    ofd[o] = np.prod(node.get_folded_output_shape(o)[:-1])
+            node.set_nodeattr("inFIFODepths", ifd)
+            node.set_nodeattr("outFIFODepths", ofd)
+
             if node.onnx_node.op_type in extw_optypes:
                 mmode = node.get_nodeattr("mem_mode")
                 if mmode == "external":
@@ -380,8 +387,11 @@ class InsertAndSetFIFODepths(Transformation):
                 reset_implementation(node_inst)
                 del fifos[node.name]
             else:
-                getCustomOp(node).set_nodeattr("inFIFODepth", 0)
-                getCustomOp(node).set_nodeattr("outFIFODepth", 0)
+                inst = getCustomOp(node)
+                ifd = inst.get_nodeattr("inFIFODepths")
+                ofd = inst.get_nodeattr("outFIFODepths")
+                inst.set_nodeattr("inFIFODepths", [0] * len(ifd))
+                inst.set_nodeattr("outFIFODepths", [0] * len(ofd))
                 # for every extw node we changed from external to decoupled,
                 # change back and reset implementation
                 if node.op_type in extw_optypes:
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index 103f18b514c23c4e1ad35a85d020dc0481aa9c47..5f787d1f889645d04884aed9b89a0b1c91d1f418 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -569,8 +569,8 @@ class TestEnd2End:
         for node in hls_layers:
             if node.op_type != "StreamingFIFO":
                 op_inst = getCustomOp(node)
-                assert op_inst.get_nodeattr("inFIFODepth") == 0
-                assert op_inst.get_nodeattr("outFIFODepth") == 0
+                assert op_inst.get_nodeattr("inFIFODepths") == [0]
+                assert op_inst.get_nodeattr("outFIFODepths") == [0]
         model.save(
             get_checkpoint_name(
                 topology, wbits, abits, QONNX_export, "fifodepth_" + kind