diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py
index f2d51d9ea60e393e2c146cc8bb161a50d8a4d961..2a3413cb132c275d100d8b065313ed2eb33c1636 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py
@@ -28,23 +28,220 @@
 
 import pytest
 
+import copy
+import numpy as np
+import onnx.parser as oprs
 from onnx import TensorProto, helper
 from pyverilator.util.axi_utils import axilite_write, reset_rtlsim
 from qonnx.core.datatype import DataType
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.general.im2col import compute_conv_output_dim
 from qonnx.custom_op.registry import getCustomOp
-from qonnx.transformation.general import GiveUniqueNodeNames
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from qonnx.util.basic import gen_finn_dt_tensor, get_by_name
 
 import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
 from finn.core.rtlsim_exec import rtlsim_exec
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 
 
+def create_conv_model(idim, ifm, k, stride, ofm, idt, wdt):
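+    # builds a two-layer Conv model (in0 -> conv0 -> out0) via the ONNX text
+    # parser; both layers share the same square kernel k and stride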
+    np.random.seed(0)
+    ishp = (1, ifm, idim, idim)
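+    # each Conv (no padding, dilation 1) yields out = (in - k) // stride + 1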
+    int_dim = compute_conv_output_dim(idim, k, stride)
+    odim = compute_conv_output_dim(int_dim, k, stride)
+    oshp = (1, ofm, odim, odim)
+    wshp = (ofm, ifm, k, k)
+    wshp_1 = (ofm, ofm, k, k)
+    ishp_str = str(list(ishp))
+    oshp_str = str(list(oshp))
+    wshp_str = str(list(wshp))
+    wshp_1_str = str(list(wshp_1))
+    kshp_str = str([k, k])
+    pad_str = str([0, 0, 0, 0])
+    stride_str = str([stride, stride])
+    dil_str = str([1, 1])
+
+    model_str = f"""
+    <
+        ir_version: 7,
+        opset_import: ["" : 9]
+    >
+    agraph (float{ishp_str} in0) => (float{oshp_str} out0)
+    <
+        float{wshp_str} param_c0_weight,
+        float{wshp_1_str} param_c1_weight
+    >
+    {{
+        conv0 = Conv<
+                dilations={dil_str},group=1,kernel_shape={kshp_str},pads={pad_str},
+                strides={stride_str}
+            >(in0, param_c0_weight)
+        out0 = Conv<
+                dilations={dil_str},group=1,kernel_shape={kshp_str},pads={pad_str},
+                strides={stride_str}
+            >(conv0, param_c1_weight)
+    }}
+    """
+    model = oprs.parse_model(model_str)
+    model = ModelWrapper(model)
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    model.set_tensor_datatype("in0", idt)
+    model.set_tensor_datatype("param_c0_weight", wdt)
+    model.set_tensor_datatype("param_c1_weight", wdt)
+    model.set_initializer("param_c0_weight", gen_finn_dt_tensor(wdt, wshp))
+    model.set_initializer("param_c1_weight", gen_finn_dt_tensor(wdt, wshp_1))
+    return model
+
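+# Example for create_conv_model (values as in the test below): idim=32, k=4,
+# stride=1 gives 32x32 -> 29x29 -> 26x26 feature maps across the two layers.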
+
+def update_conv_model_dims(model, idim_new):
+    cnode = model.get_nodes_by_op_type("Conv")[0]
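+    # kernels and strides are square here; keep the first entry of each pair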
+    k, _ = get_by_name(cnode.attribute, "kernel_shape").ints
+    stride, _ = get_by_name(cnode.attribute, "strides").ints
+    ishp = model.get_tensor_shape("in0")
+    n, ci, _, _ = ishp
+    n, co, _, _ = model.get_tensor_shape("out0")
+    int_dim = compute_conv_output_dim(idim_new, k, stride)
+    odim = compute_conv_output_dim(int_dim, k, stride)
+    model.set_tensor_shape("in0", (n, ci, idim_new, idim_new))
+    model.set_tensor_shape("out0", (n, co, odim, odim))
+    # drop all intermediate shape annotations so InferShapes re-derives them
+    del model.graph.value_info[:]
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    return model
+
+
+# Helper function to update tensor dimensions manually, since shape inference
+# does not work on FINN custom ops (they assume well-defined tensor shapes).
+def update_tensor_dim(model, tensor_name, new_hw):
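+    # dataflow tensors are NHWC, so indices 1 and 2 hold H and W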
+    shape = model.get_tensor_shape(tensor_name)
+    shape[1] = new_hw[0]
+    shape[2] = new_hw[1]
+    model.set_tensor_shape(tensor_name, shape)
+
+
+# Helper function that returns the pre-hook used to program the SWG(s) via AXI-Lite
+def config_hook(configs):
+    if configs is None:
+        return None
+
+    def write_swg_config(sim):
+        for axi_name, config in configs:
+            # 1. Write config registers to the SWG; values are (addr, value) tuples
+            for config_entry in config.values():
+                axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name)
+            # 2. Set cfg_valid flag (>= 1 cycle)
+            axilite_write(sim, 0, 1, basename=axi_name)
+        # 3. Reset component (>= 1 cycle)
+        reset_rtlsim(sim)
+
+    return write_swg_config
+
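+# Example usage (hypothetical values): passing
+# configs = [("s_axi_cfg_0_", swg_inst.get_dynamic_config((16, 16)))]
+# yields a pre_hook for rtlsim_exec that programs one SWG before simulation.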
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_fpgadataflow_conv_dynamic():
+    idims = [32, 16]
+    ifm = 4
+    k = 4
+    stride = 1
+    ofm = 8
+    idt = DataType["UINT8"]
+    wdt = DataType["INT2"]
+    exp_cfgs = []
+    largest_model = None
+    for idim in idims:
+        ishp = (1, ifm, idim, idim)
+        np.random.seed(0)
+        inp = gen_finn_dt_tensor(idt, ishp)
+        model = create_conv_model(idim, ifm, k, stride, ofm, idt, wdt)
+        _, _, int_dim, _ = model.get_tensor_shape("conv0")
+        _, _, odim, _ = model.get_tensor_shape("out0")
+        if idim == max(idims):
+            # use largest model for hardware conversion
+            largest_model = copy.deepcopy(model)
+        golden = oxe.execute_onnx(model, {"in0": inp})["out0"]
+        exp_cfg = (idim, int_dim, odim, inp, golden)
+        exp_cfgs.append(exp_cfg)
+
+    # convert to hardware and prepare simulation
+    model = largest_model.transform(LowerConvsToMatMul())
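+    # use_rtl_variant=True selects the RTL SWG, the only variant that supports
+    # runtime reconfiguration (dynamic_mode)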
+    model = model.transform(to_hls.InferConvInpGen(use_rtl_variant=True))
+    model = model.transform(
+        to_hls.InferQuantizedMatrixVectorActivation(mem_mode="decoupled")
+    )
+    model = model.transform(absorb.AbsorbConsecutiveTransposes())
+    parent_model = model.transform(CreateDataflowPartition())
+    sdp_inst = getCustomOp(
+        parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    )
+    model = ModelWrapper(sdp_inst.get_nodeattr("model"))
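+    # enable dynamic mode on every SWG: this generates the AXI-Lite config
+    # interface used later to reprogram the FM dimensions at runtime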
+    for swg_node in model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl"):
+        swg_inst = getCustomOp(swg_node)
+        swg_inst.set_nodeattr("SIMD", 1)
+        swg_inst.set_nodeattr("dynamic_mode", 1)
+        swg_inst.set_nodeattr("inFIFODepth", 16)
+        swg_inst.set_nodeattr("outFIFODepth", 16)
+        print("SWG initial config:")
+        idim = swg_inst.get_nodeattr("IFMDim")
+        print(swg_inst.get_dynamic_config(idim))
+    model = model.transform(InsertFIFO())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
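+    # generate and stitch IP for a Zynq-7020 part at a 5 ns (200 MHz) clock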
+    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5))
+    model.set_metadata_prop("exec_mode", "rtlsim")
+
+    # loop through experiment configurations
+    for exp_cfg in exp_cfgs:
+        idim, int_dim, odim, inp, golden = exp_cfg
+        # get config for the new dimensions
+        swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")
+        swg0 = getCustomOp(swg_nodes[0])
+        update_tensor_dim(model, swg0.onnx_node.input[0], (idim, idim))
+        update_tensor_dim(model, swg0.onnx_node.output[0], (int_dim, int_dim))
+        config0 = swg0.get_dynamic_config((idim, idim))
+        swg1 = getCustomOp(swg_nodes[1])
+        update_tensor_dim(model, swg1.onnx_node.input[0], (int_dim, int_dim))
+        update_tensor_dim(model, swg1.onnx_node.output[0], (odim, odim))
+        config1 = swg1.get_dynamic_config((int_dim, int_dim))
+        configs = [("s_axi_cfg_0_", config0), ("s_axi_cfg_1_", config1)]
+        # adjust folded shapes for I/O FIFOs
+        # (since rtlsim_exec uses folded shape info to fold global i/o tensors)
+        first_node = getCustomOp(model.graph.node[0])
+        first_node_shp = list(first_node.get_folded_input_shape())
+        first_node_shp[1] = idim
+        first_node_shp[2] = idim
+        first_node.set_nodeattr("folded_shape", first_node_shp)
+        update_tensor_dim(model, first_node.onnx_node.input[0], (idim, idim))
+        last_node = getCustomOp(model.graph.node[-1])
+        last_node_shp = list(last_node.get_folded_output_shape())
+        last_node_shp[1] = odim
+        last_node_shp[2] = odim
+        update_tensor_dim(model, last_node.onnx_node.output[0], (odim, odim))
+        last_node.set_nodeattr("folded_shape", last_node_shp)
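+        # I/O at the stitched-IP boundary is NHWC, while the reference model
+        # is NCHW, hence the transposes around rtlsim_exec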
+        ctx = {"global_in": inp.transpose(0, 2, 3, 1)}
+        rtlsim_exec(model, ctx, pre_hook=config_hook(configs))
+        ret = ctx["global_out"].transpose(0, 3, 1, 2)
+        assert np.isclose(golden, ret).all()
+
+
 def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt):
     k_h, k_w = k
     ifm_dim_h, ifm_dim_w = ifm_dim
@@ -229,31 +426,6 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
     model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5))
     model.set_metadata_prop("exec_mode", "rtlsim")
 
-    # Helper function that delivers the hook to program the SWG via AXI-Lite
-    def config_hook(config):
-        if config is None:
-            return None
-
-        def write_swg_config(sim):
-            axi_name = "s_axi_cfg_0_"
-            # 1. Write config registers to the SWG, dict defines (addr, value) tuples
-            for config_entry in config.values():
-                axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name)
-            # 2. Set cfg_valid flag (>= 1 cycle)
-            axilite_write(sim, 0, 1, basename=axi_name)
-            # 3. Reset component (>= 1 cycle)
-            reset_rtlsim(sim)
-
-        return write_swg_config
-
-    # Helper function to update tensor dimensions manually because shape inference
-    # does not work on FINN nodes (they assume well-defined tensor shapes).
-    def update_tensor_dim(model, tensor_name, new_hw):
-        shape = model.get_tensor_shape(tensor_name)
-        shape[1] = new_hw[0]
-        shape[2] = new_hw[1]
-        model.set_tensor_shape(tensor_name, shape)
-
     # Simulate 1 FM for each dimension in the series
     for i, ifm_dim in enumerate(ifm_dim_series):
         ifm_dim_h, ifm_dim_w = ifm_dim
@@ -261,7 +433,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
         ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w)
         ofm_dim = [ofm_dim_h, ofm_dim_w]
 
-        config = None
+        configs = None
         if i > 0:  # skip re-programming for initial FM dimension
             # Necessary update of node and tensor attributes to make rtlsim work:
             swg_node = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")[0]
@@ -271,6 +443,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
 
             # Generate config, also overwrites IFMDim/OFMDim attributes:
             config = swg_inst.get_dynamic_config(ifm_dim)
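+            # wrap the single config in the (axi_basename, config) list format
+            # expected by the shared config_hook helper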
+            configs = [("s_axi_cfg_0_", config)]
 
             # Also update FIFO nodes and corresponding tensors
             fifo_node = model.get_nodes_by_op_type("StreamingFIFO")[0]
@@ -292,7 +465,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
         # Run rtlsim on stitched-ip
         x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
         context = prepare_inputs(x)
-        rtlsim_exec(model, context, pre_hook=config_hook(config))
+        rtlsim_exec(model, context, pre_hook=config_hook(configs))
         y_produced = context["outp"]
 
         # Generate golden result