diff --git a/finn-rtllib/swg/swg_template_wrapper.v b/finn-rtllib/swg/swg_template_wrapper.v
index 510418453f44e74d21695e44f7363254bd62dd4d..be5a93b9e63525f79fd37c3d10b4f9828c5bf98e 100644
--- a/finn-rtllib/swg/swg_template_wrapper.v
+++ b/finn-rtllib/swg/swg_template_wrapper.v
@@ -18,13 +18,13 @@ parameter MMV_OUT = $MMV_OUT$;
 parameter BUF_IN_WIDTH = BIT_WIDTH * SIMD * MMV_IN;
 parameter BUF_OUT_WIDTH = BIT_WIDTH * SIMD * MMV_OUT;
 
+(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
 input  ap_clk;
+(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out_V" *)
 input  ap_rst_n;
-(* X_INTERFACE_PARAMETER = "FREQ_HZ 100000000.000000" *) //todo: make configurable or set later
 input  [BUF_IN_WIDTH-1:0] in0_V_TDATA;
 input  in0_V_TVALID;
 output in0_V_TREADY;
-(* X_INTERFACE_PARAMETER = "FREQ_HZ 100000000.000000" *)
 output [BUF_OUT_WIDTH-1:0] out_V_TDATA;
 output out_V_TVALID;
 input  out_V_TREADY;
diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py
index 8d4aec259c440e311f6e3a6fb4d0359d55d738ca..6070cce636f50473545ab8a33c7867b7e1eb7f9c 100644
--- a/src/finn/transformation/fpgadataflow/synth_ooc.py
+++ b/src/finn/transformation/fpgadataflow/synth_ooc.py
@@ -52,7 +52,7 @@ class SynthOutOfContext(Transformation):
         top_module_name = model.get_metadata_prop("wrapper_filename")
         top_module_name = file_to_basename(top_module_name).strip(".v")
         build_dir = make_build_dir("synth_out_of_context_")
-        verilog_extensions = [".v", ".vh"]
+        verilog_extensions = [".v", ".sv", ".vh"]
         with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
             all_verilog_srcs = f.read().split()
         for file in all_verilog_srcs:
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
index 55dc77cafb898ead28a7cbb9641e0b40db276919..7dcae82afe29056cccf8d980e2206d6faab07bfb 100644
--- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -57,11 +57,12 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
     "conv_config", [(1, 2, 0), (1, 3, 0), (3, 2, 1), (3, 1, 0), (3, 1, 1), (5, 2, 1)]
 )
 @pytest.mark.parametrize("depthwise", [False, True])
+@pytest.mark.parametrize("use_rtl_swg", [False, True])
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 @pytest.mark.fpgadataflow
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
+def test_convert_to_hls_conv_layer(conv_config, depthwise, use_rtl_swg, exec_mode):
     kernel_size, stride, pad = conv_config
     np.random.seed(0)
     idt = DataType["UINT4"]
@@ -69,6 +70,9 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
     in_feature_dim = 7
     in_chn = 16
 
+    if use_rtl_swg and exec_mode == "cppsim":
+        pytest.skip("cppsim not supported for RTL SWG")
+
     if depthwise is True:
         group = out_chn = in_chn
         conv_param_shape = [out_chn, 1, kernel_size, kernel_size]
@@ -122,7 +126,7 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
     model = model.transform(InferDataTypes())
 
     new_model = model.transform(LowerConvsToMatMul())
-    new_model = new_model.transform(to_hls.InferConvInpGen())
+    new_model = new_model.transform(to_hls.InferConvInpGen(use_rtl_variant=use_rtl_swg))
     if depthwise is True:
         new_model = new_model.transform(to_hls.InferVectorVectorActivation())
     else:
@@ -156,10 +160,15 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
     x = gen_finn_dt_tensor(idt, input_shape)
     inp_dict = {model.graph.input[0].name: x}
     assert oxe.compare_execution(model, new_model, inp_dict)
+
+    if use_rtl_swg:
+        downsampler_op_type = "ConvolutionInputGenerator_rtl"
+    else:
+        downsampler_op_type = "DownSampler"
     if kernel_size == 1 and stride > 1 and pad == 0:
-        assert new_model.graph.node[1].op_type == "DownSampler"
+        assert new_model.graph.node[1].op_type == downsampler_op_type
         if exec_mode == "rtlsim":
-            node = new_model.get_nodes_by_op_type("DownSampler")[0]
+            node = new_model.get_nodes_by_op_type(downsampler_op_type)[0]
             inst = getCustomOp(node)
             cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim")
             exp_cycles_dict = new_model.analysis(exp_cycles_per_layer)
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py
index c0bf799fa85f289cbe2a90c7aee5c9cf443a4deb..c94aa1eab6d045f84adce16fd9aadc8a6961e202 100755
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py
@@ -173,10 +173,10 @@ def prepare_inputs(input_tensor):
 
 # input channel parallelism ("SIMD")
 @pytest.mark.parametrize("simd", [1,2,3,6])
+# parallel_window enable (MMV_out = M*K)
+@pytest.mark.parametrize("parallel_window", [0,1])
 # in/out MMV ("M")
 @pytest.mark.parametrize("m", [1])
-# paralle_window enable (MMV_out = M*K)
-@pytest.mark.parametrize("parallel_window", [0])
 
 # Flip dimensions
 @pytest.mark.parametrize("flip", [False,True])
@@ -210,9 +210,9 @@ def test_fpgadataflow_slidingwindow_rtl(
         pytest.skip("SIMD cannot be larger than number of input channels")
     if ifm_ch % simd != 0:
         pytest.skip("SIMD must divide number of input channels")
-    if kernel_width > ifm_dim_h or stride_h > ifm_dim_h:
+    if kernel_height > ifm_dim_h or stride_h > ifm_dim_h:
         pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension")
-    if kernel_height > ifm_dim_w or stride_w > ifm_dim_w:
+    if kernel_width > ifm_dim_w or stride_w > ifm_dim_w:
         pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension")
     if (k_h==1 and (stride_h!=1 or dilation_h!=1)) or (k_w==1 and (stride_w!=1 or dilation_w!=1)):
         pytest.skip("Illegal convolution configuration: stride or dilation defined for unitary kernel dim")