diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index f18fd8d1019337e7b87ae9e47ba3a5b53ec849f7..e0fe87ee1d3aa3d9dd6de327ad129eb70148830f 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -81,7 +81,7 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
     return model
 
 
-def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
+def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, pe, ifm_dim, ofm_dim, idt):
     k_h, k_w = k
     ifm_dim_h, ifm_dim_w = ifm_dim
     ofm_dim_h, ofm_dim_w = ofm_dim
@@ -101,6 +101,7 @@ def make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt):
         backend="fpgadataflow",
         PoolDim=[k_h, k_w],
         NumChannels=ifm_ch,
+        PE=pe,
         ImgDim=[ifm_dim_h, ifm_dim_w],
         dataType=idt.name,
     )
@@ -122,20 +123,29 @@ def prepare_inputs(input_tensor):
 
 
 # input datatype
-@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
+#@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]])
+@pytest.mark.parametrize("idt", [DataType["UINT4"]])
 # 1d maxpool
-@pytest.mark.parametrize("dim_1d", [False, True])
+#@pytest.mark.parametrize("dim_1d", [False, True])
+@pytest.mark.parametrize("dim_1d", [True])
 # kernel size
-@pytest.mark.parametrize("k", [2, 4])
+#@pytest.mark.parametrize("k", [2, 4])
+@pytest.mark.parametrize("k", [6])
 # input dimension
-@pytest.mark.parametrize("ifm_dim", [4, 8])
+#@pytest.mark.parametrize("ifm_dim", [4, 8])
+@pytest.mark.parametrize("ifm_dim", [60])
 # input channels
-@pytest.mark.parametrize("ifm_ch", [1, 3])  # 1,3
+#@pytest.mark.parametrize("ifm_ch", [1, 3])  # 1,3
+@pytest.mark.parametrize("ifm_ch", [1024])  # originally [1, 3]
+# number of processing elements (PE)
+#@pytest.mark.parametrize("pe", [1, 3])
+@pytest.mark.parametrize("pe", [1])
 # execution mode
-@pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
+#@pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
+@pytest.mark.parametrize("exec_mode", ["rtlsim"])
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mode):
+def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, pe, exec_mode):
     ifm_dim_h = ifm_dim
     k_h = k
     if dim_1d:
@@ -156,6 +166,8 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mod
         pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)")
     if ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0:
         pytest.skip("Skipping StreamingMaxPool test w/ ImgDim % PoolDim != 0")
+    if pe > ifm_ch:
+        pytest.skip("PE cannot be larger than number of input channels")
 
     x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
     # prepare input data
@@ -164,7 +176,7 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mod
     golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
     y_expected = oxe.execute_onnx(golden, input_dict)["outp"]
 
-    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
+    model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, pe, ifm_dim, ofm_dim, idt)
 
     if exec_mode == "cppsim":
         model = model.transform(SetExecMode("cppsim"))
@@ -173,7 +185,8 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, exec_mod
     elif exec_mode == "rtlsim":
         model = model.transform(SetExecMode("rtlsim"))
         model = model.transform(GiveUniqueNodeNames())
-        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+        #model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+        model = model.transform(PrepareIP("xczu3eg-sbva484-1-e", 5))
         model = model.transform(HLSSynthIP())
         model = model.transform(PrepareRTLSim())
     else: