diff --git a/tests/fpgadataflow/test_convert_to_hls_layers.py b/tests/fpgadataflow/test_convert_to_hls_layers.py
index e4da36566ee86f4be319bb4c81929ac59e7ad82a..32792e1364229199286a7012105442f3bbfb05df 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers.py
@@ -1,17 +1,26 @@
 import os
+from pkgutil import get_data
 
 import brevitas.onnx as bo
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
 import torch
 from models.LFC import LFC
 
+import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
 from finn.core.modelwrapper import ModelWrapper
+from finn.custom_op.fpgadataflow.streamingfclayer_batch import StreamingFCLayer_Batch
 from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.fpgadataflow.codegen import CodeGen
+from finn.transformation.fpgadataflow.compile import Compile
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import Streamline
+from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 
 export_onnx_path = "test_output_lfc.onnx"
 # TODO get from config instead, hardcoded to Docker path for now
@@ -34,6 +43,7 @@ def test_convert_to_hls_layers_lfc_w1a1():
     model = model.transform(ConvertBipolarMatMulToXnorPopcount())
     model = model.transform(absorb.AbsorbAddIntoMultiThreshold())
     model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
+    model = model.transform(RoundAndClipThresholds())
     model = model.transform(to_hls.InferBinaryStreamingFCLayer())
     fc0 = model.graph.node[2]
     assert fc0.op_type == "StreamingFCLayer_Batch"
@@ -55,3 +65,35 @@ def test_convert_to_hls_layers_lfc_w1a1():
     assert model.get_tensor_shape(fc3.input[0]) == [1, 1024]
     assert model.get_tensor_shape(fc3.input[1]) == [1024, 10]
     os.remove(export_onnx_path)
+
+    fc0w = StreamingFCLayer_Batch(fc0)
+    fc0w.set_nodeattr("SIMD", 784)
+    fc0w.set_nodeattr("PE", 32)
+
+    fc1w = StreamingFCLayer_Batch(fc1)
+    fc1w.set_nodeattr("SIMD", 1024)
+    fc1w.set_nodeattr("PE", 32)
+
+    fc2w = StreamingFCLayer_Batch(fc2)
+    fc2w.set_nodeattr("SIMD", 1024)
+    fc2w.set_nodeattr("PE", 32)
+
+    fc3w = StreamingFCLayer_Batch(fc3)
+    fc3w.set_nodeattr("SIMD", 1024)
+    fc3w.set_nodeattr("PE", 10)
+
+    model = model.transform(CodeGen())
+    model = model.transform(Compile())
+
+    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
+    input_tensor = onnx.load_tensor_from_string(raw_i)
+    # run using FINN-based execution
+    input_dict = {"global_in": nph.to_array(input_tensor)}
+    output_dict = oxe.execute_onnx(model, input_dict)
+    produced = output_dict[list(output_dict.keys())[0]]
+    # convert the same input to a float torch tensor for the PyTorch/Brevitas reference
+    input_tensor = torch.from_numpy(nph.to_array(input_tensor)).float()
+    assert input_tensor.shape == (1, 1, 28, 28)
+    # do forward pass in PyTorch/Brevitas
+    expected = lfc.forward(input_tensor).detach().numpy()
+    assert np.isclose(produced, expected, atol=1e-3).all()