diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 478c22b526d4836856d865179862b94595cb9974..8fc1a221d200ebcfe4cca2fdb03645349874c4ef 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -224,6 +224,13 @@ class StreamingFCLayer_Batch(HLSCustomOp):
 
     def execute_node(self, context, graph):
         node = self.onnx_node
+        mw = self.get_nodeattr("MW")
+        mh = self.get_nodeattr("MH")
+        simd = self.get_nodeattr("SIMD")
+        pe = self.get_nodeattr("PE")
+        sf = mw // simd
+        nf = mh // pe
+
         # TODO ensure codegen dir exists
         code_gen_dir = self.get_nodeattr("code_gen_dir")
         # create a npy file fore each input of the node (in_ind is input index)
@@ -233,21 +240,19 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             # the second input are the weights
             # the third input are the thresholds
             if in_ind == 0:
-                simd = self.get_nodeattr("SIMD")
-                sf = int(self.get_nodeattr("MW") / simd)
-                assert context[inputs].shape == (1, sf, simd)
                 assert str(context[inputs].dtype) == "float32"
+                expected_inp_shape = (1, sf, simd)
+                reshaped_input = context[inputs].reshape(expected_inp_shape)
+                # flip SIMD (innermost) dimension of input tensor, there's some reversal
+                # going on somewhere with a mismatch between npy and hls...
+                reshaped_input = np.flip(reshaped_input, -1)
                 if self.get_input_datatype() == DataType.BIPOLAR:
                     # store bipolar activations as binary
-                    np.save(
-                        os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
-                        (context[inputs] + 1) / 2,
-                    )
-                else:
-                    np.save(
-                        os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
-                        context[inputs],
-                    )
+                    reshaped_input = (reshaped_input + 1) / 2
+                np.save(
+                    os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)),
+                    reshaped_input,
+                )
             elif in_ind > 2:
                 raise Exception("Unexpected input found for StreamingFCLayer")
             in_ind += 1
@@ -260,6 +265,9 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             out = context[node.output[0]]
             out = 2 * out - 1
             context[node.output[0]] = out
+        assert context[node.output[0]].shape == (1, nf, pe)
+        # reshape output to have expected shape
+        context[node.output[0]] = context[node.output[0]].reshape(1, mh)
 
     def global_includes(self):
         self.code_gen_dict["$GLOBALS$"] = ['#include "weights.hpp"']
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index dc78d0043d5ac40f6d5d5da6325d4a7e0ed52e3a..c45dc5b90d127acf181213d9adcd241deb3b97f7 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -22,14 +22,13 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
     wmem = mw * mh // (pe * simd)
     assert mw * mh == wmem * pe * simd
     nf = mh // pe
-    sf = mw // simd
     if T is not None:
         tmem = nf
     else:
         tmem = 0
 
-    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, sf, simd])
-    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, nf, pe])
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh])
     if T is not None:
         node_inp_list = ["inp", "weights", "thresh"]
         if odt == DataType.BIPOLAR:
@@ -76,11 +75,6 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
 
 
 def prepare_inputs(model, input_tensor, idt):
-    ishape = model.get_tensor_shape("inp")
-    input_tensor = (np.asarray(input_tensor, dtype=np.float32)).reshape(*ishape)
-    # flip SIMD (innermost) dimension of input tensor, there's some reversal
-    # going on somewhere with a mistmatch between npy and hls...
-    input_tensor = np.flip(input_tensor, -1)
     return {"inp": input_tensor}