diff --git a/src/finn/backend/fpgadataflow/code_gen_for_single_node_execution.py b/src/finn/backend/fpgadataflow/code_gen_for_single_node_execution.py
index d6ae9e86ffdf2ff35235aaac9421ecb0dc102baf..185e1aacc232075bafba571fb2882da3f1d830cf 100644
--- a/src/finn/backend/fpgadataflow/code_gen_for_single_node_execution.py
+++ b/src/finn/backend/fpgadataflow/code_gen_for_single_node_execution.py
@@ -1,4 +1,36 @@
+import finn.core.utils as utils
 
+def global_includes(node, code_gen_dict):
+    # Reset "$GLOBALS$" and add the C++ include required by the node's op type.
+    needs_maxpool = node.op_type == 'StreamingMaxPool'
+    code_gen_dict["$GLOBALS$"] = ['#include "maxpool.h"'] if needs_maxpool else []
+
+def defines(node, code_gen_dict):
+    # Reset "$DEFINES$"; for StreamingMaxPool emit one #define per node attribute.
+    code_gen_dict["$DEFINES$"] = []
+    if node.op_type != 'StreamingMaxPool':
+        return
+    macro_names = ['ImgDim', 'PoolDim', 'NumChannels']
+    code_gen_dict["$DEFINES$"].append('\n '.join('#define {} {}'.format(m, utils.get_by_name(node.attribute, m).i) for m in macro_names))
+
+def read_npy_data(node, code_gen_dict):
+    """Fill code_gen_dict["$READNPYDATA$"] with C++ snippets that load the node inputs.
+
+    One "input_<index>.npy" file name is derived per entry of node.input; snippets
+    are emitted only for 'StreamingMaxPool' nodes, otherwise the list stays empty.
+    """
+    # The key must match the $READNPYDATA$ placeholder of the template in execute().
+    code_gen_dict["$READNPYDATA$"] = []
+    input_file_names = ["input_{}.npy".format(i) for i, _ in enumerate(node.input)]
+
+    if node.op_type != 'StreamingMaxPool':
+        return
+    # NOTE(review): `arr` and `num_values` are emitted once per input, so a node with
+    # more than one input would redeclare them in the generated C++ -- confirm intent.
+    for input_ind, input_file in enumerate(input_file_names):
+        code_gen_dict["$READNPYDATA$"].append('cnpy::NpyArray arr = cnpy::npy_load("{}");\n float* loaded_data{} = arr.data<float>();'.format(input_file, input_ind))
+        code_gen_dict["$READNPYDATA$"].append('int num_values = 1; \n for(int i = 0; i < arr.shape.size(); i++){\n num_values *= arr.shape[i]; \n }')
+        
 
 
 def execute(node, context, graph):
@@ -22,12 +54,10 @@ def execute(node, context, graph):
 
     output_interface k;
 
-    $READNPYDATA$
-
     $STREAMDECLARATIONS$
     $STREAMPRAGMAS$
     
-    $DATAINSTREAM$
+    $READNPYDATA$
 
     $DOCOMPUTE$
     
@@ -38,6 +68,11 @@ def execute(node, context, graph):
     }
 
     """
+    code_gen_dict={}
+    global_includes(node, code_gen_dict)
+    defines(node, code_gen_dict)
+    read_npy_data(node, code_gen_dict)
+    print(code_gen_dict)
 
     print("\n\n Set up for code generation of single not in progress! \n\n") 
    
diff --git a/src/finn/core/execute_custom_node.py b/src/finn/core/execute_custom_node.py
index 20e151d0999805b6c0f19da4280a63d0ee216eea..6583f7cc9695f4a481883b9fb5bbba9233c59bcf 100644
--- a/src/finn/core/execute_custom_node.py
+++ b/src/finn/core/execute_custom_node.py
@@ -21,10 +21,15 @@ def execute_custom_node(node, context, graph):
                 temp_files.append("input_{}.npy".format(in_ind))
                 in_ind += 1
 
-            output = np.load("output.npy")
-            print(output)
             code_gen.execute(node, context, graph)
             
+            output = np.load("output.npy")
+            for i in range(output.shape[0]):
+                print(np.transpose(output[i]))
+
+
+
+            
             ## deleting temporary files
             #for temp_file in temp_files:
             #    os.remove(temp_file)
diff --git a/tests/layer_streaming_maxpool.cpp b/tests/layer_streaming_maxpool.cpp
index 0951ab7e84e959000791735ad6327248d46b6496..4b2455ef61cf81a05dacb010aa56327202bc9aae 100755
--- a/tests/layer_streaming_maxpool.cpp
+++ b/tests/layer_streaming_maxpool.cpp
@@ -15,25 +15,25 @@ int main(){
 	output_interface k;
 	cnpy::NpyArray arr = cnpy::npy_load("input_0.npy");
 	float* loaded_data = arr.data<float>();
-	int Nx = arr.shape[0];
-	int Ny = arr.shape[1];
-	int Nz = arr.shape[2];
-	
+	int num_values = 1;
+	for(int i = 0; i < arr.shape.size(); i++){
+		num_values *= arr.shape[i];
+	}
+
 	hls::stream<ap_uint<2>> in ("in");
 	hls::stream<ap_uint<2>> out ("out");
+	ap_uint<2> in_data;
 	#pragma HLS DATAFLOW
 	#pragma HLS stream depth=1024 variable=in
 	#pragma HLS stream depth=1024 variable=out
-	for(int i=0;i < Nx*Ny*Nz;i++){
-		in << loaded_data[i];
+	ap_uint<2> dat;
+	for(int i=0;i < num_values; i+=2){
+		dat.range(0,0) = loaded_data[i];
+		dat.range(1,1) = loaded_data[i+1];
+		in << loaded_data[dat];
 	}
-        //while(in.read_nb(i.last_data)){
-        //       i.data.push_back(i.last_data);
-        //}
-	//for(std::vector<ap_uint<2>>::iterator it = i.data.begin(); it!= i.data.end(); ++it){
-        //        std::cout << "Next value: " << *it << std::endl;
-        //}
-	
+
+
 	StreamingMaxPool<ImgDim, PoolDim, NumChannels>(in, out);
 	while(out.read_nb(k.last_data)){
 		k.data.push_back(k.last_data);
@@ -43,9 +43,9 @@ int main(){
 		ap_uint<2> test = *it;
 		output_data.push_back(test.range(0,0));
 		output_data.push_back(test.range(1,1));
+	}
 
 
-	}
 	cnpy::npy_save("output.npy",&output_data[0],{2,2,2},"w");
 
 
diff --git a/tests/output.npy b/tests/output.npy
index 013e68979bf26a22c79cbf0dc7ac0045c75eb317..7ceeebacea70251ffc97e93d59651ed1108573ec 100644
Binary files a/tests/output.npy and b/tests/output.npy differ
diff --git a/tests/test_layer_streaming_maxpool.py b/tests/test_layer_streaming_maxpool.py
index 83c8726d7c724c3be027c17fc895323e31e62779..7b87138877959b5b1c7e6485363108518eb401df 100644
--- a/tests/test_layer_streaming_maxpool.py
+++ b/tests/test_layer_streaming_maxpool.py
@@ -77,3 +77,4 @@ def test_layer_streaming_maxpool():
 
     input_dict = {"in": input_tensor}
     output_dict = oxe.execute_onnx(model, input_dict)
+    print(output_dict[outp])