diff --git a/src/finn/custom_op/fpgadataflow/checksum.py b/src/finn/custom_op/fpgadataflow/checksum.py
index 0c264fcce6a6bdaaf70ff8ed8d0daa2784dfd9f6..22f9a92bd8cd856561b21659b3f828135b0cd08f 100644
--- a/src/finn/custom_op/fpgadataflow/checksum.py
+++ b/src/finn/custom_op/fpgadataflow/checksum.py
@@ -124,6 +124,9 @@ class checksum(HLSCustomOp):
 
         return normal_ishape
 
+    def get_ap_int_max_w(self):
+        return max(super().get_ap_int_max_w(), 32)
+
     def get_normal_output_shape(self):
         # same shape as input
         return self.get_normal_input_shape()
@@ -132,11 +135,17 @@ class checksum(HLSCustomOp):
         folded_oshape = self.get_folded_output_shape()
         return np.prod(folded_oshape[:-1])
 
+    def npy_to_dynamic_output(self, context):
+        super().npy_to_dynamic_output(context)
+        node = self.onnx_node
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        output_checksum = np.load("{}/output_checksum.npy".format(code_gen_dir))
+        context[node.output[1]] = output_checksum
+
     def execute_node(self, context, graph):
         mode = self.get_nodeattr("exec_mode")
         node = self.onnx_node
         inp = context[node.input[0]]
-        exp_shape = self.get_normal_input_shape()
 
         # TODO ensure codegen dir exists
         if mode == "cppsim":
@@ -152,9 +161,9 @@ class checksum(HLSCustomOp):
             )
 
         if mode == "cppsim":
-            output = inp
-            output = np.asarray([output], dtype=np.float32).reshape(*exp_shape)
-            context[node.output[0]] = output
+            self.dynamic_input_to_npy(context, 1)
+            self.exec_precompiled_singlenode_model()
+            self.npy_to_dynamic_output(context)
         elif mode == "rtlsim":
             # create a npy file for the input of the node
             assert (
@@ -221,10 +230,30 @@ class checksum(HLSCustomOp):
         self.code_gen_dict["$DEFINES$"] = my_defines
 
     def read_npy_data(self):
-        pass
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_input_datatype()
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_instream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_in = "%s/input_0.npy" % code_gen_dir
+        self.code_gen_dict["$READNPYDATA$"] = []
+        # note: the innermost dim is reversed for the input
+        self.code_gen_dict["$READNPYDATA$"].append(
+            'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);'
+            % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in)
+        )
 
     def strm_decl(self):
-        pass
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append("ap_uint<32> chk;")
 
     def docompute(self):
         self.code_gen_dict["$DOCOMPUTE$"] = [
@@ -232,10 +261,39 @@ class checksum(HLSCustomOp):
         ]
 
     def dataoutstrm(self):
-        pass
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        dtype = self.get_output_datatype()
+        if dtype == DataType["BIPOLAR"]:
+            # use binary for bipolar storage
+            dtype = DataType["BINARY"]
+        elem_bits = dtype.bitwidth()
+        packed_bits = self.get_outstream_width()
+        packed_hls_type = "ap_uint<%d>" % packed_bits
+        elem_hls_type = dtype.get_hls_datatype_str()
+        npy_type = "float"
+        npy_out = "%s/output.npy" % code_gen_dir
+        shape = tuple(self.get_folded_output_shape())
+        shape_cpp_str = str(shape).replace("(", "{").replace(")", "}")
+
+        # note: the innermost dim is not reversed for the output
+        self.code_gen_dict["$DATAOUTSTREAM$"] = [
+            'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);'
+            % (
+                packed_hls_type,
+                elem_hls_type,
+                elem_bits,
+                npy_type,
+                shape_cpp_str,
+                npy_out,
+            ),
+            "std::vector<unsigned int> checksum(1);",
+            "checksum[0] = chk;",
+            'cnpy::npy_save("%s/output_checksum.npy",&checksum[0],{1},"w");'
+            % code_gen_dir,
+        ]
 
     def save_as_npy(self):
-        pass
+        self.code_gen_dict["$SAVEASCNPY$"] = []
 
     def blackboxfunction(self):
         self.code_gen_dict["$BLACKBOXFUNCTION$"] = [