Commit 0279388b authored by auphelia

[StreamingFC] Enable larger mem stream lengths for decoupled mode

parent 9ccb1ddb
@@ -40,7 +40,10 @@ except ModuleNotFoundError:
 from onnx import TensorProto, helper
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow import HLSCustomOp
-from finn.util.basic import interleave_matrix_outer_dim_from_partitions
+from finn.util.basic import (
+    interleave_matrix_outer_dim_from_partitions,
+    roundup_to_integer_multiple,
+)
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
@@ -513,20 +516,37 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 weight_tensor_unflipped, export_wdt, weight_width, prefix=""
             )
             weight_stream_len = np.prod(weight_tensor_unflipped.shape)
-            assert (
-                weight_stream_len <= 1024
-            ), """Decoupled mem mode needs
-            weight stream length <= 1024 for now"""
+            factor = math.ceil(weight_stream_len / 1024)
             # add zeroes to pad out file to 1024 entries
             weight_stream = weight_tensor_unflipped.flatten()
-            pad_amt = 1024 - weight_stream_len
+            pad_amt = (factor * 1024) - weight_stream_len
             weight_stream = np.pad(
                 weight_stream, (0, pad_amt), mode="constant", constant_values="0"
             )
             weight_stream = weight_stream.copy()
-            with open("{}/memblock_0.dat".format(code_gen_dir), "w+") as f:
-                for val in weight_stream:
+            i = 0
+            j = 0
+            for val in weight_stream:
+                if i == 1024:
+                    i = 0
+                    j += 1
+                with open("{}/memblock_{}.dat".format(code_gen_dir, j), "a+") as f:
                     f.write(val + "\n")
+                i += 1
+            #assert (
+            #    weight_stream_len <= 1024
+            #), """Decoupled mem mode needs
+            #weight stream length <= 1024 for now"""
+            # add zeroes to pad out file to 1024 entries
+            #weight_stream = weight_tensor_unflipped.flatten()
+            #pad_amt = 1024 - weight_stream_len
+            #weight_stream = np.pad(
+            #    weight_stream, (0, pad_amt), mode="constant", constant_values="0"
+            #)
+            #weight_stream = weight_stream.copy()
+            #with open("{}/memblock_0.dat".format(code_gen_dir), "w+") as f:
+            #    for val in weight_stream:
+            #        f.write(val + "\n")
         else:
             raise Exception(
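
The loop added above replaces the old single-file assumption (weight stream length <= 1024): the flattened weight stream is zero-padded up to the next multiple of 1024 and written out as consecutive memblock_<j>.dat files of 1024 entries each. A minimal standalone sketch of that splitting logic, using a hypothetical helper name and a plain list of entry strings in place of FINN's packed hex weight stream:

import math
import os


def split_weight_stream(weight_stream, out_dir, block_depth=1024):
    # Pad the flattened stream with "0" entries so its length is an exact
    # multiple of block_depth, then emit one memblock_<j>.dat per block.
    factor = math.ceil(len(weight_stream) / block_depth)
    padded = list(weight_stream) + ["0"] * (factor * block_depth - len(weight_stream))
    for j in range(factor):
        block = padded[j * block_depth : (j + 1) * block_depth]
        with open(os.path.join(out_dir, "memblock_{}.dat".format(j)), "w") as f:
            f.write("\n".join(block) + "\n")
    return factor


# e.g. 2500 entries -> factor 3: memblock_0.dat and memblock_1.dat hold 1024
# entries each, memblock_2.dat holds the remaining 452 plus 572 zero pads.
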
@@ -997,7 +1017,9 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)]
             mw = self.get_nodeattr("MW")
             mh = self.get_nodeattr("MH")
-            self.code_gen_dict["$WEIGHT_DEPTH$"] = [str(int(mw * mh))]
+            depth = int(mw * mh)
+            self.code_gen_dict["$WEIGHT_DEPTH$"] = [str(depth)]
+            self.code_gen_dict["$MEM_DEPTH$"] = [str(roundup_to_integer_multiple(depth, 1024))]
             template = self.decoupled_wrapper
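
The memstream wrapper template now gets a separate $MEM_DEPTH$ value: the weight depth (MW * MH) rounded up to the next multiple of 1024, matching the 1024-entry granularity of the generated memblock files. A sketch of the rounding arithmetic, mirroring what finn.util.basic.roundup_to_integer_multiple does for positive integer inputs (this is an illustration, not the FINN source), with hypothetical MW/MH values:

import math


def roundup_to_multiple(x, factor):
    # Smallest multiple of factor that is >= x.
    return int(math.ceil(x / factor) * factor)


# Hypothetical layer with MW=784, MH=1000: WEIGHT_DEPTH is 784000, while the
# streamer memory is sized to 766 blocks of 1024, i.e. MEM_DEPTH = 784384.
assert roundup_to_multiple(784 * 1000, 1024) == 766 * 1024
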
@@ -1034,10 +1056,12 @@
                 if file.endswith(".v"):
                     verilog_file = os.path.join(memstream_dir, file)
                     copy(verilog_file, verilog_folder)
-            # copy .dat file of weights
-            dat_file = "{}/memblock_0.dat".format(code_gen_dir)
-            copy(dat_file, verilog_folder)
-            # copy verilog wrapper
+            # copy .dat files of weights
+            for file in os.listdir(code_gen_dir):
+                if file.endswith(".dat"):
+                    dat_file = os.path.join(code_gen_dir, file)
+                    copy(dat_file, verilog_folder)
+            # copy verilog wrapper
             verilog_wrapper = "{}/{}_memstream.v".format(
                 code_gen_dir, self.onnx_node.name
             )
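
Since more than one memblock file can now exist, the IP generation step copies every *.dat file from the code generation directory instead of only memblock_0.dat. A small sanity-check sketch (hypothetical helper, not part of the commit) for confirming the copy step picked up all of them:

import os


def all_memblocks_copied(code_gen_dir, verilog_folder):
    # True if every memblock_*.dat produced during code generation is also
    # present next to the memstream verilog sources.
    dats = [f for f in os.listdir(code_gen_dir) if f.endswith(".dat")]
    return all(os.path.isfile(os.path.join(verilog_folder, f)) for f in dats)
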