diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 7784024aae102989338df9b040fcfc1f9dc36983..692f67f715857e4455f33cb88c618dbfb2b2e356 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -40,7 +40,10 @@ except ModuleNotFoundError:
 from onnx import TensorProto, helper
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow import HLSCustomOp
-from finn.util.basic import interleave_matrix_outer_dim_from_partitions
+from finn.util.basic import (
+    interleave_matrix_outer_dim_from_partitions,
+    roundup_to_integer_multiple,
+)
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
@@ -513,20 +516,37 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 weight_tensor_unflipped, export_wdt, weight_width, prefix=""
             )
             weight_stream_len = np.prod(weight_tensor_unflipped.shape)
-            assert (
-                weight_stream_len <= 1024
-            ), """Decoupled mem mode needs
-            weight stream length <= 1024 for now"""
+            factor = math.ceil(weight_stream_len / 1024)
             # add zeroes to pad out file to 1024 entries
             weight_stream = weight_tensor_unflipped.flatten()
-            pad_amt = 1024 - weight_stream_len
+            pad_amt = (factor * 1024) - weight_stream_len
             weight_stream = np.pad(
                 weight_stream, (0, pad_amt), mode="constant", constant_values="0"
             )
             weight_stream = weight_stream.copy()
-            with open("{}/memblock_0.dat".format(code_gen_dir), "w+") as f:
-                for val in weight_stream:
+            i = 0
+            j = 0
+            for val in weight_stream:
+                if i == 1024:
+                    i = 0
+                    j += 1
+                with open("{}/memblock_{}.dat".format(code_gen_dir, j), "a+") as f:
                     f.write(val + "\n")
+                i += 1
+            #assert (
+            #    weight_stream_len <= 1024
+            #), """Decoupled mem mode needs
+            #weight stream length <= 1024 for now"""
+            # add zeroes to pad out file to 1024 entries
+            #weight_stream = weight_tensor_unflipped.flatten()
+            #pad_amt = 1024 - weight_stream_len
+            #weight_stream = np.pad(
+            #    weight_stream, (0, pad_amt), mode="constant", constant_values="0"
+            #)
+            #weight_stream = weight_stream.copy()
+            #with open("{}/memblock_0.dat".format(code_gen_dir), "w+") as f:
+            #    for val in weight_stream:
+            #        f.write(val + "\n")
 
         else:
             raise Exception(
@@ -997,7 +1017,9 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)]
         mw = self.get_nodeattr("MW")
         mh = self.get_nodeattr("MH")
-        self.code_gen_dict["$WEIGHT_DEPTH$"] = [str(int(mw * mh))]
+        depth = int(mw * mh)
+        self.code_gen_dict["$WEIGHT_DEPTH$"] = [str(depth)]
+        self.code_gen_dict["$MEM_DEPTH$"] = [str(roundup_to_integer_multiple(depth, 1024))]
 
         template = self.decoupled_wrapper
 
@@ -1034,10 +1056,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             if file.endswith(".v"):
                 verilog_file = os.path.join(memstream_dir, file)
                 copy(verilog_file, verilog_folder)
-        # copy .dat file of weights
-        dat_file = "{}/memblock_0.dat".format(code_gen_dir)
-        copy(dat_file, verilog_folder)
-        # copy verilog wrapper
+        # copy .dat files of weights
+        for file in os.listdir(code_gen_dir):
+            if file.endswith(".dat"):
+                dat_file = os.path.join(code_gen_dir, file)
+                copy(dat_file, verilog_folder)
+        # copy verilog wrapper
         verilog_wrapper = "{}/{}_memstream.v".format(
             code_gen_dir, self.onnx_node.name
         )