Commit 0279388b authored by auphelia

[StreamingFC] Enable larger mem stream lengths for decoupled mode

parent 9ccb1ddb
@@ -40,7 +40,10 @@ except ModuleNotFoundError:
 from onnx import TensorProto, helper
 from finn.core.datatype import DataType
 from finn.custom_op.fpgadataflow import HLSCustomOp
-from finn.util.basic import interleave_matrix_outer_dim_from_partitions
+from finn.util.basic import (
+    interleave_matrix_outer_dim_from_partitions,
+    roundup_to_integer_multiple,
+)
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
     numpy_to_hls_code,
@@ -513,20 +516,37 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 weight_tensor_unflipped, export_wdt, weight_width, prefix=""
             )
             weight_stream_len = np.prod(weight_tensor_unflipped.shape)
-            assert (
-                weight_stream_len <= 1024
-            ), """Decoupled mem mode needs
-            weight stream length <= 1024 for now"""
+            factor = math.ceil(weight_stream_len / 1024)
             # add zeroes to pad out file to 1024 entries
             weight_stream = weight_tensor_unflipped.flatten()
-            pad_amt = 1024 - weight_stream_len
+            pad_amt = (factor * 1024) - weight_stream_len
             weight_stream = np.pad(
                 weight_stream, (0, pad_amt), mode="constant", constant_values="0"
             )
             weight_stream = weight_stream.copy()
-            with open("{}/memblock_0.dat".format(code_gen_dir), "w+") as f:
-                for val in weight_stream:
+            i = 0
+            j = 0
+            for val in weight_stream:
+                if i == 1024:
+                    i = 0
+                    j += 1
+                with open("{}/memblock_{}.dat".format(code_gen_dir, j), "a+") as f:
                     f.write(val + "\n")
+                i += 1
+            #assert (
+            #    weight_stream_len <= 1024
+            #), """Decoupled mem mode needs
+            #weight stream length <= 1024 for now"""
+            # add zeroes to pad out file to 1024 entries
+            #weight_stream = weight_tensor_unflipped.flatten()
+            #pad_amt = 1024 - weight_stream_len
+            #weight_stream = np.pad(
+            #    weight_stream, (0, pad_amt), mode="constant", constant_values="0"
+            #)
+            #weight_stream = weight_stream.copy()
+            #with open("{}/memblock_0.dat".format(code_gen_dir), "w+") as f:
+            #    for val in weight_stream:
+            #        f.write(val + "\n")
         else:
             raise Exception(
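
The loop added above replaces the old single-file assumption (weight stream length <= 1024): the flattened weight stream is zero-padded up to the next multiple of 1024 and written out as consecutive memblock_<j>.dat files of 1024 entries each. A minimal standalone sketch of that splitting logic, using a hypothetical helper name and a plain list of entry strings in place of FINN's packed hex weight stream:

import math
import os


def split_weight_stream(weight_stream, out_dir, block_depth=1024):
    # Pad the flattened stream with "0" entries so its length is an exact
    # multiple of block_depth, then emit one memblock_<j>.dat per block.
    factor = math.ceil(len(weight_stream) / block_depth)
    padded = list(weight_stream) + ["0"] * (factor * block_depth - len(weight_stream))
    for j in range(factor):
        block = padded[j * block_depth : (j + 1) * block_depth]
        with open(os.path.join(out_dir, "memblock_{}.dat".format(j)), "w") as f:
            f.write("\n".join(block) + "\n")
    return factor


# e.g. 2500 entries -> factor 3: memblock_0.dat and memblock_1.dat hold 1024
# entries each, memblock_2.dat holds the remaining 452 plus 572 zero pads.
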
@@ -997,7 +1017,9 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)]
             mw = self.get_nodeattr("MW")
             mh = self.get_nodeattr("MH")
-            self.code_gen_dict["$WEIGHT_DEPTH$"] = [str(int(mw * mh))]
+            depth = int(mw * mh)
+            self.code_gen_dict["$WEIGHT_DEPTH$"] = [str(depth)]
+            self.code_gen_dict["$MEM_DEPTH$"] = [str(roundup_to_integer_multiple(depth, 1024))]
             template = self.decoupled_wrapper
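
The memstream wrapper template now gets a separate $MEM_DEPTH$ value: the weight depth (MW * MH) rounded up to the next multiple of 1024, matching the 1024-entry granularity of the generated memblock files. A sketch of the rounding arithmetic, mirroring what finn.util.basic.roundup_to_integer_multiple does for positive integer inputs (this is an illustration, not the FINN source), with hypothetical MW/MH values:

import math


def roundup_to_multiple(x, factor):
    # Smallest multiple of factor that is >= x.
    return int(math.ceil(x / factor) * factor)


# Hypothetical layer with MW=784, MH=1000: WEIGHT_DEPTH is 784000, while the
# streamer memory is sized to 766 blocks of 1024, i.e. MEM_DEPTH = 784384.
assert roundup_to_multiple(784 * 1000, 1024) == 766 * 1024
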
@@ -1034,10 +1056,12 @@
                 if file.endswith(".v"):
                     verilog_file = os.path.join(memstream_dir, file)
                     copy(verilog_file, verilog_folder)
-            # copy .dat file of weights
-            dat_file = "{}/memblock_0.dat".format(code_gen_dir)
-            copy(dat_file, verilog_folder)
-            # copy verilog wrapper
+            # copy .dat files of weights
+            for file in os.listdir(code_gen_dir):
+                if file.endswith(".dat"):
+                    dat_file = os.path.join(code_gen_dir, file)
+                    copy(dat_file, verilog_folder)
+            # copy verilog wrapper
             verilog_wrapper = "{}/{}_memstream.v".format(
                 code_gen_dir, self.onnx_node.name
             )
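
Since more than one memblock file can now exist, the IP generation step copies every *.dat file from the code generation directory instead of only memblock_0.dat. A small sanity-check sketch (hypothetical helper, not part of the commit) for confirming the copy step picked up all of them:

import os


def all_memblocks_copied(code_gen_dir, verilog_folder):
    # True if every memblock_*.dat produced during code generation is also
    # present next to the memstream verilog sources.
    dats = [f for f in os.listdir(code_gen_dir) if f.endswith(".dat")]
    return all(os.path.isfile(os.path.join(verilog_folder, f)) for f in dats)
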