Skip to content
Snippets Groups Projects
Commit ae79de21 authored by tobi's avatar tobi
Browse files

Merge driver generation code with support for external weights

parents 21767795 4e377369
No related branches found
No related tags found
No related merge requests found
...@@ -36,6 +36,7 @@ from finn.util.data_packing import ( ...@@ -36,6 +36,7 @@ from finn.util.data_packing import (
finnpy_to_packed_bytearray, finnpy_to_packed_bytearray,
packed_bytearray_to_finnpy, packed_bytearray_to_finnpy,
) )
from warnings import warn
# Driver base class for FINN-generated dataflow accelerators. # Driver base class for FINN-generated dataflow accelerators.
# The particulars of the generated accelerator are specified via the # The particulars of the generated accelerator are specified via the
...@@ -84,11 +85,17 @@ class FINNExampleOverlay(Overlay): ...@@ -84,11 +85,17 @@ class FINNExampleOverlay(Overlay):
self.batch_size = batch_size self.batch_size = batch_size
self.fclk_mhz = fclk_mhz self.fclk_mhz = fclk_mhz
if self.platform == "alveo": if self.platform == "alveo":
self.idma = self.idma0 if "input_dma_name" in io_shape_dict.keys():
self.idma = getattr(self,io_shape_dict["input_dma_name"])
else:
self.idma = self.idma0
self.odma = self.odma0 self.odma = self.odma0
self.odma_handle = None self.odma_handle = None
elif self.platform == "zynq-iodma": elif self.platform == "zynq-iodma":
self.idma = self.idma0 if "input_dma_name" in io_shape_dict.keys():
self.idma = getattr(self,io_shape_dict["input_dma_name"])
else:
self.idma = self.idma0
self.odma = self.odma0 self.odma = self.odma0
# set the clock frequency as specified by user during transformations # set the clock frequency as specified by user during transformations
if self.fclk_mhz > 0: if self.fclk_mhz > 0:
...@@ -96,8 +103,67 @@ class FINNExampleOverlay(Overlay): ...@@ -96,8 +103,67 @@ class FINNExampleOverlay(Overlay):
else: else:
raise ValueError("Supported platforms are zynq-iodma alveo") raise ValueError("Supported platforms are zynq-iodma alveo")
# load any runtime weights # load any runtime weights
self.external_weights = []
self.load_external_weights()
self.load_runtime_weights() self.load_runtime_weights()
def load_external_weights(self):
    """Load any external (streamed) weight tensors and register them for DMA.

    Scans ``self.runtime_weight_dir`` (recursively) for ``<idma_name>.npy``
    files. Each file whose stem matches an IP core name in ``self.ip_dict``
    is loaded into a device buffer, synced to the device, and appended to
    ``self.external_weights`` as an ``(iwdma, weight_buf)`` pair so that
    execution can start the weight DMAs alongside the input DMA.

    Note that external-weight support must be enabled during the accelerator
    build process. Currently only the Alveo platform is supported; on other
    platforms a warning is emitted and loading is skipped.
    """
    if not os.path.isdir(self.runtime_weight_dir):
        return
    # gather all filenames (recursively) under the runtime weight dir
    w_filenames = []
    for dirpath, dirnames, filenames in os.walk(self.runtime_weight_dir):
        w_filenames.extend(filenames)
    # map idma core name (the file stem) -> weight tensor
    tmp_weight_dict = {}
    for w_filename in w_filenames:
        if not w_filename.endswith(".npy"):
            continue
        weight_tensor = np.load(self.runtime_weight_dir + "/" + w_filename)
        idma_name = w_filename.split(".")[0]
        tmp_weight_dict[idma_name] = weight_tensor
    if self.platform != "alveo" and len(tmp_weight_dict) > 0:
        # TODO: add zynq support; the pynq API is different there
        warn("external_weights are not yet supported for non-Alveo builds")
        return
    for idma_name in tmp_weight_dict.keys():
        if idma_name in self.ip_dict.keys():
            iwdma = getattr(self, idma_name)
            weight_tensor = tmp_weight_dict[idma_name]
            # allocate a device-visible buffer and copy the packed bytes in
            weight_buf = allocate(weight_tensor.shape, dtype=np.uint8)
            weight_buf[:] = weight_tensor
            weight_buf.sync_to_device()
            self.external_weights += [(iwdma, weight_buf)]
    # sanity check: the number of weight files found must match the number
    # of external-weight streamers the hardware was built with
    if "number_of_external_weights" in self._io_shape_dict:
        hw_ext_weights = self._io_shape_dict["number_of_external_weights"]
        assert len(self.external_weights) == hw_ext_weights, (
            "Number of hardware external weights and number of external " +
            "weight tensors available do not match. \n" +
            "Is runtime_weight_dir pointing to the correct folder?")
def load_runtime_weights(self, flush_accel=True, verify=True): def load_runtime_weights(self, flush_accel=True, verify=True):
"""Load any existing runtime weights from the specified dir into the """Load any existing runtime weights from the specified dir into the
appropriate layer of the accelerator. Note that this must be enabled appropriate layer of the accelerator. Note that this must be enabled
...@@ -122,6 +188,8 @@ class FINNExampleOverlay(Overlay): ...@@ -122,6 +188,8 @@ class FINNExampleOverlay(Overlay):
if w_filename.endswith(".dat"): if w_filename.endswith(".dat"):
with open(self.runtime_weight_dir + "/" + w_filename, "r") as f: with open(self.runtime_weight_dir + "/" + w_filename, "r") as f:
dat = f.read() dat = f.read()
else:
continue
layer_w = np.fromiter( layer_w = np.fromiter(
[int(x, 16) for x in dat.strip().split()], dtype=np.uint32 [int(x, 16) for x in dat.strip().split()], dtype=np.uint32
) )
...@@ -288,6 +356,8 @@ class FINNExampleOverlay(Overlay): ...@@ -288,6 +356,8 @@ class FINNExampleOverlay(Overlay):
elif self.platform == "alveo": elif self.platform == "alveo":
assert self.odma_handle is None, "Output DMA is already running" assert self.odma_handle is None, "Output DMA is already running"
self.idma.start(self.ibuf_packed_device, batch_size) self.idma.start(self.ibuf_packed_device, batch_size)
for iwdma, iwbuf in self.external_weights:
iwdma.start(iwbuf,batch_size)
self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size) self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size)
else: else:
raise Exception("Unrecognized platform: %s" % self.platform) raise Exception("Unrecognized platform: %s" % self.platform)
......
...@@ -38,6 +38,24 @@ import warnings ...@@ -38,6 +38,24 @@ import warnings
import pkg_resources as pk import pkg_resources as pk
from . import template_driver from . import template_driver
from finn.core.modelwrapper import ModelWrapper from finn.core.modelwrapper import ModelWrapper
import numpy as np
from finn.util.data_packing import pack_innermost_dim_as_hex_string, hexstring2npbytearray
from finn.util.basic import (
roundup_to_integer_multiple,
)
def to_external_tensor(init, w_dtype):
    """Return an external-weight tensor *init* flattened to a uint8 byte array.

    Each row of *init* is packed into a hex string (padded to a whole number
    of hex digits), converted to bytes, and byte-reversed (little-endian row
    layout for the DMA), then all rows are concatenated.

    Parameters
    ----------
    init : numpy array
        Weight initializer; assumes a 2D (rows, elems_per_row) layout since
        ``init.shape[1]`` is used as the row width -- TODO confirm callers.
    w_dtype : FINN DataType
        Element datatype, provides ``bitwidth()``.

    Returns
    -------
    numpy array of dtype uint8 with the packed weight bytes.
    """
    weight_width = init.shape[1] * w_dtype.bitwidth()
    # pad the row width to a multiple of 4 bits so it maps to whole hex digits
    weight_width_padded = roundup_to_integer_multiple(weight_width, 4)
    hex_init = pack_innermost_dim_as_hex_string(
        init, w_dtype, weight_width_padded, prefix="0x"
    )
    # collect the per-row byte arrays and concatenate once at the end:
    # np.append inside the loop reallocates the whole array per row (O(n^2))
    row_bytes = [
        hexstring2npbytearray(line, remove_prefix="0x")[::-1] for line in hex_init
    ]
    if not row_bytes:
        return np.array([], dtype=np.uint8)
    return np.concatenate(row_bytes).astype(np.uint8)
class MakePYNQDriver(Transformation): class MakePYNQDriver(Transformation):
"""Create PYNQ Python code to correctly interface the generated """Create PYNQ Python code to correctly interface the generated
...@@ -98,7 +116,38 @@ class MakePYNQDriver(Transformation): ...@@ -98,7 +116,38 @@ class MakePYNQDriver(Transformation):
) )
i_tensor_shape_packed = i_tensor_dummy_packed.shape i_tensor_shape_packed = i_tensor_dummy_packed.shape
o_tensor_shape_packed = o_tensor_dummy_packed.shape o_tensor_shape_packed = o_tensor_dummy_packed.shape
#generate external weights npy files
weights_dir = pynq_driver_dir + "/runtime_weights"
os.makedirs(weights_dir)
idma_idx = 0
ext_weight_dma_cnt = 0
for node in model.graph.node:
assert node.op_type == "StreamingDataflowPartition", (
"CreateDataflowPartition needs to be applied before driver generation")
producer = model.find_producer(node.input[0])
init_tensor = model.get_initializer(node.input[0])
if producer is None : # input dma?
idma_name = "idma" + str(idma_idx)
if init_tensor is not None: # input weights dma?
ext_weight_dma_cnt += 1
w_dtype = model.get_tensor_datatype(node.input[0])
init_external_tensor = to_external_tensor(init_tensor,w_dtype)
np.save(weights_dir+"/"+ idma_name+".npy",init_external_tensor)
if self.platform != "alveo":
#Todo: add support in driver_base.py
warn("external_weights are not yet supported for non-Alveo builds")
else:
net_input_name = idma_name
idma_idx += 1
# fill in the driver template # fill in the driver template
driver_py = pynq_driver_dir + "/driver.py" driver_py = pynq_driver_dir + "/driver.py"
driver = template_driver.pynq_driver_template driver = template_driver.pynq_driver_template
...@@ -122,6 +171,8 @@ class MakePYNQDriver(Transformation): ...@@ -122,6 +171,8 @@ class MakePYNQDriver(Transformation):
driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal)) driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal))
driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded)) driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded))
driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed)) driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed))
driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt) )
with open(driver_py, "w") as f: with open(driver_py, "w") as f:
f.write(driver) f.write(driver)
...@@ -148,9 +199,7 @@ class MakePYNQDriver(Transformation): ...@@ -148,9 +199,7 @@ class MakePYNQDriver(Transformation):
shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core") shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")
# generate weight files for runtime-writable layers # generate weight files for runtime-writable layers
weights_dir = pynq_driver_dir + "/runtime_weights"
os.makedirs(weights_dir)
for sdp_ind, sdp_node in enumerate(model.graph.node): for sdp_ind, sdp_node in enumerate(model.graph.node):
assert sdp_node.op_type == "StreamingDataflowPartition" assert sdp_node.op_type == "StreamingDataflowPartition"
# get dataflow model # get dataflow model
...@@ -174,4 +223,7 @@ class MakePYNQDriver(Transformation): ...@@ -174,4 +223,7 @@ class MakePYNQDriver(Transformation):
) )
else: else:
continue continue
return (model, False) return (model, False)
\ No newline at end of file
...@@ -77,7 +77,9 @@ io_shape_dict = { ...@@ -77,7 +77,9 @@ io_shape_dict = {
"ishape_folded" : $INPUT_SHAPE_FOLDED$, "ishape_folded" : $INPUT_SHAPE_FOLDED$,
"oshape_folded" : $OUTPUT_SHAPE_FOLDED$, "oshape_folded" : $OUTPUT_SHAPE_FOLDED$,
"ishape_packed" : $INPUT_SHAPE_PACKED$, "ishape_packed" : $INPUT_SHAPE_PACKED$,
"oshape_packed" : $OUTPUT_SHAPE_PACKED$ "oshape_packed" : $OUTPUT_SHAPE_PACKED$,
"input_dma_name" : $INPUT_DMA_NAME$,
"number_of_external_weights": $EXT_WEIGHT_NUM$
} }
if __name__ == "__main__": if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment