diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py index 58ef94bfd7a03bb0d5fb8e5a7bf5695f6b961527..01f332b3b353ff89c1b70b2cd7e749374fd392e1 100644 --- a/src/finn/qnn-data/templates/driver/driver_base.py +++ b/src/finn/qnn-data/templates/driver/driver_base.py @@ -36,6 +36,7 @@ from finn.util.data_packing import ( finnpy_to_packed_bytearray, packed_bytearray_to_finnpy, ) +from warnings import warn # Driver base class for FINN-generated dataflow accelerators. # The particulars of the generated accelerator are specified via the @@ -84,11 +85,17 @@ class FINNExampleOverlay(Overlay): self.batch_size = batch_size self.fclk_mhz = fclk_mhz if self.platform == "alveo": - self.idma = self.idma0 + if "input_dma_name" in io_shape_dict.keys(): + self.idma = getattr(self,io_shape_dict["input_dma_name"]) + else: + self.idma = self.idma0 self.odma = self.odma0 self.odma_handle = None elif self.platform == "zynq-iodma": - self.idma = self.idma0 + if "input_dma_name" in io_shape_dict.keys(): + self.idma = getattr(self,io_shape_dict["input_dma_name"]) + else: + self.idma = self.idma0 self.odma = self.odma0 # set the clock frequency as specified by user during transformations if self.fclk_mhz > 0: @@ -96,8 +103,67 @@ class FINNExampleOverlay(Overlay): else: raise ValueError("Supported platforms are zynq-iodma alveo") # load any runtime weights + self.external_weights = [] + self.load_external_weights() self.load_runtime_weights() + def load_external_weights(self): + """Load any external weight tensors from the specified dir into the + corresponding input weight DMAs of the accelerator. Note that this must + be enabled during the accelerator build process. The weights directory + is specified as the class member ``runtime_weight_dir``.
+ + Takes no arguments: one ``.npy`` file per external-weight input DMA, + named after that DMA node (e.g. ``idma1.npy``), is expected in the + weights directory. Currently only supported on the Alveo platform. """ + + + w_filenames = [] + if not os.path.isdir(self.runtime_weight_dir): + return + for (dirpath, dirnames, filenames) in os.walk(self.runtime_weight_dir): + w_filenames.extend(filenames) + + tmp_weight_dict = {} + + for w_filename in w_filenames: + if w_filename.endswith(".npy"): + weight_tensor = np.load(self.runtime_weight_dir + "/" + w_filename) + else: + continue + + idma_name = w_filename.split(".")[0] + tmp_weight_dict[idma_name] = weight_tensor + + + if self.platform != "alveo" and len(tmp_weight_dict)>0: + # TODO: add Zynq support; the PYNQ API is different + warn("external_weights are not yet supported for non-Alveo builds") + return + + for idma_name in tmp_weight_dict.keys(): + if idma_name in self.ip_dict.keys(): + iwdma = getattr(self, idma_name) + weight_tensor = tmp_weight_dict[idma_name] + weight_buf = allocate(weight_tensor.shape, dtype=np.uint8) + weight_buf[:] = weight_tensor + weight_buf.sync_to_device() + + self.external_weights +=[(iwdma,weight_buf)] + + if "number_of_external_weights" in self._io_shape_dict: + hw_ext_weights = self._io_shape_dict["number_of_external_weights"] + assert len(self.external_weights) == hw_ext_weights, ( + "Number of hardware external weights and number of external " + + "weight tensors available do not match. \n"+ + "Is runtime_weight_dir pointing to the correct folder?") + + def load_runtime_weights(self, flush_accel=True, verify=True): """Load any existing runtime weights from the specified dir into the appropriate layer of the accelerator. 
Note that this must be enabled @@ -122,6 +188,8 @@ class FINNExampleOverlay(Overlay): if w_filename.endswith(".dat"): with open(self.runtime_weight_dir + "/" + w_filename, "r") as f: dat = f.read() + else: + continue layer_w = np.fromiter( [int(x, 16) for x in dat.strip().split()], dtype=np.uint32 ) @@ -288,6 +356,8 @@ class FINNExampleOverlay(Overlay): elif self.platform == "alveo": assert self.odma_handle is None, "Output DMA is already running" self.idma.start(self.ibuf_packed_device, batch_size) + for iwdma, iwbuf in self.external_weights: + iwdma.start(iwbuf,batch_size) self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size) else: raise Exception("Unrecognized platform: %s" % self.platform) diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index ac5cdb4f5389cd2f30f180773741f140b7145822..de5e180f59b1d731e71e523b342ccd8c0282c464 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -38,6 +38,24 @@ import warnings import pkg_resources as pk from . 
import template_driver from finn.core.modelwrapper import ModelWrapper +import numpy as np + +from finn.util.data_packing import pack_innermost_dim_as_hex_string, hexstring2npbytearray +from finn.util.basic import ( + roundup_to_integer_multiple, +) + + +def to_external_tensor(init,w_dtype): + weight_width = init.shape[1]*w_dtype.bitwidth() + weight_width_padded = roundup_to_integer_multiple(weight_width, 4) + hex_init = pack_innermost_dim_as_hex_string(init,w_dtype,weight_width_padded, prefix="0x") + ext_weight=np.array([], dtype=np.uint8) + for line in hex_init: + array_line = [x for x in reversed(hexstring2npbytearray(line, remove_prefix="0x"))] + ext_weight = np.append( ext_weight, array_line) + + return ext_weight class MakePYNQDriver(Transformation): """Create PYNQ Python code to correctly interface the generated @@ -98,7 +116,38 @@ class MakePYNQDriver(Transformation): ) i_tensor_shape_packed = i_tensor_dummy_packed.shape o_tensor_shape_packed = o_tensor_dummy_packed.shape - + + + #generate external weights npy files + weights_dir = pynq_driver_dir + "/runtime_weights" + + os.makedirs(weights_dir) + idma_idx = 0 + ext_weight_dma_cnt = 0 + + for node in model.graph.node: + assert node.op_type == "StreamingDataflowPartition", ( + "CreateDataflowPartition needs to be applied before driver generation") + + producer = model.find_producer(node.input[0]) + init_tensor = model.get_initializer(node.input[0]) + + if producer is None : # input dma? + idma_name = "idma" + str(idma_idx) + if init_tensor is not None: # input weights dma? 
+ ext_weight_dma_cnt += 1 + w_dtype = model.get_tensor_datatype(node.input[0]) + init_external_tensor = to_external_tensor(init_tensor,w_dtype) + np.save(weights_dir+"/"+ idma_name+".npy",init_external_tensor) + if self.platform != "alveo": + #Todo: add support in driver_base.py + warn("external_weights are not yet supported for non-Alveo builds") + else: + net_input_name = idma_name + + idma_idx += 1 + + # fill in the driver template driver_py = pynq_driver_dir + "/driver.py" driver = template_driver.pynq_driver_template @@ -122,6 +171,8 @@ class MakePYNQDriver(Transformation): driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal)) driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded)) driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed)) + driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name) + driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt) ) with open(driver_py, "w") as f: f.write(driver) @@ -148,9 +199,7 @@ class MakePYNQDriver(Transformation): shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core") # generate weight files for runtime-writable layers - weights_dir = pynq_driver_dir + "/runtime_weights" - os.makedirs(weights_dir) for sdp_ind, sdp_node in enumerate(model.graph.node): assert sdp_node.op_type == "StreamingDataflowPartition" # get dataflow model @@ -174,4 +223,7 @@ class MakePYNQDriver(Transformation): ) else: continue + + + return (model, False) \ No newline at end of file diff --git a/src/finn/transformation/fpgadataflow/template_driver.py b/src/finn/transformation/fpgadataflow/template_driver.py index b595205714d8cb630816d2b42fe96640e49e506e..5265835dd2530a5c93ceefbef629a43d6f33de52 100644 --- a/src/finn/transformation/fpgadataflow/template_driver.py +++ b/src/finn/transformation/fpgadataflow/template_driver.py @@ -77,7 +77,9 @@ io_shape_dict = { "ishape_folded" : $INPUT_SHAPE_FOLDED$, "oshape_folded" : $OUTPUT_SHAPE_FOLDED$, "ishape_packed" : 
$INPUT_SHAPE_PACKED$, - "oshape_packed" : $OUTPUT_SHAPE_PACKED$ + "oshape_packed" : $OUTPUT_SHAPE_PACKED$, + "input_dma_name" : $INPUT_DMA_NAME$, + "number_of_external_weights": $EXT_WEIGHT_NUM$ } if __name__ == "__main__":