diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py
index 58ef94bfd7a03bb0d5fb8e5a7bf5695f6b961527..ac33705f0e14a0493f044c1bdaace271f860d70f 100644
--- a/src/finn/qnn-data/templates/driver/driver_base.py
+++ b/src/finn/qnn-data/templates/driver/driver_base.py
@@ -36,6 +36,7 @@ from finn.util.data_packing import (
     finnpy_to_packed_bytearray,
     packed_bytearray_to_finnpy,
 )
+from warnings import warn
 
 # Driver base class for FINN-generated dataflow accelerators.
 # The particulars of the generated accelerator are specified via the
@@ -84,11 +85,17 @@ class FINNExampleOverlay(Overlay):
         self.batch_size = batch_size
         self.fclk_mhz = fclk_mhz
         if self.platform == "alveo":
-            self.idma = self.idma0
+            if "input_dma_name" in io_shape_dict:
+                self.idma = getattr(self, io_shape_dict["input_dma_name"])
+            else:
+                self.idma = self.idma0
             self.odma = self.odma0
             self.odma_handle = None
         elif self.platform == "zynq-iodma":
-            self.idma = self.idma0
+            if "input_dma_name" in io_shape_dict:
+                self.idma = getattr(self, io_shape_dict["input_dma_name"])
+            else:
+                self.idma = self.idma0
             self.odma = self.odma0
             # set the clock frequency as specified by user during transformations
             if self.fclk_mhz > 0:
@@ -97,6 +104,53 @@ class FINNExampleOverlay(Overlay):
             raise ValueError("Supported platforms are zynq-iodma alveo")
+        # external weights must be set up before load_runtime_weights(): its
+        # flush step may run the accelerator, which uses self.external_weights
+        self.external_weights = []
+        self.load_external_weights()
         # load any runtime weights
         self.load_runtime_weights()
+
+    def load_external_weights(self):
+        """Load external weight tensors and stage them in device buffers.
+
+        Every ``.npy`` file found under ``self.runtime_weight_dir`` is loaded
+        with numpy, and the file name up to its first ``.`` is used as the
+        name of a weight input DMA. For each such name that is present in
+        ``self.ip_dict``, a ``uint8`` device buffer of the tensor's shape is
+        allocated, filled and synced to the device, and the
+        ``(dma, buffer)`` pair is appended to ``self.external_weights``.
+
+        Returns early if the directory does not exist. If weight files are
+        found on a non-Alveo platform, a warning is issued and none are used.
+        """
+        if not os.path.isdir(self.runtime_weight_dir):
+            return
+
+        # DMA name -> weight tensor. Paths are built from dirpath so that
+        # files os.walk finds in subdirectories are opened where they live.
+        tmp_weight_dict = {}
+        for (dirpath, dirnames, filenames) in os.walk(self.runtime_weight_dir):
+            for w_filename in filenames:
+                if not w_filename.endswith(".npy"):
+                    continue
+                idma_name = w_filename.split(".")[0]
+                w_path = os.path.join(dirpath, w_filename)
+                tmp_weight_dict[idma_name] = np.load(w_path)
+
+        if self.platform != "alveo" and len(tmp_weight_dict) > 0:
+            # TODO: add zynq support, the pynq API is different there
+            warn("external_weights are not yet supported for non-Alveo builds")
+            return
+
+        for idma_name, weight_tensor in tmp_weight_dict.items():
+            if idma_name not in self.ip_dict:
+                continue
+            iwdma = getattr(self, idma_name)
+            # NOTE(review): relies on the `pynq` module itself being
+            # imported at the top of this file -- confirm
+            weight_buf = pynq.allocate(weight_tensor.shape, dtype=np.uint8)
+            weight_buf[:] = weight_tensor
+            weight_buf.sync_to_device()
+            self.external_weights.append((iwdma, weight_buf))
 
     def load_runtime_weights(self, flush_accel=True, verify=True):
         """Load any existing runtime weights from the specified dir into the
@@ -122,6 +176,8 @@ class FINNExampleOverlay(Overlay):
             if w_filename.endswith(".dat"):
                 with open(self.runtime_weight_dir + "/" + w_filename, "r") as f:
                     dat = f.read()
+            else:
+                continue
             layer_w = np.fromiter(
                 [int(x, 16) for x in dat.strip().split()], dtype=np.uint32
             )
@@ -288,6 +344,8 @@ class FINNExampleOverlay(Overlay):
         elif self.platform == "alveo":
             assert self.odma_handle is None, "Output DMA is already running"
             self.idma.start(self.ibuf_packed_device, batch_size)
+            for iwdma, iwbuf in self.external_weights:
+                iwdma.start(iwbuf, batch_size)
             self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size)
         else:
             raise Exception("Unrecognized platform: %s" % self.platform)
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index ac5cdb4f5389cd2f30f180773741f140b7145822..5906fbc9ac452960e2b25f3f8748825e7cce6650 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -38,6 +38,48 @@ import warnings
 import pkg_resources as pk
 from . import template_driver
 from finn.core.modelwrapper import ModelWrapper
+import numpy as np
+from bitstring import BitArray
+
+
+def to_external_tensor(init, w_dtype):
+    """Pack a weight tensor into bytes for use as an external weight file.
+
+    The flattened values of ``init`` are encoded as ``w_dtype.bitwidth()``-bit
+    integers (signed if ``w_dtype.signed()``, unsigned otherwise) and packed
+    back-to-back into bytes, earlier weights in the less significant bits.
+
+    Returns a 1D ``uint8`` array with one element per packed byte.
+    Raises AssertionError if the weights cannot be packed into whole bytes.
+    """
+    driver_datatype_width = 8  # driver_base.py assumes uint8
+    weight_width = w_dtype.bitwidth()
+    init_f = init.flatten()
+    # weights must tile a byte exactly, otherwise the accumulator below
+    # never holds exactly one byte and no output would ever be written
+    assert (
+        driver_datatype_width % weight_width == 0
+        and (init_f.size * weight_width) % driver_datatype_width == 0
+    ), "Weight tensor not supported as external weight"
+
+    ext_weight_size = (init_f.size * weight_width) // driver_datatype_width
+    ext_weight_tensor = np.zeros(ext_weight_size, dtype=np.uint8)
+    ext_weight_ptr = 0
+    mem_line = BitArray(length=0)
+    for w in init_f:
+        # prepend so that the first weight ends up in the lowest bits
+        if w_dtype.signed():
+            mem_line.prepend(BitArray(int=int(w), length=weight_width))
+        else:
+            mem_line.prepend(BitArray(uint=int(w), length=weight_width))
+        if mem_line.len == driver_datatype_width:
+            ext_weight_tensor[ext_weight_ptr] = mem_line.uint
+            mem_line = BitArray(length=0)
+            ext_weight_ptr += 1
+
+    assert ext_weight_ptr == ext_weight_size
+    return ext_weight_tensor
+
 
 class MakePYNQDriver(Transformation):
     """Create PYNQ Python code to correctly interface the generated
@@ -98,7 +140,39 @@ class MakePYNQDriver(Transformation):
         )
         i_tensor_shape_packed = i_tensor_dummy_packed.shape
         o_tensor_shape_packed = o_tensor_dummy_packed.shape
 
+        # generate .npy files for external weights and determine which
+        # input DMA carries the actual network input
+        weights_dir = pynq_driver_dir + "/runtime_weights"
+        os.makedirs(weights_dir)
+        # default to idma0, the DMA the driver used before this became
+        # configurable, so the template substitution below is always defined
+        net_input_name = "idma0"
+        idma_idx = 0
+        for node in model.graph.node:
+            assert (
+                node.op_type == "StreamingDataflowPartition"
+            ), "CreateDataflowPartition needs to be applied before driver generation"
+            # only partitions whose first input has no producer get an input DMA
+            if model.find_producer(node.input[0]) is not None:
+                continue
+            idma_name = "idma" + str(idma_idx)
+            idma_idx += 1
+            init_tensor = model.get_initializer(node.input[0])
+            if init_tensor is None:
+                # no initializer: this DMA feeds the network input
+                net_input_name = idma_name
+                continue
+            # initializer present: export it as an external weight file
+            w_dtype = model.get_tensor_datatype(node.input[0])
+            init_external_tensor = to_external_tensor(init_tensor, w_dtype)
+            np.save(weights_dir + "/" + idma_name + ".npy", init_external_tensor)
+            if self.platform != "alveo":
+                # TODO: add support in driver_base.py
+                warnings.warn(
+                    "external_weights are not yet supported for non-Alveo builds"
+                )
+
         # fill in the driver template
         driver_py = pynq_driver_dir + "/driver.py"
         driver = template_driver.pynq_driver_template
@@ -122,6 +196,7 @@ class MakePYNQDriver(Transformation):
         driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", mss(o_tensor_shape_normal))
         driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", mss(o_tensor_shape_folded))
         driver = driver.replace("$OUTPUT_SHAPE_PACKED$", mss(o_tensor_shape_packed))
+        driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
 
         with open(driver_py, "w") as f:
             f.write(driver)
@@ -148,9 +223,7 @@ class MakePYNQDriver(Transformation):
             shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")
 
         # generate weight files for runtime-writable layers
-        weights_dir = pynq_driver_dir + "/runtime_weights"
 
-        os.makedirs(weights_dir)
         for sdp_ind, sdp_node in enumerate(model.graph.node):
             assert sdp_node.op_type == "StreamingDataflowPartition"
             # get dataflow model
@@ -174,4 +247,5 @@ class MakePYNQDriver(Transformation):
                     )
                 else:
                     continue
+
         return (model, False)
\ No newline at end of file
diff --git a/src/finn/transformation/fpgadataflow/template_driver.py b/src/finn/transformation/fpgadataflow/template_driver.py
index b595205714d8cb630816d2b42fe96640e49e506e..11415a0224879c342c7a1cd08dadbfc43f3fa645 100644
--- a/src/finn/transformation/fpgadataflow/template_driver.py
+++ b/src/finn/transformation/fpgadataflow/template_driver.py
@@ -78,6 +78,7 @@ io_shape_dict = {
     "oshape_folded" : $OUTPUT_SHAPE_FOLDED$,
     "ishape_packed" : $INPUT_SHAPE_PACKED$,
-    "oshape_packed" : $OUTPUT_SHAPE_PACKED$
+    "oshape_packed" : $OUTPUT_SHAPE_PACKED$,
+    "input_dma_name" : $INPUT_DMA_NAME$
 }
 
 if __name__ == "__main__":