diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py index 6700019a4a430d785967a684ad1ca8d186d32bae..dcc4ea55a03bf01a06df2853bd8c1430733eccee 100644 --- a/src/finn/qnn-data/templates/driver/driver_base.py +++ b/src/finn/qnn-data/templates/driver/driver_base.py @@ -36,7 +36,6 @@ from finn.util.data_packing import ( finnpy_to_packed_bytearray, packed_bytearray_to_finnpy, ) -from warnings import warn from finn.util.basic import gen_finn_dt_tensor @@ -141,20 +140,16 @@ class FINNExampleOverlay(Overlay): idma_name = w_filename.split(".")[0] tmp_weight_dict[idma_name] = weight_tensor - if self.platform != "alveo" and len(tmp_weight_dict) > 0: - # Todo: add zynq support pynq API is different - warn("external_weights are not yet supported for non-Alveo builds") - return - for idma_name in tmp_weight_dict.keys(): if idma_name in self.ip_dict.keys(): iwdma = getattr(self, idma_name) weight_tensor = tmp_weight_dict[idma_name] weight_buf = allocate(weight_tensor.shape, dtype=np.uint8) weight_buf[:] = weight_tensor - weight_buf.sync_to_device() + # weight_buf.sync_to_device() + weight_buf.flush() - self.external_weights += [(iwdma, weight_buf)] + self.external_weights += [(iwdma, weight_buf, idma_name)] if "number_of_external_weights" in self._io_shape_dict: hw_ext_weights = self._io_shape_dict["number_of_external_weights"] @@ -351,6 +346,10 @@ class FINNExampleOverlay(Overlay): if self.platform == "zynq-iodma": assert self.odma.read(0x00) & 0x4 != 0, "Output DMA is not idle" # manually launch IODMAs since signatures are missing + for iwdma, iwbuf, iwdma_name in self.external_weights: + iwdma.write(0x10, iwbuf.device_address) + iwdma.write(0x1C, batch_size) + iwdma.write(0x00, 1) self.idma.write(0x10, self.ibuf_packed_device.device_address) self.idma.write(0x1C, batch_size) self.odma.write(0x10, self.obuf_packed_device.device_address) @@ -360,7 +359,7 @@ class FINNExampleOverlay(Overlay): elif self.platform == "alveo": assert self.odma_handle is None, "Output DMA is already running" self.idma.start(self.ibuf_packed_device, batch_size) - for iwdma, iwbuf in self.external_weights: + for iwdma, iwbuf, iwdma_name in self.external_weights: iwdma.start(iwbuf, batch_size) self.odma_handle = self.odma.start(self.obuf_packed_device, batch_size) else: @@ -413,6 +412,10 @@ class FINNExampleOverlay(Overlay): res["DRAM_out_bandwidth[Mb/s]"] = ( np.prod(self.oshape_packed) * 0.000001 / runtime ) + for iwdma, iwbuf, iwdma_name in self.external_weights: + res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = ( + self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime + ) if self.platform == "zynq-iodma": res["fclk[mhz]"] = Clocks.fclk0_mhz elif self.platform == "alveo":