diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index 18d3db18da089a5dda4dbb6d97180dd4a20613b5..c76a38804f8f06b8ba184df28b2395c7bdf1aa40 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -28,6 +28,7 @@
 
 import os
 import shutil
+import warnings
 
 from finn.custom_op.registry import getCustomOp
 from finn.transformation import Transformation
@@ -53,7 +54,7 @@ class MakePYNQDriver(Transformation):
     def apply(self, model):
         vivado_pynq_proj = model.get_metadata_prop("vivado_pynq_proj")
         if vivado_pynq_proj is None or (not os.path.isdir(vivado_pynq_proj)):
-            raise Exception("No PYNQ project found, apply MakePYNQProject first.")
+            warnings.warn("No PYNQ project found, apply MakePYNQProject first.")
 
         # create a temporary folder for the generated driver
         pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
@@ -109,6 +110,11 @@ class MakePYNQDriver(Transformation):
 
         # clock settings for driver
-        clk_ns = float(model.get_metadata_prop("clk_ns"))
+        clk_ns = model.get_metadata_prop("clk_ns")
         # default to 10ns / 100 MHz if property not set
         if clk_ns is None:
             clk_ns = 10.0
         else:
             clk_ns = float(clk_ns)
         fclk_mhz = 1 / (clk_ns * 0.001)
         # TODO change according to PYNQ board?
         driver = driver.replace("$CLK_NAME$", "fclk0_mhz")
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index ab9fd03251819aee72f74cc0c1fa17b99b1e05a4..5c351f75c8ce8512db8874105a1b14f223f7b50d 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -104,7 +104,10 @@ from finn.core.datatype import DataType
 from pynq.ps import Clocks
 
 class FINNAccelDriver():
-    def __init__(self, N, bitfile):
+    def __init__(self, N, bitfile, platform="zynq"):
         \"\"\"Instantiate the FINN accelerator driver.
-        Gets batchsize (N) as integer and path to bitfile as string.\"\"\"
+        Gets batchsize (N) as integer, path to bitfile as string and
+        target platform ("zynq" or "alveo") as string.\"\"\"
         self.N = N
+        # target platform; selects the DMA setup here and in execute()
+        self.platform = platform
@@ -119,21 +122,29 @@ class FINNAccelDriver():
         self.oshape_folded = $OUTPUT_SHAPE_FOLDED$
         self.ishape_packed = $INPUT_SHAPE_PACKED$  # datatype np.uint8
         self.oshape_packed = $OUTPUT_SHAPE_PACKED$  # datatype np.uint8
-        # clock frequency
-        self.fclk_mhz = $CLOCK_FREQ_MHZ$
         # load bitfile and set up accelerator
         self.ol = Overlay(bitfile)
-        # set the clock frequency as specified by user during transformations
-        Clocks.$CLK_NAME$ = self.fclk_mhz
-        self.dma = self.ol.axi_dma_0
-        self.ctrl_regs = self.ol.resize_accel_0
         # neuron folding factor of output = iterations per sample
         self.itersPerSample = self.oshape_packed[-2]
-        # AXI lite register offset for number of iterations
-        # used by TLastMarker to signal end of transmission for AXI CDMA
-        self.REG_OFFSET_NUM_ITERS = 0x10
-        # set up TLastMarker with correct num. samples
-        self.ctrl_regs.write(self.REG_OFFSET_NUM_ITERS, self.N*self.itersPerSample)
+        if self.platform == "zynq":
+            # clock frequency
+            self.fclk_mhz = $CLOCK_FREQ_MHZ$
+            # set the clock frequency as specified by user during transformations
+            if self.fclk_mhz > 0:
+                Clocks.$CLK_NAME$ = self.fclk_mhz
+            self.dma = self.ol.axi_dma_0
+            self.ctrl_regs = self.ol.resize_accel_0
+
+            # AXI lite register offset for number of iterations
+            # used by TLastMarker to signal end of transmission for AXI CDMA
+            self.REG_OFFSET_NUM_ITERS = 0x10
+            # set up TLastMarker with correct num. samples
+            self.ctrl_regs.write(self.REG_OFFSET_NUM_ITERS, self.N*self.itersPerSample)
+        elif self.platform == "alveo":
+            self.idma = self.ol.idma0
+            self.odma = self.ol.odma0
+        else:
+            raise ValueError("Supported platforms are zynq and alveo")
 
         # allocate a PYNQ buffer for the packed input and buffer
         self.ibuf_packed_device = allocate(shape=self.ishape_packed, dtype=np.uint8)
@@ -176,19 +187,29 @@ class FINNAccelDriver():
         np.copyto(self.ibuf_packed_device, data)
 
     def execute(self):
-        \"\"\"Executes accelerator by setting up the DMA and
-        waiting until all transfers complete. Uses only member variables and
+        \"\"\"Executes accelerator by setting up the DMA(s) and
+        waiting until all transfers/calls complete. Uses only member variables and
         returns nothing.\"\"\"
-        dma = self.dma
-        dma.sendchannel.transfer(self.ibuf_packed_device)
-        dma.recvchannel.transfer(self.obuf_packed_device)
-        dma.sendchannel.wait()
-        dma.recvchannel.wait()
+        if self.platform == "zynq":
+            dma = self.dma
+            dma.sendchannel.transfer(self.ibuf_packed_device)
+            dma.recvchannel.transfer(self.obuf_packed_device)
+            dma.sendchannel.wait()
+            dma.recvchannel.wait()
+        else:
+            self.ibuf_packed_device.sync_to_device()
+            self.idma.start(self.ibuf_packed_device, self.N)
+            self.odma.start(self.obuf_packed_device, self.N)
+            self.idma.wait()
+            self.odma.wait()
+            self.obuf_packed_device.sync_from_device()
+
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Set exec mode, batchsize N, bitfile name, inputfile name and outputfile name')
     parser.add_argument('--exec_mode', help='Please select functional verification ("execute") or throughput test ("throughput_test")', default="execute")
+    parser.add_argument('--platform', help='Target platform, zynq or alveo', default="zynq")
     parser.add_argument('--batchsize', help='number of samples for inference', type=int, default=1)
     parser.add_argument('--bitfile', help='name of bitfile (i.e. "resizer.bit")', default="resizer.bit")
     parser.add_argument('--inputfile', help='name of input npy file (i.e. "input.npy")', default="input.npy")
@@ -196,13 +217,14 @@ if __name__ == "__main__":
     # parse arguments
     args = parser.parse_args()
     exec_mode = args.exec_mode
+    platform = args.platform
     N = args.batchsize
     bitfile = args.bitfile
     inputfile = args.inputfile
     outputfile = args.outputfile
 
     # instantiate FINN accelerator driver and pass batchsize and bitfile
-    finnDriver = FINNAccelDriver(N, bitfile)
+    finnDriver = FINNAccelDriver(N, bitfile, platform)
 
     # for the remote execution the data from the input npy file has to be loaded,
     # packed and copied to the PYNQ buffer