diff --git a/.gitignore b/.gitignore index 8b3166a44070a4575aac86c445c4504b594cda08..d7ee7e014a0c175a8a88060f2aa320efeb501ddc 100644 --- a/.gitignore +++ b/.gitignore @@ -81,3 +81,6 @@ MANIFEST # SSH key dir mounted into Docker /ssh_keys/ + +# PYNQ board files +/board_files/ diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci index 7d5772d9f5118d1f1238dd14a6b57a1b4fd5004d..0d122133a6446cb77160c9447e16ff13d4d4b9c5 100644 --- a/docker/Dockerfile.finn_ci +++ b/docker/Dockerfile.finn_ci @@ -37,7 +37,7 @@ RUN apt-get update RUN apt-get -y upgrade RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev RUN apt-get install -y verilator zsh -RUN apt-get -y install sshpass +RUN apt-get -y install sshpass wget unzip RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config # cloning dependency repos diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev index 8c1502eb4a1941061bd58e6f9a18106f98f259e2..f8e15f34fb4da3dc4ee353a29d26866b68879144 100644 --- a/docker/Dockerfile.finn_dev +++ b/docker/Dockerfile.finn_dev @@ -43,7 +43,7 @@ RUN apt-get update RUN apt-get -y upgrade RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev RUN apt-get install -y verilator nano zsh rsync -RUN apt-get -y install sshpass +RUN apt-get -y install sshpass wget unzip RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config COPY requirements.txt . diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index ee75089c657e4fad1e4a455ac7bd5fe4976e5d4c..72751817383dbdb441970e5816247cfa7760ef5b 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -57,4 +57,19 @@ if [ ! -z "$VITIS_PATH" ];then export XILINX_VITIS=$VITIS_PATH source $VITIS_PATH/settings64.sh fi + +# download PYNQ board files if not already there +if [ ! 
-d "/workspace/finn/board_files" ]; then + gecho "Downloading PYNQ board files for Vivado" + wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip + wget -q https://d2m32eurp10079.cloudfront.net/Download/pynq-z2.zip + unzip -q pynq-z1.zip + unzip -q pynq-z2.zip + mkdir /workspace/finn/board_files + mv pynq-z1/ board_files/ + mv pynq-z2/ board_files/ + rm pynq-z1.zip + rm pynq-z2.zip +fi + exec "$@" diff --git a/finn-rtllib/memstream/component.xml b/finn-rtllib/memstream/component.xml index 6b728c0555a4889b8e76d5759233d1109a3002bd..7910a8284dad3674b8665136506a60c498e0547f 100644 --- a/finn-rtllib/memstream/component.xml +++ b/finn-rtllib/memstream/component.xml @@ -1051,6 +1051,7 @@ <xilinx:family xilinx:lifeCycle="Beta">azynq</xilinx:family> <xilinx:family xilinx:lifeCycle="Beta">zynquplus</xilinx:family> <xilinx:family xilinx:lifeCycle="Production">virtexuplus</xilinx:family> + <xilinx:family xilinx:lifeCycle="Production">virtexuplusHBM</xilinx:family> </xilinx:supportedFamilies> <xilinx:taxonomies> <xilinx:taxonomy>/UserIP</xilinx:taxonomy> diff --git a/src/finn/analysis/fpgadataflow/post_synth_res.py b/src/finn/analysis/fpgadataflow/post_synth_res.py index 508c34aaed50f2935f4915cdcea29a3e92641b3c..81accba23220d3f25e8560443ff22cf59d3733e9 100644 --- a/src/finn/analysis/fpgadataflow/post_synth_res.py +++ b/src/finn/analysis/fpgadataflow/post_synth_res.py @@ -30,15 +30,20 @@ import os import xml.etree.ElementTree as ET from finn.transformation.move_reshape import _is_fpgadataflow_node +from finn.core.modelwrapper import ModelWrapper +from finn.custom_op.registry import getCustomOp -def post_synth_res(model): +def post_synth_res(model, override_synth_report_filename=None): """Extracts the FPGA resource results from the Vivado synthesis. 
Returns {node name : resources_dict}.""" res_dict = {} - synth_report_filename = model.get_metadata_prop("vivado_synth_rpt") + if override_synth_report_filename is not None: + synth_report_filename = override_synth_report_filename + else: + synth_report_filename = model.get_metadata_prop("vivado_synth_rpt") if os.path.isfile(synth_report_filename): tree = ET.parse(synth_report_filename) root = tree.getroot() @@ -50,7 +55,11 @@ def post_synth_res(model): raise Exception("Please run synthesis first") for node in model.graph.node: - if _is_fpgadataflow_node(node): + if node.op_type == "StreamingDataflowPartition": + sdp_model = ModelWrapper(getCustomOp(node).get_nodeattr("model")) + sdp_res_dict = post_synth_res(sdp_model, synth_report_filename) + res_dict.update(sdp_res_dict) + elif _is_fpgadataflow_node(node): row = root.findall(".//*[@contents='%s']/.." % node.name) if row != []: node_dict = {} diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py index 7c3123cd5eb29a54dc5cbfb912225ad3fdb0f219..0c01a48a07608dcd760447e8f569128f58d86f28 100644 --- a/src/finn/core/onnx_exec.py +++ b/src/finn/core/onnx_exec.py @@ -51,8 +51,20 @@ def execute_node(node, context, graph): if node.op_type == "StreamingDataflowPartition": sdp_node = getCustomOp(node) model = ModelWrapper(sdp_node.get_nodeattr("model")) - ret = execute_onnx(model, context, True) - context.update(ret) + inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items())) + # input may have been renamed in partition + assert len(inp_ctx) == 1 + old_iname = node.input[0] + new_iname = model.graph.input[0].name + if old_iname != new_iname: + inp_ctx[new_iname] = inp_ctx[old_iname] + del inp_ctx[old_iname] + ret = execute_onnx(model, inp_ctx, False) + # output may have been renamed in partition + assert len(ret) == 1 + node_oname = node.output[0] + model_oname = model.graph.output[0].name + context[node_oname] = ret[model_oname] else: if node.domain == "finn": diff --git 
a/src/finn/core/remote_exec.py b/src/finn/core/remote_exec.py index a533e4d36629f57f7c4a576570d75a1e051de5be..214358608c43a868f9ef414dcbf6eb33e3f45a5b 100644 --- a/src/finn/core/remote_exec.py +++ b/src/finn/core/remote_exec.py @@ -62,11 +62,15 @@ def remote_exec(model, execution_context): bash_command = ["/bin/bash", "-c", cmd] process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) process_compile.communicate() + # set platform attribute for correct remote execution + platform = model.get_metadata_prop("platform") + assert platform in ["alveo", "zynq", "zynq-iodma"] cmd = ( "sshpass -p {} ssh {}@{} -p {} " '"cd {}/{}; echo "{}" | ' 'sudo -S python3.6 driver.py --exec_mode="execute" --batchsize=1" ' - '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy"' + '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy" ' + '--platform="{}" ' ).format( pynq_password, pynq_username, @@ -75,6 +79,7 @@ def remote_exec(model, execution_context): pynq_target_dir, deployment_folder, pynq_password, + platform, ) bash_command = ["/bin/bash", "-c", cmd] process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py index 05870b8d9d5d3a11bad7882c9a7d122f8cd34cf6..7d0374445d816f1e8d49ed92cf7aa67b024f9ac1 100644 --- a/src/finn/custom_op/fpgadataflow/iodma.py +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -197,11 +197,13 @@ class IODMA(HLSCustomOp): def get_number_output_values(self): oshape = self.get_normal_output_shape() itype_bits = self.get_input_datatype().bitwidth() - intfw = self.get_nodeattr("intfWidth") + stream_width = self.get_nodeattr("streamWidth") nelems = np.prod(oshape) nbits = nelems * itype_bits - assert nbits % intfw == 0, "DMA: total transfer size must be word multiple" - ovalues = nbits // intfw + assert ( + nbits % stream_width == 0 + ), "DMA: total transfer size must be word multiple" + ovalues = 
nbits // stream_width return ovalues def global_includes(self): diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 1da60a5124fa86b4336bae8fd1a587672f2f2e6f..319731df70d5bd1cb80d42932f08acdcec80c074 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -344,6 +344,7 @@ set_property supported_families { \ virtex7 Production \ virtexu Production \ virtexuplus Production \ + virtexuplusHBM Production \ zynq Production \ zynquplus Production \ aartix7 Production \ diff --git a/src/finn/custom_op/streamingdataflowpartition.py b/src/finn/custom_op/streamingdataflowpartition.py index bce4dde426b8838d6c86638a3641d51ab259a6db..31cd38fea3c5a9e88084c3332d46aebdb065f800 100644 --- a/src/finn/custom_op/streamingdataflowpartition.py +++ b/src/finn/custom_op/streamingdataflowpartition.py @@ -36,7 +36,12 @@ class StreamingDataflowPartition(CustomOp): bitfile by itself.""" def get_nodeattr_types(self): - return {"model": ("s", True, "")} + return { + "model": ("s", True, ""), + "res_estimate": ("s", False, ""), + "res_hls": ("s", False, ""), + "res_synth": ("s", False, ""), + } def make_shape_compatible_op(self, model): pass diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py index 62ee92df54eee2b63d84657515d7fbc3a8808b81..da6fa1ff738690308a9b7686a5c92d7395ab50c8 100644 --- a/src/finn/transformation/fpgadataflow/annotate_resources.py +++ b/src/finn/transformation/fpgadataflow/annotate_resources.py @@ -32,6 +32,8 @@ from finn.transformation.move_reshape import _is_fpgadataflow_node from finn.analysis.fpgadataflow.res_estimation import res_estimation from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation from finn.analysis.fpgadataflow.post_synth_res import post_synth_res +from finn.core.modelwrapper import ModelWrapper +from finn.custom_op.registry import 
getCustomOp class AnnotateResources(Transformation): @@ -44,9 +46,10 @@ class AnnotateResources(Transformation): chosen mode (e.g. HLSSynthIP for hls) was previously run. """ - def __init__(self, mode): + def __init__(self, mode, override_res_dict=None): super().__init__() self.mode = mode + self.res_dict = override_res_dict def apply(self, model): graph = model.graph @@ -58,10 +61,33 @@ class AnnotateResources(Transformation): res_fxn = post_synth_res else: raise Exception("Unrecognized mode for AnnotateResources") - res_dict = model.analysis(res_fxn) + if self.res_dict is None: + self.res_dict = model.analysis(res_fxn) + children_dict = {} + # annotate node resources + for node in graph.node: + if _is_fpgadataflow_node(node) and node.name in self.res_dict.keys(): + op_inst = registry.getCustomOp(node) + op_inst.set_nodeattr("res_" + self.mode, str(self.res_dict[node.name])) + children_dict[node.name] = self.res_dict[node.name] + elif node.op_type == "StreamingDataflowPartition": + # recurse into model to manually annotate per-layer resources + sdp_model_filename = getCustomOp(node).get_nodeattr("model") + sdp_model = ModelWrapper(sdp_model_filename) + sdp_model = sdp_model.transform( + AnnotateResources(self.mode, self.res_dict) + ) + sdp_dict = sdp_model.get_metadata_prop("res_total_" + self.mode) + sdp_dict = eval(sdp_dict) + # save transformed model + sdp_model.save(sdp_model_filename) + # set res attribute for sdp node + getCustomOp(node).set_nodeattr("res_" + self.mode, str(sdp_dict)) + children_dict[node.name] = sdp_dict + self.res_dict.update(children_dict) total_dict = {} - for lname in res_dict.keys(): - layer_res_dict = res_dict[lname] + for lname in children_dict.keys(): + layer_res_dict = self.res_dict[lname] for r_type in layer_res_dict.keys(): r_amount = layer_res_dict[r_type] r_amount = float(r_amount) @@ -73,9 +99,4 @@ class AnnotateResources(Transformation): if "efficiency" in k: total_dict[k] = total_dict[k] / len(graph.node) 
model.set_metadata_prop("res_total_" + self.mode, str(total_dict)) - for node in graph.node: - if _is_fpgadataflow_node(node) and node.name in res_dict.keys(): - op_inst = registry.getCustomOp(node) - op_inst.set_nodeattr("res_" + self.mode, str(res_dict[node.name])) - return (model, False) diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py index 7197e68be2fbdf5fc39b7ed202e88672614514ec..5ec4ab14d65d63523856a6bb107bf75c1ca5a261 100644 --- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py +++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py @@ -125,6 +125,7 @@ class CreateDataflowPartition(Transformation): [df_out], # use the model attribute to mark the df model model=df_model_filename, + domain="finn", ) non_df_model.graph.node.insert(df_start_ind, df_node) model = non_df_model diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index 018ad385f33a8e0aea4aa42599fd47fe5dae57dd..90b4b6c47e6e353c1b606d6918eb271e9c0619c5 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -210,7 +210,8 @@ class CreateStitchedIP(Transformation): assert ( node_inst.get_nodeattr("Direction") == "in" ), """Output TLastMarker incorrect direction""" - elif node.op_type == "IODMA": + elif node.op_type == "IODMA" and len(model.graph.node) != 1: + # don't apply this check for a 1-node partition assert ( node_inst.get_nodeattr("direction") == "in" ), """Input DMA incorrect direction""" @@ -241,17 +242,11 @@ class CreateStitchedIP(Transformation): if model.find_consumers(node.output[0]) is None: # last node in graph self.connect_m_axis_external(node) - # ensure it is a TLastMarker to have a valid TLast signal - assert ( - node.op_type == "TLastMarker" or node.op_type == "IODMA" - ), """Last node is 
not TLastMarker or DMA. - Please run transformation InsertTLastMarker/InsertIODMA to ensure - a valid TLast signal""" if node.op_type == "TLastMarker": assert ( node_inst.get_nodeattr("Direction") == "out" ), """Output TLastMarker incorrect direction""" - elif node.op_type == "IODMA": + elif node.op_type == "IODMA" and len(model.graph.node) != 1: assert ( node_inst.get_nodeattr("direction") == "out" ), """Output DMA incorrect direction""" diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index 85a2d47be0599a852b223f1a65d3ec04efe9bda7..6f7fde0c4faba09e584eb578819f44c18639bc9d 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -159,7 +159,7 @@ class InsertFIFO(Transformation): # insert FIFO as last node, except when last node is DMA if ( graph.node[-1].op_type != "StreamingFIFO" - and graph.node[0].op_type != "IODMA" + and graph.node[-1].op_type != "IODMA" ): n = graph.node[-1] assert ( diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py index 0cd7c0d4d41accf8cdba8adfaf4dbb00fc0cab7a..72e5ec4fdd721ecf549adaf7ddd38db4636bce27 100644 --- a/src/finn/transformation/fpgadataflow/insert_iodma.py +++ b/src/finn/transformation/fpgadataflow/insert_iodma.py @@ -81,8 +81,8 @@ class InsertIODMA(Transformation): # check if tensor is NHWC assert ( model.get_tensor_layout(graph_out_name) == DataLayout.NHWC - or model.get_tensor_layout(graph_in_name) == DataLayout.NC - ), "Data layout of tensors must be NHWC or NC" + or model.get_tensor_layout(graph_out_name) == DataLayout.NC + ), "Data layout of output tensor must be NHWC or NC" out_shape = model.get_tensor_shape(graph_out_name) out_dtype = model.get_tensor_datatype(graph_out_name) # determine the feasible interface width @@ -120,7 +120,7 @@ class InsertIODMA(Transformation): assert ( model.get_tensor_layout(graph_in_name) 
== DataLayout.NHWC or model.get_tensor_layout(graph_in_name) == DataLayout.NC - ), "Data layout of tensors must be NHWC or NC" + ), "Data layout of input tensor must be NHWC or NC" in_shape = model.get_tensor_shape(graph_in_name) in_dtype = model.get_tensor_datatype(graph_in_name) # determine the feasible interface width diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index 1e45a65720604144f67245b98dcbe3f6dc8363f5..a7bf9e6e6279923764009a00e2f805be1b1fa9c0 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -26,10 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import os -import shutil -import warnings +import shutil from finn.custom_op.registry import getCustomOp from finn.transformation import Transformation from finn.util.basic import gen_finn_dt_tensor, get_finn_root, make_build_dir @@ -48,14 +46,11 @@ class MakePYNQDriver(Transformation): value. 
""" - def __init__(self): + def __init__(self, platform): super().__init__() + self.platform = platform def apply(self, model): - vivado_pynq_proj = model.get_metadata_prop("vivado_pynq_proj") - if vivado_pynq_proj is None or (not os.path.isdir(vivado_pynq_proj)): - warnings.warn("No PYNQ project found, apply MakePYNQProject first.") - # create a temporary folder for the generated driver pynq_driver_dir = make_build_dir(prefix="pynq_driver_") model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir) @@ -68,11 +63,21 @@ class MakePYNQDriver(Transformation): o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name)) i_tensor_dt = model.get_tensor_datatype(i_tensor_name) o_tensor_dt = model.get_tensor_datatype(o_tensor_name) - # extract HLSCustomOp instances to get folded i/o shapes - first_node = getCustomOp(model.find_consumer(i_tensor_name)) - last_node = getCustomOp(model.find_producer(o_tensor_name)) - i_tensor_shape_folded = tuple(first_node.get_folded_input_shape()) - o_tensor_shape_folded = tuple(last_node.get_folded_output_shape()) + # handle folded i/o shapes due to differences in DMA engines + if self.platform == "zynq": + # extract HLSCustomOp instances to get folded i/o shapes + first_node = getCustomOp(model.find_consumer(i_tensor_name)) + last_node = getCustomOp(model.find_producer(o_tensor_name)) + i_tensor_shape_folded = tuple(first_node.get_folded_input_shape()) + o_tensor_shape_folded = tuple(last_node.get_folded_output_shape()) + else: + i_tensor_shape_folded = list(i_tensor_shape_normal) + i_tensor_shape_folded.insert(-1, 1) + i_tensor_shape_folded = tuple(i_tensor_shape_folded) + o_tensor_shape_folded = list(o_tensor_shape_normal) + o_tensor_shape_folded.insert(-1, 1) + o_tensor_shape_folded = tuple(o_tensor_shape_folded) + # generate dummy folded i/o tensors and their packed versions i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded) o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt, 
o_tensor_shape_folded) @@ -99,6 +104,7 @@ class MakePYNQDriver(Transformation): ret = ret.replace("[1,", "[%s," % batch_var_name) return ret + driver = driver.replace("$PLATFORM$", self.platform) driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt)) driver = driver.replace("$INPUT_SHAPE_NORMAL$", mss(i_tensor_shape_normal)) driver = driver.replace("$INPUT_SHAPE_FOLDED$", mss(i_tensor_shape_folded)) diff --git a/src/finn/transformation/fpgadataflow/make_pynq_proj.py b/src/finn/transformation/fpgadataflow/make_pynq_proj.py index a874d7a7c702e1b3e9125fc031aa65dc287a407d..5e45d6f230503668a15d784e3c6afa45560fe004 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_proj.py @@ -128,6 +128,8 @@ class MakePYNQProject(Transformation): # filename for the synth utilization report synth_report_filename = vivado_pynq_proj_dir + "/synth_report.xml" model.set_metadata_prop("vivado_synth_rpt", synth_report_filename) + # set platform attribute for correct remote execution + model.set_metadata_prop("platform", "zynq") # get metadata property clk_ns to calculate clock frequency clk_ns = float(model.get_metadata_prop("clk_ns")) diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py new file mode 100644 index 0000000000000000000000000000000000000000..dfc076ba52ab5911267d807a7513e4840f01edaf --- /dev/null +++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py @@ -0,0 +1,316 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +import subprocess + +from finn.custom_op.registry import getCustomOp +from finn.transformation import Transformation +from finn.core.modelwrapper import ModelWrapper +from finn.util.basic import get_by_name, make_build_dir +from finn.util.basic import get_num_default_workers +from finn.util.basic import pynq_part_map + +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from finn.transformation.fpgadataflow.insert_dwc import InsertDWC +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.floorplan import Floorplan +from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.infer_data_layouts import InferDataLayouts +from shutil import copy + +from . import templates + + +def collect_ip_dirs(model, ipstitch_path): + # collect list of all IP dirs + ip_dirs = [] + for node in model.graph.node: + ip_dir_attribute = get_by_name(node.attribute, "ip_path") + assert ( + ip_dir_attribute is not None + ), """Node attribute "ip_path" is + empty. Please run transformation HLSSynth_ipgen first.""" + ip_dir_value = ip_dir_attribute.s.decode("UTF-8") + assert os.path.isdir( + ip_dir_value + ), """The directory that should + contain the generated ip blocks doesn't exist.""" + ip_dirs += [ip_dir_value] + ip_dirs += [ipstitch_path + "/ip"] + return ip_dirs + + +class MakeZYNQProject(Transformation): + """Create a Vivado overlay project (including the shell infrastructure) + from the already-stitched IP block for this graph. 
+ All nodes in the graph must have the fpgadataflow backend attribute, + and the CreateStitchedIP transformation must have been previously run on + the graph. This is functionally equivalent with MakePYNQProject but does + not use Pynq infrastructure and instead creates a fully custom block design. + However, this transform requires DMAs in the accelerator design. + + Outcome if successful: sets the vivado_pynq_proj attribute in the ONNX + ModelProto's metadata_props field, with the created project dir as the + value. + """ + + def __init__(self, platform, enable_debug=False): + super().__init__() + self.platform = platform + self.enable_debug = 1 if enable_debug else 0 + + def apply(self, model): + + # create a config file and empty list of xo files + config = [] + idma_idx = 0 + odma_idx = 0 + aximm_idx = 0 + axilite_idx = 0 + global_clk_ns = 0 + instance_names = {} + for node in model.graph.node: + assert node.op_type == "StreamingDataflowPartition", "Invalid link graph" + sdp_node = getCustomOp(node) + dataflow_model_filename = sdp_node.get_nodeattr("model") + kernel_model = ModelWrapper(dataflow_model_filename) + + ipstitch_path = kernel_model.get_metadata_prop("vivado_stitch_proj") + if ipstitch_path is None or (not os.path.isdir(ipstitch_path)): + raise Exception( + "No stitched IPI design found for %s, apply CreateStitchedIP first." + % node.name + ) + + vivado_stitch_vlnv = kernel_model.get_metadata_prop("vivado_stitch_vlnv") + if vivado_stitch_vlnv is None: + raise Exception( + "No vlnv found for %s, apply CreateStitchedIP first." 
% node.name + ) + + ip_dirs = ["list"] + ip_dirs += collect_ip_dirs(kernel_model, ipstitch_path) + ip_dirs_str = "[%s]" % (" ".join(ip_dirs)) + config.append( + "set_property ip_repo_paths " + "[concat [get_property ip_repo_paths [current_project]] %s] " + "[current_project]" % ip_dirs_str + ) + config.append("update_ip_catalog -rebuild -scan_changes") + + # get metadata property clk_ns to calculate clock frequency + clk_ns = float(kernel_model.get_metadata_prop("clk_ns")) + if clk_ns > global_clk_ns: + global_clk_ns = clk_ns + + # gather info on connectivity + # assume each node connected to outputs/inputs is DMA: + # has axis, aximm and axilite + # everything else is axis-only + # assume only one connection from each ip to the next + # all aximm allocated to DDR[0] + # all kernels allocated to SLR0 + producer = model.find_producer(node.input[0]) + consumer = model.find_consumers(node.output[0]) + # define kernel instances + # name kernels connected to graph inputs as idmaxx + # name kernels connected to graph outputs as odmaxx + if producer is None or consumer is None: + if producer is None: + instance_names[node.name] = "idma" + str(idma_idx) + elif consumer is None: + instance_names[node.name] = "odma" + str(odma_idx) + config.append( + "create_bd_cell -type ip -vlnv %s %s" + % (vivado_stitch_vlnv, instance_names[node.name]) + ) + config.append( + "connect_bd_intf_net [get_bd_intf_pins %s/m_axi_gmem0] " + "[get_bd_intf_pins smartconnect_0/S%02d_AXI]" + % (instance_names[node.name], aximm_idx) + ) + config.append( + "connect_bd_intf_net [get_bd_intf_pins %s/s_axi_control] " + "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]" + % (instance_names[node.name], axilite_idx) + ) + idma_idx += 1 + aximm_idx += 1 + axilite_idx += 1 + else: + instance_names[node.name] = node.name + config.append( + "create_bd_cell -type ip -vlnv %s %s" + % (vivado_stitch_vlnv, instance_names[node.name]) + ) + config.append( + "connect_bd_net [get_bd_pins %s/ap_clk] " + "[get_bd_pins 
smartconnect_0/aclk]" % instance_names[node.name] + ) + config.append( + "connect_bd_net [get_bd_pins %s/ap_rst_n] " + "[get_bd_pins smartconnect_0/aresetn]" % instance_names[node.name] + ) + # connect streams + if producer is not None: + for i in range(len(node.input)): + producer = model.find_producer(node.input[i]) + if producer is not None: + j = list(producer.output).index(node.input[i]) + config.append( + "connect_bd_intf_net [get_bd_intf_pins %s/s_axis_%d] " + "[get_bd_intf_pins %s/m_axis_%d]" + % ( + instance_names[node.name], + i, + instance_names[producer.name], + j, + ) + ) + + # create a temporary folder for the project + vivado_pynq_proj_dir = make_build_dir(prefix="vivado_zynq_proj_") + model.set_metadata_prop("vivado_pynq_proj", vivado_pynq_proj_dir) + + fclk_mhz = int(1 / (global_clk_ns * 0.001)) + + # create a TCL recipe for the project + ipcfg = vivado_pynq_proj_dir + "/ip_config.tcl" + config = "\n".join(config) + "\n" + with open(ipcfg, "w") as f: + f.write( + templates.custom_zynq_shell_template + % ( + fclk_mhz, + axilite_idx, + aximm_idx, + self.platform, + pynq_part_map[self.platform], + config, + self.enable_debug, + get_num_default_workers(), + ) + ) + + # create a TCL recipe for the project + synth_project_sh = vivado_pynq_proj_dir + "/synth_project.sh" + working_dir = os.environ["PWD"] + with open(synth_project_sh, "w") as f: + f.write("#!/bin/bash \n") + f.write("cd {}\n".format(vivado_pynq_proj_dir)) + f.write("vivado -mode tcl -source %s\n" % ipcfg) + f.write("cd {}\n".format(working_dir)) + + # call the synthesis script + bash_command = ["bash", synth_project_sh] + process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_compile.communicate() + bitfile_name = ( + vivado_pynq_proj_dir + "/finn_zynq_link.runs/impl_1/top_wrapper.bit" + ) + if not os.path.isfile(bitfile_name): + raise Exception("Synthesis failed, no bitfile found") + deploy_bitfile_name = vivado_pynq_proj_dir + "/resizer.bit" + 
copy(bitfile_name, deploy_bitfile_name) + # set bitfile attribute + model.set_metadata_prop("vivado_pynq_bitfile", deploy_bitfile_name) + # set platform attribute for correct remote execution + model.set_metadata_prop("platform", "zynq-iodma") + hwh_name = ( + vivado_pynq_proj_dir + + "/finn_zynq_link.srcs/sources_1/bd/top/hw_handoff/top.hwh" + ) + if not os.path.isfile(hwh_name): + raise Exception("Synthesis failed, no hardware handoff file found") + deploy_hwh_name = vivado_pynq_proj_dir + "/resizer.hwh" + copy(hwh_name, deploy_hwh_name) + # filename for the synth utilization report + synth_report_filename = vivado_pynq_proj_dir + "/synth_report.xml" + model.set_metadata_prop("vivado_synth_rpt", synth_report_filename) + return (model, False) + + +class ZynqBuild(Transformation): + """Best-effort attempt at building the accelerator for Zynq.""" + + def __init__(self, platform, period_ns, enable_debug=False): + super().__init__() + self.fpga_part = pynq_part_map[platform] + self.period_ns = period_ns + self.platform = platform + self.enable_debug = enable_debug + + def apply(self, model): + # first infer layouts + model = model.transform(InferDataLayouts()) + # prepare at global level, then break up into kernels + prep_transforms = [ + InsertIODMA(64), + InsertDWC(), + Floorplan(), + CreateDataflowPartition(), + ] + for trn in prep_transforms: + model = model.transform(trn) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + # Build each kernel individually + sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition") + for sdp_node in sdp_nodes: + sdp_node = getCustomOp(sdp_node) + dataflow_model_filename = sdp_node.get_nodeattr("model") + kernel_model = ModelWrapper(dataflow_model_filename) + kernel_model = kernel_model.transform(InsertFIFO()) + kernel_model = kernel_model.transform(GiveUniqueNodeNames()) + kernel_model.save(dataflow_model_filename) + kernel_model = kernel_model.transform( + 
PrepareIP(self.fpga_part, self.period_ns) + ) + kernel_model = kernel_model.transform(HLSSynthIP()) + kernel_model = kernel_model.transform(ReplaceVerilogRelPaths()) + kernel_model = kernel_model.transform( + CreateStitchedIP( + self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True + ) + ) + kernel_model.save(dataflow_model_filename) + # Assemble design from IPs + model = model.transform( + MakeZYNQProject(self.platform, enable_debug=self.enable_debug) + ) + return (model, False) diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index 48895f35da1285516467b515b8ef518febbe2f12..eaeadc7b38b14b2d2eaa761b3cd46220b9fe6bbe 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -104,7 +104,7 @@ from finn.core.datatype import DataType from pynq.ps import Clocks class FINNAccelDriver(): - def __init__(self, N, bitfile, platform="zynq"): + def __init__(self, N, bitfile, platform="$PLATFORM$"): \"\"\"Instantiate the FINN accelerator driver. 
Gets batchsize (N) as integer and path to bitfile as string.\"\"\" self.platform = platform @@ -141,8 +141,16 @@ class FINNAccelDriver(): elif self.platform == "alveo": self.idma = self.ol.idma0 self.odma = self.ol.odma0 + elif self.platform == "zynq-iodma": + self.idma = self.ol.idma0 + self.odma = self.ol.odma0 + # clock frequency + self.fclk_mhz = $CLOCK_FREQ_MHZ$ + # set the clock frequency as specified by user during transformations + if self.fclk_mhz > 0: + Clocks.$CLK_NAME$ = self.fclk_mhz else: - raise ValueError("Supported platforms are zynq and alveo") + raise ValueError("Supported platforms are zynq zynq-iodma alveo") # allocate a PYNQ buffer for the packed input and buffer self.ibuf_packed_device = allocate(shape=self.ishape_packed, dtype=np.uint8) @@ -194,7 +202,20 @@ class FINNAccelDriver(): dma.recvchannel.transfer(self.obuf_packed_device) dma.sendchannel.wait() dma.recvchannel.wait() - else: + elif self.platform == "zynq-iodma": + # manually launch IODMAs since signatures are missing + self.idma.write(0x10, self.ibuf_packed_device.device_address) + self.idma.write(0x1c, self.N) + self.odma.write(0x10, self.obuf_packed_device.device_address) + self.odma.write(0x1c, self.N) + self.idma.write(0x00, 1) + self.odma.write(0x00, 1) + # wait until output IODMA is finished + status = self.odma.read(0x00) + while status & 0x2 == 0: + status = self.odma.read(0x00) + + elif self.platform == "alveo": self.ibuf_packed_device.sync_to_device() self.idma.start(self.ibuf_packed_device, self.N) self.odma.start(self.obuf_packed_device, self.N) @@ -207,7 +228,7 @@ class FINNAccelDriver(): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Set exec mode, batchsize N, bitfile name, inputfile name and outputfile name') parser.add_argument('--exec_mode', help='Please select functional verification ("execute") or throughput test ("throughput_test")', default="execute") - parser.add_argument('--platform', help='Target platform, zynq or alveo', 
default="zynq") + parser.add_argument('--platform', help='Target platform: zynq zynq-iodma alveo', default="zynq") parser.add_argument('--batchsize', help='number of samples for inference', type=int, default=1) parser.add_argument('--bitfile', help='name of bitfile (i.e. "resizer.bit")', default="resizer.bit") parser.add_argument('--inputfile', help='name of input npy file (i.e. "input.npy")', default="input.npy") @@ -278,3 +299,117 @@ if __name__ == "__main__": """ + +custom_zynq_shell_template = """ +set FREQ_MHZ %s +set NUM_AXILITE %d +if {$NUM_AXILITE > 9} { + error "Maximum 10 AXI-Lite interfaces supported" +} +set NUM_AXIMM %d +set BOARD %s +set FPGA_PART %s +create_project finn_zynq_link ./ -part $FPGA_PART + +# set board part repo paths to find PYNQ-Z1/Z2 +set paths_prop [get_property BOARD_PART_REPO_PATHS [current_project]] +set paths_param [get_param board.repoPaths] +lappend paths_prop /workspace/finn/board_files +lappend paths_param /workspace/finn/board_files +set_property BOARD_PART_REPO_PATHS $paths_prop [current_project] +set_param board.repoPaths $paths_param + +if {$BOARD == "ZCU104"} { + set_property board_part xilinx.com:zcu104:part0:1.1 [current_project] + set ZYNQ_TYPE "zynq_us+" +} elseif {$BOARD == "Ultra96"} { + set ZYNQ_TYPE "zynq_us+" +} elseif {$BOARD == "Pynq-Z2"} { + set ZYNQ_TYPE "zynq_7000" +} elseif {$BOARD == "Pynq-Z1"} { + set ZYNQ_TYPE "zynq_7000" + set_property board_part www.digilentinc.com:pynq-z1:part0:1.0 [current_project] +} else { + puts "Unrecognized board" +} + +create_bd_design "top" +if {$ZYNQ_TYPE == "zynq_us+"} { + create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.3 zynq_ps + apply_bd_automation -rule xilinx.com:bd_rule:zynq_ultra_ps_e -config {apply_board_preset "1" } [get_bd_cells zynq_ps] + #activate one slave port, deactivate the second master port + set_property -dict [list CONFIG.PSU__USE__S_AXI_GP2 {1}] [get_bd_cells zynq_ps] + set_property -dict [list CONFIG.PSU__USE__M_AXI_GP1 {0}] [get_bd_cells 
zynq_ps] + #set frequency of PS clock (this can't always be exactly met) + set_property -dict [list CONFIG.PSU__CRL_APB__PL0_REF_CTRL__FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps] +} elseif {$ZYNQ_TYPE == "zynq_7000"} { + create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 zynq_ps + apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config {make_external "FIXED_IO, DDR" apply_board_preset "1" Master "Disable" Slave "Disable" } [get_bd_cells zynq_ps] + set_property -dict [list CONFIG.PCW_USE_S_AXI_HP0 {1}] [get_bd_cells zynq_ps] + set_property -dict [list CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps] +} else { + puts "Unrecognized Zynq type" +} + +#instantiate axi interconnect, axi smartconnect +create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_0 +create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_0 +#set number of axilite interfaces, and number of axi master interfaces +set_property -dict [list CONFIG.NUM_SI $NUM_AXIMM] [get_bd_cells smartconnect_0] +set_property -dict [list CONFIG.NUM_MI $NUM_AXILITE] [get_bd_cells axi_interconnect_0] + +#create reset controller and connect interconnects to PS +if {$ZYNQ_TYPE == "zynq_us+"} { + connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0_FPD] + connect_bd_intf_net [get_bd_intf_pins zynq_ps/M_AXI_HPM0_FPD] -boundary_type upper [get_bd_intf_pins axi_interconnect_0/S00_AXI] + #connect interconnect clocks and resets + apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/ACLK] + apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/S00_ACLK] + apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0
{} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins zynq_ps/saxihp0_fpd_aclk] +} elseif {$ZYNQ_TYPE == "zynq_7000"} { + connect_bd_intf_net -boundary_type upper [get_bd_intf_pins zynq_ps/M_AXI_GP0] [get_bd_intf_pins axi_interconnect_0/S00_AXI] + connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0] + apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/ACLK] + apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/S00_ACLK] + apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins zynq_ps/S_AXI_HP0_ACLK] +} +connect_bd_net [get_bd_pins axi_interconnect_0/ARESETN] [get_bd_pins smartconnect_0/aresetn] + +#custom IP instantiations/connections start here +%s + +# set up debug +if {%d == 1} { + set_property HDL_ATTRIBUTE.DEBUG true [get_bd_intf_nets {idma0_m_axis_0}] + set_property HDL_ATTRIBUTE.DEBUG true [get_bd_intf_nets {StreamingDataflowPartition_1_m_axis_0}] + set_property HDL_ATTRIBUTE.DEBUG true [get_bd_intf_nets {smartconnect_0_M00_AXI}] + apply_bd_automation -rule xilinx.com:bd_rule:debug -dict [list \ + [get_bd_intf_nets smartconnect_0_M00_AXI] {AXI_R_ADDRESS "Data and Trigger" AXI_R_DATA "Data and Trigger" AXI_W_ADDRESS "Data and Trigger" AXI_W_DATA "Data and Trigger" AXI_W_RESPONSE "Data and Trigger" CLK_SRC "/zynq_ps/FCLK_CLK0" SYSTEM_ILA "Auto" APC_EN "0" } \ + [get_bd_intf_nets idma0_m_axis_0] {AXIS_SIGNALS "Data and Trigger" CLK_SRC "/zynq_ps/FCLK_CLK0" SYSTEM_ILA "Auto" APC_EN "0" } \ + [get_bd_intf_nets StreamingDataflowPartition_1_m_axis_0] {AXIS_SIGNALS "Data and Trigger" CLK_SRC "/zynq_ps/FCLK_CLK0" SYSTEM_ILA "Auto" APC_EN "0" } \ + ] +} + +#finalize clock and reset connections for interconnects +set i 0 +while 
{$i < $NUM_AXILITE} { + apply_bd_automation -quiet -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/M0${i}_ACLK] + incr i +} + +save_bd_design +assign_bd_address +validate_bd_design + +set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [ get_files top.bd ] +make_wrapper -files [get_files top.bd] -import -fileset sources_1 -top + +# out-of-context synth can't be used for bitstream generation +# set_property -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} -value {-mode out_of_context} -objects [get_runs synth_1] +launch_runs -to_step write_bitstream impl_1 -jobs %d +wait_on_run [get_runs impl_1] + +# generate synthesis report +open_run synth_1 -name synth_1 +report_utilization -hierarchical -hierarchical_depth 4 -file synth_report.xml -format xml +""" diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py index ae529f2f4a165a732627befea0675073bc490996..0fb85f25e4f8d652a87f1e832c6b41fd67a7406e 100644 --- a/src/finn/transformation/fpgadataflow/vitis_build.py +++ b/src/finn/transformation/fpgadataflow/vitis_build.py @@ -52,10 +52,16 @@ from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeN from finn.util.basic import make_build_dir from finn.transformation.infer_data_layouts import InferDataLayouts + def _check_vitis_envvars(): assert "VITIS_PATH" in os.environ, "VITIS_PATH must be set for Vitis" - assert "PLATFORM_REPO_PATHS" in os.environ, "PLATFORM_REPO_PATHS must be set for Vitis" - assert "XILINX_XRT" in os.environ, "XILINX_XRT must be set for Vitis, ensure the XRT env is sourced" + assert ( + "PLATFORM_REPO_PATHS" in os.environ + ), "PLATFORM_REPO_PATHS must be set for Vitis" + assert ( + "XILINX_XRT" in os.environ + ), "XILINX_XRT must be set for Vitis, ensure the XRT env is sourced" + class CreateVitisXO(Transformation): """Create a Vitis object file from a stitched FINN ip. 
@@ -145,7 +151,9 @@ class CreateVitisXO(Transformation): bash_command = ["bash", package_xo_sh] process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) process_compile.communicate() - assert os.path.isfile(xo_path), "Vitis .xo file not created, check logs under %s" % vivado_proj_dir + assert os.path.isfile(xo_path), ( + "Vitis .xo file not created, check logs under %s" % vivado_proj_dir + ) return (model, False) @@ -238,7 +246,7 @@ class VitisLink(Transformation): f.write("cd {}\n".format(link_dir)) f.write( "v++ -t hw --platform %s --link %s" - " --kernel_frequency %d --config config.txt\n" + " --kernel_frequency %d --config config.txt --optimize 2 --save-temps -R2\n" % (self.platform, " ".join(object_files), self.f_mhz) ) f.write("cd {}\n".format(working_dir)) @@ -247,7 +255,9 @@ class VitisLink(Transformation): process_compile.communicate() # TODO rename xclbin appropriately here? xclbin = link_dir + "/a.xclbin" - assert os.path.isfile(xclbin), "Vitis .xclbin file not created, check logs under %s" % link_dir + assert os.path.isfile(xclbin), ( + "Vitis .xclbin file not created, check logs under %s" % link_dir + ) model.set_metadata_prop("vitis_xclbin", xclbin) return (model, False) @@ -305,5 +315,7 @@ class VitisBuild(Transformation): kernel_model.save(dataflow_model_filename) # Assemble design from kernels model = model.transform(VitisLink(self.platform, round(1000 / self.period_ns))) + # set platform attribute for correct remote execution + model.set_metadata_prop("platform", "alveo") return (model, False) diff --git a/src/finn/util/vcd.py b/src/finn/util/vcd.py index d9e244422065314ceb790dc6719b57688ff76828..a4400f7bd7e75549189f081ce255fd67c49b3746 100644 --- a/src/finn/util/vcd.py +++ b/src/finn/util/vcd.py @@ -162,16 +162,23 @@ def _get_stats(x): return (x[0], get_stream_if_stats(x[1], x[0])) -def get_all_stream_if_stats(vcd_file, stream_ifs=None, sort_by="{'V': 1, 'R': 0}"): +def get_all_stream_if_stats(vcd_file, stream_ifs=None, 
sort_by="{'V': 1, 'R': 0}", num_workers=None): """Return a list of streaming interface stats, sorted by the percentage - for the given sort_by key. If stream_ifs is None, all streamin interface + for the given sort_by key. If stream_ifs is None, all streaming interface stats will be returned, otherwise treated as a list of interface names to - return the stats for.""" + return the stats for. + By default the number of parallel workers from the environment variable + NUM_DEFAULT_WORKERS will be used. This behavior can be changed on a per + call basis by supplying the optional parameter: num_workers + """ if stream_ifs is None: stream_ifs = list_stream_if(vcd_file) - with mp.Pool(get_num_default_workers()) as p: + if num_workers is None: + num_workers = get_num_default_workers() + + with mp.Pool(num_workers) as p: stream_ifs = map(lambda x: (x, vcd_file), stream_ifs) all_stats = p.map(_get_stats, stream_ifs) diff --git a/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py new file mode 100644 index 0000000000000000000000000000000000000000..1a1b21afa1aee5db97add9b3eadba1b750a967cc --- /dev/null +++ b/tests/end2end/test_zynqbuild_end2end_cnv_w1a1.py @@ -0,0 +1,252 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import pytest +import numpy as np + +# as of Feb'20 there is a bug that segfaults ONNX shape inference if we +# import pytorch before onnx, so we make sure to import onnx first +import onnx # NOQA +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.streamline.absorb as absorb +from finn.core.onnx_exec import execute_onnx +from finn.custom_op.registry import getCustomOp +from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount +from finn.transformation.fold_constants import FoldConstants + +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ +from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline import Streamline +from finn.util.basic import pynq_part_map +from finn.util.test import get_test_model_trained, 
load_test_checkpoint_or_skip +from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources +from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild +import pkg_resources as pk +from finn.transformation.double_to_single_float import DoubleToSingleFloat +from finn.transformation.move_reshape import RemoveCNVtoFCFlatten +from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul +from finn.transformation.streamline.reorder import MakeMaxPoolNHWC +from finn.transformation.infer_data_layouts import InferDataLayouts + + +build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] +test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") +test_fpga_part = pynq_part_map[test_pynq_board] +target_clk_ns = 10 +mem_mode = "decoupled" + + +def test_end2end_zynqbuild_cnv_w1a1_export(): + import brevitas.onnx as bo + + tfc = get_test_model_trained("CNV", 1, 1) + bo.export_finn_onnx( + tfc, (1, 3, 32, 32), build_dir + "/end2end_zynqbuild_cnv_w1a1_export.onnx" + ) + + +def test_end2end_zynqbuild_cnv_w1a1_import_and_tidy(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_export.onnx" + ) + model = model.transform(DoubleToSingleFloat()) + model = model.transform(InferShapes()) + model = model.transform(FoldConstants()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model = model.transform(RemoveStaticGraphInputs()) + model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_tidy.onnx") + + +def test_end2end_zynqbuild_cnv_w1a1_streamline(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_tidy.onnx" + ) + model = model.transform(Streamline()) + model = model.transform(LowerConvsToMatMul()) + model = model.transform(MakeMaxPoolNHWC()) + model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) + model = model.transform(ConvertBipolarMatMulToXnorPopcount()) + model = model.transform(Streamline()) + model = 
model.transform(RemoveUnusedTensors()) + model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_streamlined.onnx") + + +def test_end2end_zynqbuild_cnv_w1a1_convert_to_hls_layers(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_streamlined.onnx" + ) + model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode)) + model = model.transform(to_hls.InferConvInpGen()) + model = model.transform(to_hls.InferStreamingMaxPool()) + model = model.transform(RemoveCNVtoFCFlatten()) + model = model.transform(InferDataLayouts()) + model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_hls_layers.onnx") + + +def test_end2end_zynqbuild_cnv_w1a1_create_dataflow_partition(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_hls_layers.onnx" + ) + parent_model = model.transform(CreateDataflowPartition()) + parent_model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_parent.onnx") + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node = getCustomOp(sdp_node) + dataflow_model_filename = sdp_node.get_nodeattr("model") + dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename) + dataflow_model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_model.onnx") + + +def test_end2end_zynqbuild_cnv_w1a1_fold(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_model.onnx" + ) + fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + # each tuple is (PE, SIMD, in_fifo_depth) for a layer + folding = [ + (16, 3, 256), + (32, 32, 256), + (16, 32, 256), + (16, 32, 256), + (4, 32, 214), + (1, 32, 2), + (1, 4, 126), + (1, 8, 62), + (5, 1, 6), + ] + for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding): + fcl_inst = getCustomOp(fcl) + fcl_inst.set_nodeattr("PE", pe) + fcl_inst.set_nodeattr("SIMD", simd) + fcl_inst.set_nodeattr("inFIFODepth", 
ififodepth) + + swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator") + swg_idepth = [2, 51, 9, 106, 2, 2] + for i in range(len(swg_layers)): + swg_inst = getCustomOp(swg_layers[i]) + simd = folding[i][1] + swg_inst.set_nodeattr("SIMD", simd) + swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i]) + model = model.transform(AnnotateResources("estimate")) + model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_folded.onnx") + + +def test_end2end_zynqbuild_cnv_w1a1_make_driver(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_folded.onnx" + ) + model = model.transform(MakePYNQDriver(platform="zynq-iodma")) + model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_driver.onnx") + + +@pytest.mark.slow +@pytest.mark.vivado +def test_end2end_zynqbuild_cnv_w1a1_build(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_driver.onnx" + ) + model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns)) + model = model.transform(AnnotateResources("synth")) + model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_build.onnx") + + +def test_end2end_zynqbuild_cnv_w1a1_deploy_on_pynq(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_build.onnx" + ) + try: + ip = os.environ["PYNQ_IP"] # no fault for this one; skip if not defined + if ip == "": + pytest.skip("PYNQ board IP address not specified") + username = os.getenv("PYNQ_USERNAME", "xilinx") + password = os.getenv("PYNQ_PASSWORD", "xilinx") + port = os.getenv("PYNQ_PORT", 22) + target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn") + model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir)) + # save the model to be able to link it to the parent + model.save(build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_deploy.onnx") + except KeyError: + pytest.skip("PYNQ board IP address not specified") + + +def test_end2end_zynqbuild_cnv_w1a1_run_on_pynq(): + # use the streamlined model as the 
"golden" model for right answers + golden = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_streamlined.onnx" + ) + iname = golden.graph.input[0].name + oname = golden.graph.output[0].name + # load one of the test vectors + fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz") + input_tensor = np.load(fn)["arr_0"].astype(np.float32) + input_tensor = input_tensor / 255 + assert input_tensor.shape == (1, 3, 32, 32) + x = input_tensor + # x = np.zeros(ishape, dtype=np.float32) + # run using FINN-based execution + ret_golden = execute_onnx(golden, {iname: x}, True) + y_golden = ret_golden[oname] + # set up parent+child graph to test + # we'll use models from the previous step as the child model + parent_model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_dataflow_parent.onnx" + ) + iname = parent_model.graph.input[0].name + oname = parent_model.graph.output[0].name + try: + ip = os.environ["PYNQ_IP"] # NOQA + if ip == "": + pytest.skip("PYNQ board IP address not specified") + # produce results with cppsim + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node = getCustomOp(sdp_node) + load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_deploy.onnx" + ) + sdp_node.set_nodeattr( + "model", build_dir + "/end2end_zynqbuild_cnv_w1a1_pynq_deploy.onnx" + ) + ret = execute_onnx(parent_model, {iname: x}, True) + y = ret[oname] + assert np.isclose(y, y_golden).all() + assert np.argmax(y) == 3 + + except KeyError: + pytest.skip("PYNQ board IP address not specified") diff --git a/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py new file mode 100644 index 0000000000000000000000000000000000000000..98ccc93188c17f4f82b3cbf0164f847c92b7b7bd --- /dev/null +++ b/tests/end2end/test_zynqbuild_end2end_tfc_w1a1.py @@ -0,0 +1,232 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +from pkgutil import get_data + +import pytest + +import numpy as np + +# as of Feb'20 there is a bug that segfaults ONNX shape inference if we +# import pytorch before onnx, so we make sure to import onnx first +import onnx # NOQA +import onnx.numpy_helper as nph + +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.streamline.absorb as absorb +from finn.core.onnx_exec import execute_onnx +from finn.custom_op.registry import getCustomOp +from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount +from finn.transformation.fold_constants import FoldConstants + +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ +from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver +from finn.transformation.general import ( + RemoveUnusedTensors, + RemoveStaticGraphInputs, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline import Streamline +from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds +from finn.util.basic import pynq_part_map +from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip +from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources + +from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild + +build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] +test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") +test_fpga_part = pynq_part_map[test_pynq_board] +target_clk_ns = 10 +mem_mode = "decoupled" + + +def test_end2end_zynqbuild_tfc_w1a1_export(): + import brevitas.onnx as bo + + tfc = get_test_model_trained("TFC", 1, 1) + bo.export_finn_onnx( + tfc, (1, 1, 28, 28), build_dir + 
"/end2end_zynqbuild_tfc_w1a1_export.onnx" + ) + + +def test_end2end_zynqbuild_tfc_w1a1_import_and_tidy(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_export.onnx" + ) + model = model.transform(InferShapes()) + model = model.transform(FoldConstants()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model = model.transform(InferDataTypes()) + model = model.transform(RemoveStaticGraphInputs()) + model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_tidy.onnx") + + +def test_end2end_zynqbuild_tfc_w1a1_streamline(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_tidy.onnx" + ) + model = model.transform(Streamline()) + model = model.transform(RemoveUnusedTensors()) + model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_streamlined.onnx") + + +def test_end2end_zynqbuild_tfc_w1a1_convert_to_hls_layers(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_streamlined.onnx" + ) + model = model.transform(ConvertBipolarMatMulToXnorPopcount()) + model = model.transform(absorb.AbsorbAddIntoMultiThreshold()) + model = model.transform(absorb.AbsorbMulIntoMultiThreshold()) + model = model.transform(RoundAndClipThresholds()) + model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode)) + model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_hls_layers.onnx") + + +def test_end2end_zynqbuild_tfc_w1a1_create_dataflow_partition(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_hls_layers.onnx" + ) + parent_model = model.transform(CreateDataflowPartition()) + parent_model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_parent.onnx") + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node = getCustomOp(sdp_node) + dataflow_model_filename = sdp_node.get_nodeattr("model") + dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename) + 
dataflow_model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_model.onnx") + + +def test_end2end_zynqbuild_tfc_w1a1_fold(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_model.onnx" + ) + fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") + # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer + config = [ + (16, 49, 16, 64, "block"), + (8, 8, 64, 64, "auto"), + (8, 8, 64, 64, "auto"), + (10, 8, 64, 10, "distributed"), + ] + for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config): + fcl_inst = getCustomOp(fcl) + fcl_inst.set_nodeattr("PE", pe) + fcl_inst.set_nodeattr("SIMD", simd) + fcl_inst.set_nodeattr("inFIFODepth", ififo) + fcl_inst.set_nodeattr("outFIFODepth", ofifo) + fcl_inst.set_nodeattr("ram_style", ramstyle) + + model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_folded.onnx") + + +def test_end2end_zynqbuild_tfc_w1a1_make_driver(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_folded.onnx" + ) + model = model.transform(MakePYNQDriver(platform="zynq-iodma")) + model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_driver.onnx") + + +@pytest.mark.slow +@pytest.mark.vivado +def test_end2end_zynqbuild_tfc_w1a1_build(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_driver.onnx" + ) + model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns)) + model = model.transform(AnnotateResources("synth")) + model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_build.onnx") + + +def test_end2end_zynqbuild_tfc_w1a1_deploy_on_pynq(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_build.onnx" + ) + try: + ip = os.environ["PYNQ_IP"] # no fault for this one; skip if not defined + if ip == "": + pytest.skip("PYNQ board IP address not specified") + username = os.getenv("PYNQ_USERNAME", "xilinx") + password = os.getenv("PYNQ_PASSWORD", "xilinx") + port = 
os.getenv("PYNQ_PORT", 22) + target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn") + model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir)) + # save the model to be able to link it to the parent + model.save(build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_deploy.onnx") + except KeyError: + pytest.skip("PYNQ board IP address not specified") + + +def test_end2end_zynqbuild_tfc_w1a1_run_on_pynq(): + # use the streamlined model as the "golden" model for right answers + golden = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_streamlined.onnx" + ) + iname = golden.graph.input[0].name + oname = golden.graph.output[0].name + raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb") + input_tensor = onnx.load_tensor_from_string(raw_i) + x = nph.to_array(input_tensor) + # x = np.zeros(ishape, dtype=np.float32) + # run using FINN-based execution + ret_golden = execute_onnx(golden, {iname: x}, True) + y_golden = ret_golden[oname] + # set up parent+child graph to test + # we'll use models from the previous step as the child model + parent_model = load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_dataflow_parent.onnx" + ) + iname = parent_model.graph.input[0].name + oname = parent_model.graph.output[0].name + try: + ip = os.environ["PYNQ_IP"] # NOQA + if ip == "": + pytest.skip("PYNQ board IP address not specified") + # produce results with cppsim + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node = getCustomOp(sdp_node) + load_test_checkpoint_or_skip( + build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_deploy.onnx" + ) + sdp_node.set_nodeattr( + "model", build_dir + "/end2end_zynqbuild_tfc_w1a1_pynq_deploy.onnx" + ) + ret = execute_onnx(parent_model, {iname: x}, True) + y = ret[oname] + assert np.isclose(y, y_golden).all() + + except KeyError: + pytest.skip("PYNQ board IP address not specified") diff --git 
a/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py new file mode 100644 index 0000000000000000000000000000000000000000..b8db8c1a4a2049a38e64b9c5bb54fb7d4d8d0ab0 --- /dev/null +++ b/tests/end2end/test_zynqbuild_end2end_tfc_w2a2.py @@ -0,0 +1,222 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import os
from pkgutil import get_data

import pytest

import numpy as np

# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
# import pytorch before onnx, so we make sure to import onnx first
import onnx  # NOQA
import onnx.numpy_helper as nph
import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
from finn.core.onnx_exec import execute_onnx
from finn.custom_op.registry import getCustomOp
from finn.transformation.fold_constants import FoldConstants
from finn.transformation.fpgadataflow.create_dataflow_partition import (
    CreateDataflowPartition,
)
from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
from finn.transformation.general import (
    RemoveUnusedTensors,
    RemoveStaticGraphInputs,
    GiveReadableTensorNames,
    GiveUniqueNodeNames,
)
from finn.transformation.infer_datatypes import InferDataTypes
from finn.transformation.infer_shapes import InferShapes
from finn.transformation.streamline import Streamline
from finn.util.basic import pynq_part_map
from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild

# per-test-instance scratch dir; FINN_INST_NAME is set by the docker entrypoint
build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
test_fpga_part = pynq_part_map[test_pynq_board]
target_clk_ns = 10
mem_mode = "decoupled"


def test_end2end_zynqbuild_tfc_w2a2_export():
    """Export the trained 2-bit weight / 2-bit activation TFC network from
    Brevitas to FINN-ONNX."""
    import brevitas.onnx as bo

    tfc = get_test_model_trained("TFC", 2, 2)
    bo.export_finn_onnx(
        tfc, (1, 1, 28, 28), build_dir + "/end2end_zynqbuild_tfc_w2a2_export.onnx"
    )


def test_end2end_zynqbuild_tfc_w2a2_import_and_tidy():
    """Run the standard tidy-up transforms on the exported model."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_export.onnx"
    )
    model = model.transform(InferShapes())
    model = model.transform(FoldConstants())
    model = model.transform(GiveUniqueNodeNames())
    model = model.transform(GiveReadableTensorNames())
    model = model.transform(InferDataTypes())
    model = model.transform(RemoveStaticGraphInputs())
    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_tidy.onnx")


def test_end2end_zynqbuild_tfc_w2a2_streamline():
    """Streamline the tidied model and drop unused tensors."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_tidy.onnx"
    )
    model = model.transform(Streamline())
    model = model.transform(RemoveUnusedTensors())
    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_streamlined.onnx")


def test_end2end_zynqbuild_tfc_w2a2_convert_to_hls_layers():
    """Convert the streamlined graph into HLS StreamingFCLayer nodes."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_streamlined.onnx"
    )
    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_hls_layers.onnx")


def test_end2end_zynqbuild_tfc_w2a2_create_dataflow_partition():
    """Split the graph into a parent model and a StreamingDataflowPartition
    child, checkpointing both."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_hls_layers.onnx"
    )
    parent_model = model.transform(CreateDataflowPartition())
    parent_model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_parent.onnx")
    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
    sdp_node = getCustomOp(sdp_node)
    dataflow_model_filename = sdp_node.get_nodeattr("model")
    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
    dataflow_model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_model.onnx")


def test_end2end_zynqbuild_tfc_w2a2_fold():
    """Apply per-layer folding: PE/SIMD parallelism, FIFO depths and memory
    ram_style on every StreamingFCLayer_Batch node."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_model.onnx"
    )
    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
    # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
    config = [
        (16, 49, 16, 64, "block"),
        (8, 8, 64, 64, "auto"),
        (8, 8, 64, 64, "auto"),
        (10, 8, 64, 10, "distributed"),
    ]
    for fcl, (pe, simd, ififo, ofifo, ramstyle) in zip(fc_layers, config):
        fcl_inst = getCustomOp(fcl)
        fcl_inst.set_nodeattr("PE", pe)
        fcl_inst.set_nodeattr("SIMD", simd)
        fcl_inst.set_nodeattr("inFIFODepth", ififo)
        fcl_inst.set_nodeattr("outFIFODepth", ofifo)
        fcl_inst.set_nodeattr("ram_style", ramstyle)

    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_folded.onnx")


def test_end2end_zynqbuild_tfc_w2a2_make_driver():
    """Generate the PYNQ Python driver for the zynq-iodma shell."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_folded.onnx"
    )
    model = model.transform(MakePYNQDriver(platform="zynq-iodma"))
    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_driver.onnx")


@pytest.mark.slow
@pytest.mark.vivado
def test_end2end_zynqbuild_tfc_w2a2_build():
    """Run the full ZynqBuild bitfile flow and annotate post-synthesis
    resource usage on the model."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_driver.onnx"
    )
    model = model.transform(ZynqBuild(test_pynq_board, target_clk_ns))
    model = model.transform(AnnotateResources("synth"))
    model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_build.onnx")


def test_end2end_zynqbuild_tfc_w2a2_deploy_on_pynq():
    """Copy the built accelerator onto a PYNQ board over SSH.

    Skips (instead of failing) when no PYNQ_IP is configured."""
    model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_build.onnx"
    )
    try:
        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        username = os.getenv("PYNQ_USERNAME", "xilinx")
        password = os.getenv("PYNQ_PASSWORD", "xilinx")
        # default as str so the value has a consistent type: os.getenv
        # returns str whenever the variable IS set in the environment
        port = os.getenv("PYNQ_PORT", "22")
        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
        # save the model to be able to link it to the parent
        model.save(build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_deploy.onnx")
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")


def test_end2end_zynqbuild_tfc_w2a2_run_on_pynq():
    """Execute the deployed accelerator remotely via the parent graph and
    compare its output against FINN-based execution of the streamlined
    model (the "golden" reference). Skips when no PYNQ_IP is configured."""
    # use the streamlined model as the "golden" model for right answers
    golden = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_streamlined.onnx"
    )
    iname = golden.graph.input[0].name
    oname = golden.graph.output[0].name
    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
    input_tensor = onnx.load_tensor_from_string(raw_i)
    x = nph.to_array(input_tensor)
    # run using FINN-based execution
    ret_golden = execute_onnx(golden, {iname: x}, True)
    y_golden = ret_golden[oname]
    # set up parent+child graph to test
    # we'll use models from the previous step as the child model
    parent_model = load_test_checkpoint_or_skip(
        build_dir + "/end2end_zynqbuild_tfc_w2a2_dataflow_parent.onnx"
    )
    iname = parent_model.graph.input[0].name
    oname = parent_model.graph.output[0].name
    try:
        ip = os.environ["PYNQ_IP"]  # NOQA
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
        sdp_node = getCustomOp(sdp_node)
        # loaded only for its skip-if-missing side effect: bail out cleanly
        # when the deploy checkpoint was never produced
        load_test_checkpoint_or_skip(
            build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_deploy.onnx"
        )
        sdp_node.set_nodeattr(
            "model", build_dir + "/end2end_zynqbuild_tfc_w2a2_pynq_deploy.onnx"
        )
        ret = execute_onnx(parent_model, {iname: x}, True)
        y = ret[oname]
        assert np.isclose(y, y_golden).all()

    except KeyError:
        pytest.skip("PYNQ board IP address not specified")
from finn.transformation.fpgadataflow.floorplan import Floorplan
from finn.transformation.fpgadataflow.vitis_build import VitisBuild
from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild

test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")


# NOTE(review): the parametrize decorators for board/period_ns/extw sit
# above this diff hunk in the full file and are unchanged here.
@pytest.mark.vivado
@pytest.mark.vitis
def test_fpgadataflow_ipstitch_vitis(board, period_ns, extw):
    """Build a stitched two-FC-layer model with VitisBuild for an Alveo
    platform. Skips outright when no Vitis installation is configured,
    instead of failing later inside the build transform."""
    if "VITIS_PATH" not in os.environ:
        pytest.skip("VITIS_PATH not set")
    platform = alveo_default_platform[board]
    fpga_part = alveo_part_map[board]
    model = create_two_fc_model("external" if extw else "decoupled")
    # NOTE(review): the next four lines fall in a diff-hunk gap (old lines
    # 434-437); reconstructed from the byte-identical pattern in
    # test_fpgadataflow_ipstitch_zynqbuild below — verify against the full file.
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
    model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
    model = model.transform(VitisBuild(fpga_part, period_ns, platform))
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_vitis.onnx")


# board
@pytest.mark.parametrize("board", ["Pynq-Z1"])
@pytest.mark.slow
@pytest.mark.vivado
def test_fpgadataflow_ipstitch_zynqbuild(board):
    """End-to-end ZynqBuild flow for a small two-FC-layer model: driver
    generation, bitfile build, and (when PYNQ_IP is set) remote deployment
    and execution on the board."""
    model = create_two_fc_model()
    if model.graph.node[0].op_type == "StreamingDataflowPartition":
        sdp_node = getCustomOp(model.graph.node[0])
        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
        assert os.path.isfile(sdp_node.get_nodeattr("model"))
        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
    # generate inputs for remote exec
    iname = "inp"
    idt = model.get_tensor_datatype(iname)
    ishape = model.get_tensor_shape(iname)
    x = gen_finn_dt_tensor(idt, ishape)
    # driver
    model = model.transform(MakePYNQDriver())
    driver_dir = model.get_metadata_prop("pynq_driver_dir")
    assert driver_dir is not None
    assert os.path.isdir(driver_dir)
    # bitfile using ZynqBuild
    model = model.transform(ZynqBuild(board, 10))
    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_customzynq.onnx")

    bitfile_name = model.get_metadata_prop("vivado_pynq_bitfile")
    assert bitfile_name is not None
    assert os.path.isfile(bitfile_name)
    # deployment
    try:
        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
        if ip == "":
            pytest.skip("PYNQ board IP address not specified")
        username = os.getenv("PYNQ_USERNAME", "xilinx")
        password = os.getenv("PYNQ_PASSWORD", "xilinx")
        # default as str so the value has a consistent type: os.getenv
        # returns str whenever the variable IS set in the environment
        port = os.getenv("PYNQ_PORT", "22")
        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
        deployment_dir = model.get_metadata_prop("pynq_deploy_dir")
        assert deployment_dir is not None
        assert os.path.isdir(deployment_dir)
        # remote exec
        input_dict = {"global_in": x}
        outp = execute_onnx(model, input_dict)
        # NOTE(review): asserts output == input, i.e. the two-FC model is
        # expected to behave as identity — presumably create_two_fc_model
        # builds identity weights; confirm against its definition.
        assert np.isclose(outp["global_out"], x).all()
    except KeyError:
        pytest.skip("PYNQ board IP address not specified")