diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2bae9350cb4ce06c5ad32140e9c259cf61b6f87
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -0,0 +1,291 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation import Transformation
+from finn.core.modelwrapper import ModelWrapper
+from finn.util.basic import get_by_name, make_build_dir
+from finn.util.basic import get_num_default_workers
+from finn.util.basic import pynq_part_map
+
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
+from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+    ReplaceVerilogRelPaths,
+)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
+
+from . import templates
+
+
+def collect_ip_dirs(model, ipstitch_path):
+    # collect list of all IP dirs
+    ip_dirs = []
+    for node in model.graph.node:
+        ip_dir_attribute = get_by_name(node.attribute, "ip_path")
+        assert (
+            ip_dir_attribute is not None
+        ), """Node attribute "ip_path" is
+        empty. Please run transformation HLSSynthIP first."""
+        ip_dir_value = ip_dir_attribute.s.decode("UTF-8")
+        assert os.path.isdir(
+            ip_dir_value
+        ), """The directory that should
+        contain the generated IP blocks doesn't exist."""
+        ip_dirs += [ip_dir_value]
+    ip_dirs += [ipstitch_path + "/ip"]
+    return ip_dirs
+
+
+class MakeZYNQProject(Transformation):
+    """Create a Vivado overlay project (including the shell infrastructure)
+    from the already-stitched IP block for this graph.
+    All nodes in the graph must have the fpgadataflow backend attribute,
+    and the CreateStitchedIP transformation must have been previously run on
+    the graph. This is functionally equivalent to MakePYNQProject, but instead
+    of relying on the PYNQ shell infrastructure it creates a fully custom block
+    design. Note that this transform requires DMAs in the accelerator design
+    (see InsertIODMA).
+
+    Outcome if successful: sets the vivado_pynq_proj attribute in the ONNX
+    ModelProto's metadata_props field, with the created project dir as the
+    value.
+    """
+
+    def __init__(self, platform):
+        super().__init__()
+        self.platform = platform
+
+    def apply(self, model):
+
+        # build up a list of TCL commands to instantiate and connect the IPs
+        config = []
+        idma_idx = 0
+        odma_idx = 0
+        aximm_idx = 0
+        axilite_idx = 0
+        global_clk_ns = 0
+        instance_names = {}
+        for node in model.graph.node:
+            assert node.op_type == "StreamingDataflowPartition", "Invalid link graph"
+            sdp_node = getCustomOp(node)
+            dataflow_model_filename = sdp_node.get_nodeattr("model")
+            kernel_model = ModelWrapper(dataflow_model_filename)
+
+            ipstitch_path = kernel_model.get_metadata_prop("vivado_stitch_proj")
+            if ipstitch_path is None or (not os.path.isdir(ipstitch_path)):
+                raise Exception(
+                    "No stitched IPI design found for %s, apply CreateStitchedIP first."
+                    % node.name
+                )
+
+            vivado_stitch_vlnv = kernel_model.get_metadata_prop("vivado_stitch_vlnv")
+            if vivado_stitch_vlnv is None:
+                raise Exception(
+                    "No vlnv found for %s, apply CreateStitchedIP first."
+                    % node.name
+                )
+
+            ip_dirs = ["list"]
+            ip_dirs += collect_ip_dirs(kernel_model, ipstitch_path)
+            ip_dirs_str = "[%s]" % (" ".join(ip_dirs))
+            config.append(
+                "set_property ip_repo_paths "
+                "[concat [get_property ip_repo_paths [current_project]] %s] "
+                "[current_project]" % ip_dirs_str
+            )
+            config.append("update_ip_catalog -rebuild -scan_changes")
+
+            # get metadata property clk_ns to calculate clock frequency
+            clk_ns = float(kernel_model.get_metadata_prop("clk_ns"))
+            if clk_ns > global_clk_ns:
+                global_clk_ns = clk_ns
+
+            # gather info on connectivity
+            # assume each node connected to graph inputs/outputs is a DMA:
+            # it has axis, aximm and axilite interfaces
+            # everything else is axis-only
+            # assume only one connection from each ip to the next
+            # all aximm allocated to DDR[0]
+            # all kernels allocated to SLR0
+            producer = model.find_producer(node.input[0])
+            consumer = model.find_consumers(node.output[0])
+            # define kernel instances
+            # name kernels connected to graph inputs as idmaxx
+            # name kernels connected to graph outputs as odmaxx
+            if producer is None or consumer is None:
+                if producer is None:
+                    instance_names[node.name] = "idma" + str(idma_idx)
+                elif consumer is None:
+                    instance_names[node.name] = "odma" + str(odma_idx)
+                config.append(
+                    "create_bd_cell -type ip -vlnv %s %s"
+                    % (vivado_stitch_vlnv, instance_names[node.name])
+                )
+                config.append(
+                    "connect_bd_intf_net [get_bd_intf_pins %s/m_axi_gmem0] "
+                    "[get_bd_intf_pins smartconnect_0/S%02d_AXI]"
+                    % (instance_names[node.name], aximm_idx)
+                )
+                config.append(
+                    "connect_bd_intf_net [get_bd_intf_pins %s/s_axi_control] "
+                    "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]"
+                    % (instance_names[node.name], axilite_idx)
+                )
+                idma_idx += 1
+                aximm_idx += 1
+                axilite_idx += 1
+            else:
+                instance_names[node.name] = node.name
+                config.append(
+                    "create_bd_cell -type ip -vlnv %s %s"
+                    % (vivado_stitch_vlnv, instance_names[node.name])
+                )
+                config.append(
+                    "connect_bd_net [get_bd_pins %s/ap_clk] "
+                    "[get_bd_pins smartconnect_0/aclk]" % instance_names[node.name]
+                )
+                config.append(
+                    "connect_bd_net [get_bd_pins %s/ap_rst_n] "
+                    "[get_bd_pins smartconnect_0/aresetn]" % instance_names[node.name]
+                )
+            # connect streams
+            if producer is not None:
+                for i in range(len(node.input)):
+                    producer = model.find_producer(node.input[i])
+                    if producer is not None:
+                        j = list(producer.output).index(node.input[i])
+                        config.append(
+                            "connect_bd_intf_net [get_bd_intf_pins %s/s_axis_%d] "
+                            "[get_bd_intf_pins %s/m_axis_%d]"
+                            % (
+                                instance_names[node.name],
+                                i,
+                                instance_names[producer.name],
+                                j,
+                            )
+                        )
+
+        # create a temporary folder for the project
+        vivado_pynq_proj_dir = make_build_dir(prefix="vivado_zynq_proj_")
+        model.set_metadata_prop("vivado_pynq_proj", vivado_pynq_proj_dir)
+
+        fclk_mhz = int(1 / (global_clk_ns * 0.001))
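+        # note: global_clk_ns holds the largest (slowest) clock period seen
+        # over all partitions, so the whole design is clocked at the rate of
+        # the slowest kernel; e.g. global_clk_ns = 10.0 gives fclk_mhz = 100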
+
+        # create a TCL recipe for the project
+        ipcfg = vivado_pynq_proj_dir + "/ip_config.tcl"
+        config = "\n".join(config) + "\n"
+        with open(ipcfg, "w") as f:
+            f.write(
+                templates.custom_zynq_shell_template
+                % (
+                    fclk_mhz,
+                    axilite_idx,
+                    aximm_idx,
+                    self.platform,
+                    pynq_part_map[self.platform],
+                    config,
+                    get_num_default_workers(),
+                )
+            )
+
+        # create a shell script that runs Vivado on the TCL recipe
+        synth_project_sh = vivado_pynq_proj_dir + "/synth_project.sh"
+        working_dir = os.environ["PWD"]
+        with open(synth_project_sh, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write("cd {}\n".format(vivado_pynq_proj_dir))
+            f.write("vivado -mode tcl -source %s\n" % ipcfg)
+            f.write("cd {}\n".format(working_dir))
+
+        # call the synthesis script
+        bash_command = ["bash", synth_project_sh]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+        return (model, False)
+
+
+class ZynqBuild(Transformation):
+    """Best-effort attempt at building the accelerator for Zynq: prepares the
+    model, builds and stitches IP for each dataflow partition, then runs
+    MakeZYNQProject to generate and synthesize the Vivado design."""
+
+    def __init__(self, platform, period_ns):
+        super().__init__()
+        self.fpga_part = pynq_part_map[platform]
+        self.period_ns = period_ns
+        self.platform = platform
+
+    def apply(self, model):
+        # first infer layouts
+        model = model.transform(InferDataLayouts())
+        # prepare at global level, then break up into kernels
+        prep_transforms = [
+            InsertIODMA(64),
+            InsertDWC(),
+            Floorplan(),
+            CreateDataflowPartition(),
+        ]
+        for trn in prep_transforms:
+            model = model.transform(trn)
+            model = model.transform(GiveUniqueNodeNames())
+            model = model.transform(GiveReadableTensorNames())
+        # Build each kernel individually
+        sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
+        for sdp_node in sdp_nodes:
+            sdp_node = getCustomOp(sdp_node)
+            dataflow_model_filename = sdp_node.get_nodeattr("model")
+            kernel_model = ModelWrapper(dataflow_model_filename)
+            kernel_model = kernel_model.transform(InsertFIFO())
+            kernel_model = kernel_model.transform(
+                InsertTLastMarker(both=True, external=False, dynamic=False)
+            )
+            kernel_model = kernel_model.transform(GiveUniqueNodeNames())
+            kernel_model.save(dataflow_model_filename)
+            kernel_model = kernel_model.transform(
+                PrepareIP(self.fpga_part, self.period_ns)
+            )
+            kernel_model = kernel_model.transform(HLSSynthIP())
+            kernel_model = kernel_model.transform(ReplaceVerilogRelPaths())
+            kernel_model = kernel_model.transform(
+                CreateStitchedIP(
+                    self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True
+                )
+            )
+            kernel_model.save(dataflow_model_filename)
+        # Assemble design from IPs
+        model = model.transform(MakeZYNQProject(self.platform))
+        return (model, False)
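+
+
+# Usage sketch (illustrative only, not executed anywhere in this module):
+# given a FINN ModelWrapper `model` that contains only fpgadataflow nodes, a
+# Zynq bitfile build for a 10 ns target clock could be launched as follows:
+#
+#   from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
+#   model = model.transform(ZynqBuild("Pynq-Z1", 10))
+#   proj_dir = model.get_metadata_prop("vivado_pynq_proj")
+#
+# MakeZYNQProject records the generated Vivado project directory in the
+# vivado_pynq_proj metadata property of the model.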
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index ab9fd03251819aee72f74cc0c1fa17b99b1e05a4..e4da964552d15543ea93df4fbf01ddab7eb7f6f2 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -258,3 +258,92 @@ if __name__ == "__main__":
 """
+
+custom_zynq_shell_template = """
+set FREQ_MHZ %s
+set NUM_AXILITE %d
+if {$NUM_AXILITE > 9} {
+    error "Maximum 10 AXI-Lite interfaces supported"
+}
+set NUM_AXIMM %d
+set BOARD %s
+set FPGA_PART %s
+create_project finn_zynq_link ./ -part $FPGA_PART
+if {$BOARD == "ZCU104"} {
+    set_property board_part xilinx.com:zcu104:part0:1.1 [current_project]
+    set ZYNQ_TYPE "zynq_us+"
+} elseif {$BOARD == "Ultra96"} {
+    set ZYNQ_TYPE "zynq_us+"
+} elseif {$BOARD == "Pynq-Z2"} {
+    set ZYNQ_TYPE "zynq_7000"
+} elseif {$BOARD == "Pynq-Z1"} {
+    set ZYNQ_TYPE "zynq_7000"
+    set_property board_part www.digilentinc.com:pynq-z1:part0:1.0 [current_project]
+} else {
+    puts "Unrecognized board"
+}
+
+create_bd_design "top"
+if {$ZYNQ_TYPE == "zynq_us+"} {
+    create_bd_cell -type ip -vlnv xilinx.com:ip:zynq_ultra_ps_e:3.3 zynq_ps
+    apply_bd_automation -rule xilinx.com:bd_rule:zynq_ultra_ps_e -config {apply_board_preset "1" } [get_bd_cells zynq_ps]
+    #activate one slave port, deactivate the second master port
+    set_property -dict [list CONFIG.PSU__USE__S_AXI_GP2 {1}] [get_bd_cells zynq_ps]
+    set_property -dict [list CONFIG.PSU__USE__M_AXI_GP1 {0}] [get_bd_cells zynq_ps]
+    #set frequency of PS clock (this can't always be exactly met)
+    set_property -dict [list CONFIG.PSU__CRL_APB__PL0_REF_CTRL__FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps]
+} elseif {$ZYNQ_TYPE == "zynq_7000"} {
+    create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 zynq_ps
+    apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config {make_external "FIXED_IO, DDR" apply_board_preset "1" Master "Disable" Slave "Disable" } [get_bd_cells zynq_ps]
+    set_property -dict [list CONFIG.PCW_USE_S_AXI_HP0 {1}] [get_bd_cells zynq_ps]
+    set_property -dict [list CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ [expr int($FREQ_MHZ)]] [get_bd_cells zynq_ps]
+} else {
+    puts "Unrecognized Zynq type"
+}
+
+#instantiate axi interconnect, axi smartconnect
+create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_interconnect_0
+create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_0
+#set number of aximm (smartconnect) and axilite (interconnect) interfaces
+set_property -dict [list CONFIG.NUM_SI $NUM_AXIMM] [get_bd_cells smartconnect_0]
+set_property -dict [list CONFIG.NUM_MI $NUM_AXILITE] [get_bd_cells axi_interconnect_0]
+
+#create reset controller and connect interconnects to PS
+if {$ZYNQ_TYPE == "zynq_us+"} {
+    connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0_FPD]
+    connect_bd_intf_net [get_bd_intf_pins zynq_ps/M_AXI_HPM0_FPD] -boundary_type upper [get_bd_intf_pins axi_interconnect_0/S00_AXI]
+    #connect interconnect clocks and resets
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/S00_ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins zynq_ps/saxihp0_fpd_aclk]
+} elseif {$ZYNQ_TYPE == "zynq_7000"} {
+    connect_bd_intf_net -boundary_type upper [get_bd_intf_pins zynq_ps/M_AXI_GP0] [get_bd_intf_pins axi_interconnect_0/S00_AXI]
+    connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/S00_ACLK]
+    apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins zynq_ps/S_AXI_HP0_ACLK]
+}
+connect_bd_net [get_bd_pins axi_interconnect_0/ARESETN] [get_bd_pins smartconnect_0/aresetn]
+
+#custom IP instantiations/connections start here
+%s
+
+#finalize clock and reset connections for interconnects
+set i 0
+while {$i < $NUM_AXILITE} {
+    apply_bd_automation -quiet -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}} [get_bd_pins axi_interconnect_0/M0${i}_ACLK]
+    incr i
+}
+
+save_bd_design
+assign_bd_address
+validate_bd_design
+
+set_property SYNTH_CHECKPOINT_MODE "Hierarchical" [ get_files top.bd ]
+make_wrapper -files [get_files top.bd] -import -fileset sources_1 -top
+
+set_property -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} -value {-mode out_of_context} -objects [get_runs synth_1]
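+#launch synthesis and implementation through bitstream generation; the -jobs
+#value is filled in from get_num_default_workers() on the Python side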
+launch_runs -to_step write_bitstream impl_1 -jobs %d
+wait_on_run [get_runs impl_1]
+
+"""
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index a9f5bf5ffa1f816b82ef701800e92249056b7c74..9fcd78521e967ebed248e1873f92700673d484f2 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -57,6 +57,7 @@ from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext
 from finn.transformation.infer_data_layouts import InferDataLayouts
 from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
 from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild
 
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
 
@@ -410,3 +411,18 @@ def test_fpgadataflow_ipstitch_iodma_floorplan():
     assert getCustomOp(model.graph.node[1]).get_nodeattr("partition_id") == 2
     assert getCustomOp(model.graph.node[2]).get_nodeattr("partition_id") == 1
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_iodma_floorplan.onnx")
+
+
+# board
+@pytest.mark.parametrize("board", ["Pynq-Z1"])
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_fpgadataflow_ipstitch_zynq(board):
+    model = create_two_fc_model()
+    if model.graph.node[0].op_type == "StreamingDataflowPartition":
+        sdp_node = getCustomOp(model.graph.node[0])
+        assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
+        assert os.path.isfile(sdp_node.get_nodeattr("model"))
+        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
+    model = model.transform(ZynqBuild(board, 10))
+    model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_customzynq.onnx")
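+    # ZynqBuild (via MakeZYNQProject) should have recorded the generated
+    # Vivado project directory in the model metadata
+    assert model.get_metadata_prop("vivado_pynq_proj") is not None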