diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4c4d4bc5478e78233901374b3877d79b488e5f9
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/vitis_build.py
@@ -0,0 +1,320 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation import Transformation
+from finn.custom_op.registry import getCustomOp
+
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
+from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+    ReplaceVerilogRelPaths,
+)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.floorplan import Floorplan
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.util.basic import make_build_dir
+from finn.transformation.infer_data_layouts import InferDataLayouts
+
+
+class CreateVitisXO(Transformation):
+    """Create a Vitis object file from a stitched FINN ip.
+
+    Outcome if successful: sets the vitis_xo attribute in the ONNX
+    ModelProto's metadata_props field with the full path of the object file
+    as value. The object file is written to the directory named by the
+    vivado_stitch_proj metadata property.
+
+    Raises RuntimeError if the object file does not exist after packaging.
+    """
+
+    def __init__(self, ip_name="finn_design"):
+        super().__init__()
+        # used both as the kernel name and as the stem of the .xo file name
+        self.ip_name = ip_name
+
+    def apply(self, model):
+        vivado_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
+        stitched_ip_dir = vivado_proj_dir + "/ip"
+        args_string = []
+        m_axis_idx = 0
+        s_axis_idx = 0
+        # NOTE: this assumes the graph is Vitis-compatible: max one axi lite interface
+        # developed from instructions in UG1393 (v2019.2) and package_xo documentation
+        # package_xo is responsible for generating the kernel xml
+        for node in model.graph.node:
+            node_inst = getCustomOp(node)
+            # NOTE(review): the argument id restarts at 0 for every node, so
+            # arguments of different nodes can share an id -- confirm intended
+            arg_id = 0
+            if node.op_type == "TLastMarker":
+                stream_width = node_inst.get_nodeattr("StreamWidth")
+                # add a stream input or output port, based on direction
+                if node_inst.get_nodeattr("Direction") == "in":
+                    args_string.append(
+                        "{in:4:%s:s_axis_%d:0x0:0x0:ap_uint<%s>:0}"
+                        % (str(arg_id), s_axis_idx, str(stream_width))
+                    )
+                    s_axis_idx += 1
+                else:
+                    args_string.append(
+                        "{out:4:%s:m_axis_%d:0x0:0x0:ap_uint<%s>:0}"
+                        % (str(arg_id), m_axis_idx, str(stream_width))
+                    )
+                    m_axis_idx += 1
+                arg_id += 1
+                # add a axilite port if dynamic
+                # add a count parameter if dynamic
+                if node_inst.get_nodeattr("DynIters") == 1:
+                    args_string.append(
+                        "{numReps:0:%s:s_axi_control:0x4:0x10:uint:0}" % str(arg_id)
+                    )
+                    arg_id += 1
+            elif node.op_type == "IODMA":
+                port_width = node_inst.get_nodeattr("intfWidth")
+                # add an address parameter
+                # add a count parameter
+                args_string.append(
+                    "{addr:1:%s:m_axi_gmem0:0x8:0x10:ap_uint<%s>*:0}"
+                    % (str(arg_id), str(port_width))
+                )
+                arg_id += 1
+                args_string.append(
+                    "{numReps:0:%s:s_axi_control:0x4:0x1C:uint:0}" % str(arg_id)
+                )
+                arg_id += 1
+
+        # save kernel xml then run package_xo
+        xo_name = self.ip_name + ".xo"
+        xo_path = vivado_proj_dir + "/" + xo_name
+        model.set_metadata_prop("vitis_xo", xo_path)
+
+        # generate the package_xo command in a tcl script
+        package_xo_string = (
+            "package_xo -force -xo_path %s -kernel_name %s -ip_directory %s"
+            % (xo_path, self.ip_name, stitched_ip_dir)
+        )
+        package_xo_string += "".join(" -kernel_xml_args " + arg for arg in args_string)
+        with open(vivado_proj_dir + "/gen_xo.tcl", "w") as f:
+            f.write(package_xo_string)
+
+        # create a shell script and call Vivado
+        package_xo_sh = vivado_proj_dir + "/gen_xo.sh"
+        # os.getcwd() rather than $PWD, which may be unset or stale
+        working_dir = os.getcwd()
+        with open(package_xo_sh, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write("cd {}\n".format(vivado_proj_dir))
+            f.write("vivado -mode batch -source gen_xo.tcl\n")
+            f.write("cd {}\n".format(working_dir))
+        bash_command = ["bash", package_xo_sh]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+        # the script ends with a cd, so its exit status says nothing about
+        # Vivado; check for the object file itself instead
+        if not os.path.isfile(xo_path):
+            raise RuntimeError(
+                "Vitis .xo file not created, check logs under %s" % vivado_proj_dir
+            )
+        return (model, False)
+
+
+class VitisLink(Transformation):
+    """Create an XCLBIN with Vitis.
+
+    Outcome if successful: sets the vitis_link_proj attribute in the ONNX
+    ModelProto's metadata_props field with the link project directory as
+    value; v++ is run inside that directory.
+    NOTE(review): the XCLBIN path itself is not stored in metadata_props.
+    """
+
+    def __init__(self, platform, f_mhz=200):
+        super().__init__()
+        # platform and f_mhz are passed to v++ as --platform / --kernel_frequency
+        self.platform = platform
+        self.f_mhz = f_mhz
+
+    def apply(self, model):
+        # create a config file and empty list of xo files
+        config = ["[connectivity]"]
+        object_files = []
+        idma_idx = 0
+        odma_idx = 0
+        instance_names = {}
+        for node in model.graph.node:
+            assert node.op_type == "StreamingDataflowPartition", "Invalid link graph"
+            sdp_node = getCustomOp(node)
+            dataflow_model_filename = sdp_node.get_nodeattr("model")
+            kernel_model = ModelWrapper(dataflow_model_filename)
+            kernel_xo = kernel_model.get_metadata_prop("vitis_xo")
+            object_files.append(kernel_xo)
+            # gather info on connectivity
+            # assume each node connected to outputs/inputs is DMA:
+            # has axis, aximm and axilite
+            # everything else is axis-only
+            # assume only one connection from each ip to the next
+            # all aximm allocated to DDR[0]
+            # all kernels allocated to SLR0
+            producer = model.find_producer(node.input[0])
+            consumer = model.find_consumers(node.output[0])
+            # no producer / no consumers marks a graph input / output kernel;
+            # an empty consumer list is treated the same as None
+            is_input = producer is None
+            is_output = not consumer
+            # define kernel instances
+            # name kernels connected to graph inputs as idmaxx
+            # name kernels connected to graph outputs as odmaxx
+            if is_input:
+                instance_names[node.name] = "idma" + str(idma_idx)
+                idma_idx += 1
+            elif is_output:
+                instance_names[node.name] = "odma" + str(odma_idx)
+                odma_idx += 1
+            else:
+                instance_names[node.name] = node.name
+            config.append("nk=%s:1:%s" % (node.name, instance_names[node.name]))
+            # assign SLRs
+            config.append("slr=%s:SLR0" % instance_names[node.name])
+            # assign memory banks
+            if is_input or is_output:
+                config.append(
+                    "sp=%s.m_axi_gmem0:DDR[%d]" % (instance_names[node.name], 0)
+                )
+            # connect streams
+            if not is_input:
+                for i, inp_name in enumerate(node.input):
+                    inp_producer = model.find_producer(inp_name)
+                    if inp_producer is not None:
+                        j = list(inp_producer.output).index(inp_name)
+                        config.append(
+                            "stream_connect=%s.m_axis_%d:%s.s_axis_%d"
+                            % (
+                                instance_names[inp_producer.name],
+                                j,
+                                instance_names[node.name],
+                                i,
+                            )
+                        )
+
+        # create a temporary folder for the project
+        link_dir = make_build_dir(prefix="vitis_link_proj_")
+        model.set_metadata_prop("vitis_link_proj", link_dir)
+
+        config = "\n".join(config) + "\n"
+        with open(link_dir + "/config.txt", "w") as f:
+            f.write(config)
+
+        # create a shell script and call Vitis
+        script = link_dir + "/run_vitis_link.sh"
+        # os.getcwd() rather than $PWD, which may be unset or stale
+        working_dir = os.getcwd()
+        with open(script, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write("cd {}\n".format(link_dir))
+            f.write(
+                "v++ -t hw --platform %s --link %s"
+                " --kernel_frequency %d --config config.txt\n"
+                % (self.platform, " ".join(object_files), self.f_mhz)
+            )
+            f.write("cd {}\n".format(working_dir))
+        bash_command = ["bash", script]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+        return (model, False)
+
+
+class VitisBuild(Transformation):
+    """Best-effort attempt at building the accelerator with Vitis.
+
+    fpga_part and period_ns are handed to PrepareIP and CreateStitchedIP for
+    each kernel; platform and the clock frequency in MHz derived from
+    period_ns are handed to VitisLink.
+    """
+
+    def __init__(self, fpga_part, period_ns, platform):
+        super().__init__()
+        self.fpga_part = fpga_part
+        self.period_ns = period_ns
+        self.platform = platform
+
+    def apply(self, model):
+        # first infer layouts
+        model = model.transform(InferDataLayouts())
+        # prepare at global level, then break up into kernels
+        prep_transforms = [
+            MakePYNQDriver(),
+            InsertIODMA(512),
+            InsertDWC(),
+            Floorplan(),
+            CreateDataflowPartition(),
+        ]
+        for trn in prep_transforms:
+            model = model.transform(trn)
+            model = model.transform(GiveUniqueNodeNames())
+            model = model.transform(GiveReadableTensorNames())
+        # Build each kernel individually
+        sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition")
+        for sdp_node in sdp_nodes:
+            sdp_inst = getCustomOp(sdp_node)
+            # the node name is passed to both CreateStitchedIP and CreateVitisXO
+            kernel_name = sdp_inst.onnx_node.name
+            dataflow_model_filename = sdp_inst.get_nodeattr("model")
+            kernel_model = ModelWrapper(dataflow_model_filename)
+            kernel_model = kernel_model.transform(InsertFIFO())
+            kernel_model = kernel_model.transform(
+                InsertTLastMarker(both=True, external=False, dynamic=False)
+            )
+            kernel_model = kernel_model.transform(GiveUniqueNodeNames())
+            kernel_model.save(dataflow_model_filename)
+            kernel_model = kernel_model.transform(
+                PrepareIP(self.fpga_part, self.period_ns)
+            )
+            kernel_model = kernel_model.transform(HLSSynthIP())
+            kernel_model = kernel_model.transform(ReplaceVerilogRelPaths())
+            kernel_model = kernel_model.transform(
+                CreateStitchedIP(self.fpga_part, self.period_ns, kernel_name, True)
+            )
+            kernel_model = kernel_model.transform(CreateVitisXO(kernel_name))
+            kernel_model.save(dataflow_model_filename)
+        # Assemble design from kernels
+        # clock period in ns -> kernel frequency in MHz
+        f_mhz = round(1000 / self.period_ns)
+        model = model.transform(VitisLink(self.platform, f_mhz))
+
+        return (model, False)
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 4a8277e08d3fc21e0b20668edf2ecad947b36647..91ff811069369383099f5ae5aebf3228fbdbaae5 100644
---
a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -51,6 +51,19 @@ pynq_native_port_width["Pynq-Z2"] = 64 pynq_native_port_width["Ultra96"] = 128 pynq_native_port_width["ZCU104"] = 128 +# Alveo device and platform mappings +alveo_part_map = dict() +alveo_part_map["U50"] = "xcu50-fsvh2104-2L-e" +alveo_part_map["U200"] = "xcu200-fsgd2104-2-e" +alveo_part_map["U250"] = "xcu250-figd2104-2L-e" +alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e" + +alveo_default_platform = dict() +alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_201920_3" +alveo_default_platform["U200"] = "xilinx_u200_xdma_201830_2" +alveo_default_platform["U250"] = "xilinx_u250_xdma_201830_2" +alveo_default_platform["U280"] = "xilinx_u280_xdma_201920_3" + def get_rtlsim_trace_depth(): """Return the trace depth for rtlsim via PyVerilator. Controllable diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index a9f5bf5ffa1f816b82ef701800e92249056b7c74..4118976933b71fac54b1123c40c70d14fbbd318d 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -50,13 +50,19 @@ from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject import finn.transformation.fpgadataflow.replace_verilog_relpaths as rvp from finn.transformation.general import GiveUniqueNodeNames -from finn.util.basic import gen_finn_dt_tensor, pynq_part_map +from finn.util.basic import ( + gen_finn_dt_tensor, + pynq_part_map, + alveo_part_map, + alveo_default_platform, +) from finn.util.fpgadataflow import pyverilate_stitched_ip from finn.util.test import load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext from finn.transformation.infer_data_layouts import InferDataLayouts from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA from 
finn.transformation.fpgadataflow.floorplan import Floorplan +from finn.transformation.fpgadataflow.vitis_build import VitisBuild test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -119,7 +125,7 @@ def create_one_fc_model(): return model -def create_two_fc_model(): +def create_two_fc_model(mem_mode="decoupled"): # create a model with two StreamingFCLayer instances wdt = DataType.INT2 idt = DataType.INT32 @@ -152,7 +158,7 @@ def create_two_fc_model(): ActVal=actval, binaryXnorMode=binary_xnor_mode, noActivation=no_act, - mem_mode="decoupled", + mem_mode=mem_mode, ) fc1 = helper.make_node( @@ -172,7 +178,7 @@ def create_two_fc_model(): ActVal=actval, binaryXnorMode=binary_xnor_mode, noActivation=no_act, - mem_mode="decoupled", + mem_mode=mem_mode, ) graph = helper.make_graph( @@ -247,35 +253,35 @@ def test_fpgadataflow_ipstitch_rtlsim(): model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd") sim = pyverilate_stitched_ip(model) exp_io = [ - "ap_clk_0", - "ap_rst_n_0", - "in0_V_V_0_tdata", - "in0_V_V_0_tready", - "in0_V_V_0_tvalid", - "out_r_0_tdata", - "out_r_0_tkeep", - "out_r_0_tlast", - "out_r_0_tready", - "out_r_0_tvalid", - "s_axi_control_0_araddr", - "s_axi_control_0_arready", - "s_axi_control_0_arvalid", - "s_axi_control_0_awaddr", - "s_axi_control_0_awready", - "s_axi_control_0_awvalid", - "s_axi_control_0_bready", - "s_axi_control_0_bresp", - "s_axi_control_0_bvalid", - "s_axi_control_0_rdata", - "s_axi_control_0_rready", - "s_axi_control_0_rresp", - "s_axi_control_0_rvalid", - "s_axi_control_0_wdata", - "s_axi_control_0_wready", - "s_axi_control_0_wstrb", - "s_axi_control_0_wvalid", + "ap_clk", + "ap_rst_n", + "s_axis_0_tdata", + "s_axis_0_tready", + "s_axis_0_tvalid", + "m_axis_0_tdata", + "m_axis_0_tkeep", + "m_axis_0_tlast", + "m_axis_0_tready", + "m_axis_0_tvalid", + "s_axi_control_araddr", + "s_axi_control_arready", + "s_axi_control_arvalid", + "s_axi_control_awaddr", + "s_axi_control_awready", + "s_axi_control_awvalid", + 
"s_axi_control_bready", + "s_axi_control_bresp", + "s_axi_control_bvalid", + "s_axi_control_rdata", + "s_axi_control_rready", + "s_axi_control_rresp", + "s_axi_control_rvalid", + "s_axi_control_wdata", + "s_axi_control_wready", + "s_axi_control_wstrb", + "s_axi_control_wvalid", ] - assert dir(sim.io) == exp_io + assert sorted(dir(sim.io)) == sorted(exp_io) model.set_metadata_prop("exec_mode", "rtlsim") idt = model.get_tensor_datatype("inp") ishape = model.get_tensor_shape("inp") @@ -410,3 +416,24 @@ def test_fpgadataflow_ipstitch_iodma_floorplan(): assert getCustomOp(model.graph.node[1]).get_nodeattr("partition_id") == 2 assert getCustomOp(model.graph.node[2]).get_nodeattr("partition_id") == 1 model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_iodma_floorplan.onnx") + + +# board +@pytest.mark.parametrize("board", ["U250"]) +# clock period +@pytest.mark.parametrize("period_ns", [5]) +# override mem_mode to external +@pytest.mark.parametrize("extw", [True, False]) +@pytest.mark.slow +@pytest.mark.vivado +def test_fpgadataflow_ipstitch_vitis(board, period_ns, extw): + platform = alveo_default_platform[board] + fpga_part = alveo_part_map[board] + model = create_two_fc_model("external" if extw else "decoupled") + if model.graph.node[0].op_type == "StreamingDataflowPartition": + sdp_node = getCustomOp(model.graph.node[0]) + assert sdp_node.__class__.__name__ == "StreamingDataflowPartition" + assert os.path.isfile(sdp_node.get_nodeattr("model")) + model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model")) + model = model.transform(VitisBuild(fpga_part, period_ns, platform)) + model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_vitis.onnx")