diff --git a/ACCL b/ACCL
index 8c7e26f98ba8275ca5294fb7186ee8e9666ac786..914a8b520e62adbd4c02a8f50567928fe117d9c9 160000
--- a/ACCL
+++ b/ACCL
@@ -1 +1 @@
-Subproject commit 8c7e26f98ba8275ca5294fb7186ee8e9666ac786
+Subproject commit 914a8b520e62adbd4c02a8f50567928fe117d9c9
diff --git a/custom_hls/accl/funcs.hpp b/custom_hls/accl/funcs.hpp
index 925506248c51106b2b2e6bcc2ce3489365880391..eb4c26f373cb0471103d9da76c740a95a5800d0e 100644
--- a/custom_hls/accl/funcs.hpp
+++ b/custom_hls/accl/funcs.hpp
@@ -1,9 +1,11 @@
-const size_t accl_width = 512;
+#pragma once
 
 #ifdef CPPSIM
 #include <iostream>
 #endif
 
+const size_t accl_width = 512;
+
 template<unsigned int stream_width, unsigned int num_bits, unsigned int step>
 void accl_out(
     unsigned int destination,
@@ -30,10 +32,10 @@ void accl_out(
     bool leftover = num_bits % accl_width != 0;
     int num_transfer_bits = ((num_bits + accl_width - 1) / accl_width) * accl_width;
 
-    accl.stream_put(num_transfer_bits / 32, 9, destination, 0, true);
+    accl.stream_put(num_transfer_bits / 32, 9, destination, 0, false);
 
     // TODO: Doing it like this is probably not optimal. It seems like we're reinventing a
-    // dwc here somehow.
+    // DWC here.
     send: for (int i = 0; i < num_bits - step + 1; i += step) {
         if (i % stream_width == 0) {
             stream_word = in.read();
diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn
index 52665c152ccf03888f9803f18987caf68dcd8b7a..8d04653be612dc5aeb08d64ec8baa213a4812e46 100644
--- a/docker/Dockerfile.finn
+++ b/docker/Dockerfile.finn
@@ -33,6 +33,11 @@ ARG XRT_DEB_VERSION="xrt_202220.2.14.354_22.04-amd64-xrt"
 
 WORKDIR /workspace
 
+ENV http_proxy http://proxy.ethz.ch:3128
+ENV https_proxy http://proxy.ethz.ch:3128
+ENV HTTPS_PROXY http://proxy.ethz.ch:3128
+ENV HTTP_PROXY http://proxy.ethz.ch:3128
+
 # some Vitis deps require a timezone to be specified, which hangs in Docker
 # use workaround from https://grigorkh.medium.com/fix-tzdata-hangs-docker-image-build-cdb52cc3360d
 ENV TZ="Europe/Dublin"
@@ -131,7 +136,7 @@ RUN git clone https://github.com/zeromq/zmqpp && \
     make && \
     make install PREFIX=/usr/
 
-RUN apt-get install -y gdb
+RUN apt-get install -y gdb libboost-dev
 
 # extra environment variables for FINN compiler
 ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"
diff --git a/notebooks/end2end_example/cybersecurity/build_accl.py b/notebooks/end2end_example/cybersecurity/build_accl.py
index 8b852a6cb0e5636031375c7d468c6b61d626e7a7..077231162f884aa76715d4c8fdb7017123f4ddf5 100644
--- a/notebooks/end2end_example/cybersecurity/build_accl.py
+++ b/notebooks/end2end_example/cybersecurity/build_accl.py
@@ -44,7 +44,6 @@ steps = [
     "step_hls_ipgen",
     "step_set_fifo_depths",
     "step_create_stitched_ip",
-    "step_setup_accl_interface",
 ]
 
 cfg_splits = build.DataflowBuildConfig(
@@ -54,16 +53,16 @@ cfg_splits = build.DataflowBuildConfig(
     mvau_wwidth_max = 80,
     target_fps = 1000000,
     synth_clk_period_ns = 10.0,
-    fpga_part = "xc7z020clg400-1",
     generate_outputs = [
         build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
         build_cfg.DataflowOutputType.STITCHED_IP,
     ],
-    verify_steps = [build_cfg.VerificationStepType.FOLDED_HLS_CPPSIM],
-    board = 'U250',
-    num_boards = 3,
+    stitched_ip_gen_dcp = True,
+    # verify_steps = [build_cfg.VerificationStepType.FOLDED_HLS_CPPSIM],
+    shell_flow_type = build_cfg.ShellFlowType.VITIS_ALVEO,
+    board = "U55C",
+    num_boards = 2,
     save_intermediate_models = True,
-    # start_step = 'step_setup_accl_interface',
 )
 
 build.build_distributed_dataflow_cfg(model_file, cfg_splits)
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 72284cb8364dad48d968434a755bace36faa3ac9..8a4a3cfb3ab99b111e6f985e56f83a9a6bf5a63e 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -638,6 +638,7 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
                 cfg.synth_clk_period_ns,
                 vitis=cfg.stitched_ip_gen_dcp,
                 signature=cfg.signature,
+                accl_interface=True,
             )
         )
         # TODO copy all ip sources into output dir? as zip?
diff --git a/src/finn/custom_op/fpgadataflow/accl.py b/src/finn/custom_op/fpgadataflow/accl.py
index 5ea1fca1ee0fd00f41724e625fe41a9a307af517..651b6071c6e40dc842af77000461168d70223113 100644
--- a/src/finn/custom_op/fpgadataflow/accl.py
+++ b/src/finn/custom_op/fpgadataflow/accl.py
@@ -364,7 +364,6 @@ class ACCLOut(ACCLOp):
 
         return intf_names
 
-
 class ACCLIn(ACCLOp):
     def get_instream_width(self, ind=0):
         return accl_word_size
diff --git a/src/finn/transformation/fpgadataflow/assign_partition_ids.py b/src/finn/transformation/fpgadataflow/assign_partition_ids.py
index f65e6d416d0d9f5ccd1b739ce3bba55670472d02..aeccf6c2c998958ce71bce614aedfe73e5fee54e 100644
--- a/src/finn/transformation/fpgadataflow/assign_partition_ids.py
+++ b/src/finn/transformation/fpgadataflow/assign_partition_ids.py
@@ -16,7 +16,7 @@ class AssignPartitionIDs(Transformation):
             self.target_platform,
             self.ndevices,
             # TODO: Remove this after testing
-            abs_anchors=[(0, [3]), (1, [7]), (2, [11])]
+            abs_anchors=[(0, [3]), (1, [7])]
         )
 
         if floorplans is None:
diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
index 23ab4a2f77bc1cddc769bf8c91e394519d97a3f8..94c31aaddae3463d076b55013983e45935ba80b6 100644
--- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -86,7 +86,7 @@ class CreateStitchedIP(Transformation):
     The packaged block design IP can be found under the ip subdirectory.
     """
 
-    def __init__(self, fpgapart, clk_ns, ip_name="finn_design", vitis=False, signature=[]):
+    def __init__(self, fpgapart, clk_ns, ip_name="finn_design", vitis=False, signature=[], accl_interface=False):
         super().__init__()
         self.fpgapart = fpgapart
         self.clk_ns = clk_ns
@@ -101,6 +101,7 @@ class CreateStitchedIP(Transformation):
         self.clock_reset_are_external = False
         self.create_cmds = []
         self.connect_cmds = []
+        self.accl_interface = accl_interface
         # keep track of top-level interface names
         self.intf_names = {
             "clk": [],
@@ -278,6 +279,53 @@ class CreateStitchedIP(Transformation):
         self.connect_cmds.append("set_property name s_axilite_info [get_bd_intf_ports s_axi_0]")
         self.connect_cmds.append("assign_bd_address")
 
+    def setup_accl_interface(self, model):
+        """Queue the block-design commands that expose the ACCL (CCLO) ports.
+
+        Commands are appended to self.connect_cmds.
+        NOTE(review): assumes apply() later emits that list - confirm.
+        """
+        cmds = self.connect_cmds
+        has_accl_in = any(node.op_type == "ACCLIn" for node in model.graph.node)
+
+        if has_accl_in:
+            # Rename the s_axis_0 port to data_from_cclo, then create a new s_axis_0.
+            cmds.append("set_property name data_from_cclo [get_bd_intf_ports s_axis_0]")
+            cmds.append("create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_0")
+        else:
+            cmds.append("create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 data_from_cclo")
+
+        # Only the first ACCLOut node is wired up (see the TODO below).
+        accl_out_node = None
+        for node in model.graph.node:
+            if node.op_type == "ACCLOut":
+                accl_out_node = node
+                break
+
+        if accl_out_node is not None:
+            cmds.append("set_property name data_to_cclo [get_bd_intf_ports m_axis_0]")
+            cmds.append("create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_0")
+
+            # TODO: In a case where we have multiple nodes that access this interface we
+            # need to add an arbiter for these and the data streams.
+            cmds.extend(
+                "make_bd_intf_pins_external [get_bd_intf_pins {}/{}]".format(
+                    accl_out_node.name, pin_name
+                )
+                for pin_name in ["cmd_to_cclo", "sts_from_cclo", "s_axi_control"]
+            )
+
+            cmds.append("create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_0")
+            cmds.append("connect_bd_net [get_bd_pins xlconstant_0/dout] [get_bd_pins {}/wait_for_ack]".format(accl_out_node.name))
+        else:
+            cmds.append("create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cmd_to_cclo")
+            cmds.append("create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 sts_from_cclo")
+            cmds.append("create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 data_to_cclo")
+
+        # TODO: tie off the ports created above that stay unconnected (the new
+        # s_axis_0 when an ACCLIn node exists, data_from_cclo otherwise); the
+        # tie_off helper for this was never written.
+
     def apply(self, model):
         # ensure non-relative readmemh .dat files
         model = model.transform(ReplaceVerilogRelPaths())
@@ -353,6 +401,9 @@ class CreateStitchedIP(Transformation):
             checksum_layers = model.get_nodes_by_op_type("checksum")
             self.insert_signature(len(checksum_layers))
 
+        if self.accl_interface:
+            self.setup_accl_interface(model)
+
         # create a temporary folder for the project
         prjname = "finn_vivado_stitch_proj"
         vivado_stitch_proj_dir = make_build_dir(prefix="vivado_stitch_proj_")
@@ -392,6 +443,7 @@ class CreateStitchedIP(Transformation):
         tcl.append("add_files -norecurse %s" % wrapper_filename)
         model.set_metadata_prop("wrapper_filename", wrapper_filename)
         tcl.append("set_property top %s_wrapper [current_fileset]" % block_name)
+
         # synthesize to DCP and export stub, DCP and constraints
         if self.vitis:
             tcl.append(
@@ -415,6 +467,7 @@ class CreateStitchedIP(Transformation):
                 "report_utilization -hierarchical -hierarchical_depth 5 "
                 "-file %s_partition_util.rpt" % block_name
             )
+
         # export block design itself as an IP core
         block_vendor = "xilinx_finn"
         block_library = "finn"
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 05f748d3bb5a7d1ed049def0bfb99459515cfb1b..86a06b045312b47f33eee7e29f34ed51cf4f9490 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -60,12 +60,14 @@ alveo_part_map["U50"] = "xcu50-fsvh2104-2L-e"
 alveo_part_map["U200"] = "xcu200-fsgd2104-2-e"
 alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
 alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
+alveo_part_map["U55C"] = "xcu55c-fsvh2892-2L-e"
 
 alveo_default_platform = dict()
 alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_5_202210_1"
 alveo_default_platform["U200"] = "xilinx_u200_gen3x16_xdma_2_202110_1"
 alveo_default_platform["U250"] = "xilinx_u250_gen3x16_xdma_4_1_202210_1"
 alveo_default_platform["U280"] = "xilinx_u280_gen3x16_xdma_1_202211_1"
+alveo_default_platform["U55C"] = "xilinx_u55c_gen3x16_xdma_3_202210_1"
 
 
 def get_rtlsim_trace_depth():
diff --git a/src/finn/util/platforms.py b/src/finn/util/platforms.py
index 77dc59144549366277572dfd2985a5fcda1eed6d..2ffe3b82aa8a403176e0bd41c30d93dae53e2a4e 100644
--- a/src/finn/util/platforms.py
+++ b/src/finn/util/platforms.py
@@ -461,11 +461,50 @@ class Alveo_NxU280_Platform(Platform):
         ]
 
 
+class Alveo_NxU55C_Platform(Platform):
+    """Platform description for one or more Alveo U55C cards."""
+
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        # sll_counts is 3x3 and compute_resources has three rows, so nslr
+        # is 3. Zero-argument super() cannot name the wrong sibling class.
+        super().__init__(
+            nslr=3,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[],
+            hbm_slr=0,
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # according to UG1120
+        # return [[369000, 746000, 2*507, 320, 2733],
+        #         [333000, 675000, 2*468, 320, 2877],
+        #         [367000, 729000, 2*512, 320, 2880]]
+        # observed from Vivado:
+        return [
+            [400800, 2 * 400800, 2 * 600, 320, 2736],
+            [382080, 2 * 382080, 2 * 576, 320, 2880],
+            [380640, 2 * 380640, 2 * 576, 320, 2880],
+        ]
+
+
 platforms = dict()
 platforms["U50"] = Alveo_NxU50_Platform
 platforms["U200"] = Alveo_NxU200_Platform
 platforms["U250"] = Alveo_NxU250_Platform
 platforms["U280"] = Alveo_NxU280_Platform
+platforms["U55C"] = Alveo_NxU55C_Platform
 platforms["Pynq-Z1"] = Zynq7020_Platform
 platforms["Pynq-Z2"] = Zynq7020_Platform
 platforms["Ultra96"] = ZU3EG_Platform