From d11636ed5b4c14dde69f2231bd84262983a32326 Mon Sep 17 00:00:00 2001
From: Georg Streich <streichg@hacc-build-01.inf.ethz.ch>
Date: Thu, 9 Nov 2023 08:59:33 +0100
Subject: [PATCH] Fix issues with synthesis

---
 ACCL                                          |  2 +-
 custom_hls/accl/funcs.hpp                     |  8 ++-
 docker/Dockerfile.finn                        |  7 ++-
 .../cybersecurity/build_accl.py               | 11 ++--
 src/finn/builder/build_dataflow_steps.py      |  1 +
 src/finn/custom_op/fpgadataflow/accl.py       |  1 -
 .../fpgadataflow/assign_partition_ids.py      |  2 +-
 .../fpgadataflow/create_stitched_ip.py        | 55 ++++++++++++++++++-
 src/finn/util/basic.py                        |  2 +
 src/finn/util/platforms.py                    | 34 ++++++++++++
 10 files changed, 109 insertions(+), 14 deletions(-)

diff --git a/ACCL b/ACCL
index 8c7e26f98..914a8b520 160000
--- a/ACCL
+++ b/ACCL
@@ -1 +1 @@
-Subproject commit 8c7e26f98ba8275ca5294fb7186ee8e9666ac786
+Subproject commit 914a8b520e62adbd4c02a8f50567928fe117d9c9
diff --git a/custom_hls/accl/funcs.hpp b/custom_hls/accl/funcs.hpp
index 925506248..eb4c26f37 100644
--- a/custom_hls/accl/funcs.hpp
+++ b/custom_hls/accl/funcs.hpp
@@ -1,9 +1,11 @@
-const size_t accl_width = 512;
+#pragma once
 
 #ifdef CPPSIM
 #include <iostream>
 #endif
 
+const size_t accl_width = 512;
+
 template<unsigned int stream_width, unsigned int num_bits, unsigned int step>
 void accl_out(
     unsigned int destination,
@@ -30,10 +32,10 @@ void accl_out(
     bool leftover = num_bits % accl_width != 0;
     int num_transfer_bits = ((num_bits + accl_width - 1) / accl_width) * accl_width;
 
-    accl.stream_put(num_transfer_bits / 32, 9, destination, 0, true);
+    accl.stream_put(num_transfer_bits / 32, 9, destination, 0, false);
 
     // TODO: Doing it like this is probably not optimal. It seems like we're reinventing a
-    // dwc here somehow.
+    // DWC here.
     send: for (int i = 0; i < num_bits - step + 1; i += step) {
         if (i % stream_width == 0) {
             stream_word = in.read();
diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn
index 52665c152..8d04653be 100644
--- a/docker/Dockerfile.finn
+++ b/docker/Dockerfile.finn
@@ -33,6 +33,11 @@ ARG XRT_DEB_VERSION="xrt_202220.2.14.354_22.04-amd64-xrt"
 
 WORKDIR /workspace
 
+ENV http_proxy http://proxy.ethz.ch:3128
+ENV https_proxy http://proxy.ethz.ch:3128
+ENV HTTPS_PROXY http://proxy.ethz.ch:3128
+ENV HTTP_PROXY http://proxy.ethz.ch:3128
+
 # some Vitis deps require a timezone to be specified, which hangs in Docker
 # use workaround from https://grigorkh.medium.com/fix-tzdata-hangs-docker-image-build-cdb52cc3360d
 ENV TZ="Europe/Dublin"
@@ -131,7 +136,7 @@ RUN git clone https://github.com/zeromq/zmqpp && \
     make && \
     make install PREFIX=/usr/
 
-RUN apt-get install -y gdb
+RUN apt-get install -y gdb libboost-dev
 
 # extra environment variables for FINN compiler
 ENV VIVADO_IP_CACHE "/tmp/vivado_ip_cache"
diff --git a/notebooks/end2end_example/cybersecurity/build_accl.py b/notebooks/end2end_example/cybersecurity/build_accl.py
index 8b852a6cb..077231162 100644
--- a/notebooks/end2end_example/cybersecurity/build_accl.py
+++ b/notebooks/end2end_example/cybersecurity/build_accl.py
@@ -44,7 +44,6 @@ steps = [
     "step_hls_ipgen",
     "step_set_fifo_depths",
     "step_create_stitched_ip",
-    "step_setup_accl_interface",
 ]
 
 cfg_splits = build.DataflowBuildConfig(
@@ -54,16 +53,16 @@ cfg_splits = build.DataflowBuildConfig(
     mvau_wwidth_max     = 80,
     target_fps          = 1000000,
     synth_clk_period_ns = 10.0,
-    fpga_part           = "xc7z020clg400-1",
     generate_outputs    = [
         build_cfg.DataflowOutputType.ESTIMATE_REPORTS,
         build_cfg.DataflowOutputType.STITCHED_IP,
     ],
-    verify_steps        = [build_cfg.VerificationStepType.FOLDED_HLS_CPPSIM],
-    board               = 'U250',
-    num_boards          = 3,
+    stitched_ip_gen_dcp = True,
+    # verify_steps        = [build_cfg.VerificationStepType.FOLDED_HLS_CPPSIM],
+    shell_flow_type     = build_cfg.ShellFlowType.VITIS_ALVEO,
+    board               = "U55C",
+    num_boards          = 2,
     save_intermediate_models = True,
-    # start_step          = 'step_setup_accl_interface',
 )
 
 build.build_distributed_dataflow_cfg(model_file, cfg_splits)
diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py
index 72284cb83..8a4a3cfb3 100644
--- a/src/finn/builder/build_dataflow_steps.py
+++ b/src/finn/builder/build_dataflow_steps.py
@@ -638,6 +638,7 @@ def step_create_stitched_ip(model: ModelWrapper, cfg: DataflowBuildConfig):
                 cfg.synth_clk_period_ns,
                 vitis=cfg.stitched_ip_gen_dcp,
                 signature=cfg.signature,
+                accl_interface=True,
             )
         )
         # TODO copy all ip sources into output dir? as zip?
diff --git a/src/finn/custom_op/fpgadataflow/accl.py b/src/finn/custom_op/fpgadataflow/accl.py
index 5ea1fca1e..651b6071c 100644
--- a/src/finn/custom_op/fpgadataflow/accl.py
+++ b/src/finn/custom_op/fpgadataflow/accl.py
@@ -364,7 +364,6 @@ class ACCLOut(ACCLOp):
 
         return intf_names
 
-
 class ACCLIn(ACCLOp):
     def get_instream_width(self, ind=0):
         return accl_word_size
diff --git a/src/finn/transformation/fpgadataflow/assign_partition_ids.py b/src/finn/transformation/fpgadataflow/assign_partition_ids.py
index f65e6d416..aeccf6c2c 100644
--- a/src/finn/transformation/fpgadataflow/assign_partition_ids.py
+++ b/src/finn/transformation/fpgadataflow/assign_partition_ids.py
@@ -16,7 +16,7 @@ class AssignPartitionIDs(Transformation):
             self.target_platform,
             self.ndevices,
             # TODO: Remove this after testing
-            abs_anchors=[(0, [3]), (1, [7]), (2, [11])]
+            abs_anchors=[(0, [3]), (1, [7])]
         )
 
         if floorplans is None:
diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
index 23ab4a2f7..94c31aadd 100644
--- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -86,7 +86,7 @@ class CreateStitchedIP(Transformation):
     The packaged block design IP can be found under the ip subdirectory.
     """
 
-    def __init__(self, fpgapart, clk_ns, ip_name="finn_design", vitis=False, signature=[]):
+    def __init__(self, fpgapart, clk_ns, ip_name="finn_design", vitis=False, signature=[], accl_interface=False):
         super().__init__()
         self.fpgapart = fpgapart
         self.clk_ns = clk_ns
@@ -101,6 +101,7 @@ class CreateStitchedIP(Transformation):
         self.clock_reset_are_external = False
         self.create_cmds = []
         self.connect_cmds = []
+        self.accl_interface = accl_interface
         # keep track of top-level interface names
         self.intf_names = {
             "clk": [],
@@ -278,6 +279,53 @@ class CreateStitchedIP(Transformation):
         self.connect_cmds.append("set_property name s_axilite_info [get_bd_intf_ports s_axi_0]")
         self.connect_cmds.append("assign_bd_address")
 
+    def setup_accl_interface(self, model):
+        """Queue the Tcl commands that expose the ACCL (CCLO) ports of the block design.
+
+        If the model has an ACCLIn / ACCLOut node, the s_axis_0 / m_axis_0 port is renamed
+        to data_from_cclo / data_to_cclo and a new port is created under the old name;
+        otherwise a new port carrying the CCLO name is created. Only the first ACCLOut
+        node gets its command/status/control pins exported. Returns nothing.
+        """
+        # The commands are queued on connect_cmds (the original appended to an undefined
+        # `tcl`). NOTE(review): assumes apply() emits connect_cmds into the script.
+        tcl = self.connect_cmds
+        new_port = "create_bd_intf_port -mode {} -vlnv xilinx.com:interface:axis_rtl:1.0 {}"
+        nodes = model.graph.node
+
+        if any(node.op_type == "ACCLIn" for node in nodes):
+            tcl.append("set_property name data_from_cclo [get_bd_intf_ports s_axis_0]")
+            tcl.append(new_port.format("Slave", "s_axis_0"))
+        else:
+            tcl.append(new_port.format("Slave", "data_from_cclo"))
+
+        accl_out_node = next((node for node in nodes if node.op_type == "ACCLOut"), None)
+        if accl_out_node is not None:
+            tcl.append("set_property name data_to_cclo [get_bd_intf_ports m_axis_0]")
+            tcl.append(new_port.format("Master", "m_axis_0"))
+
+            # TODO: In a case where we have multiple nodes that access this interface we
+            # need to add an arbiter for these and the data streams.
+            tcl.extend(
+                "make_bd_intf_pins_external [get_bd_intf_pins {}/{}]".format(
+                    accl_out_node.name, pin_name
+                )
+                for pin_name in ["cmd_to_cclo", "sts_from_cclo", "s_axi_control"]
+            )
+
+            tcl.append("create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_0")
+            tcl.append(
+                "connect_bd_net [get_bd_pins xlconstant_0/dout] "
+                "[get_bd_pins {}/wait_for_ack]".format(accl_out_node.name)
+            )
+        else:
+            tcl.append(new_port.format("Master", "cmd_to_cclo"))
+            tcl.append(new_port.format("Slave", "sts_from_cclo"))
+            tcl.append(new_port.format("Master", "data_to_cclo"))
+
+        # TODO: tie off the ports created above that have no driver/consumer; the bodyless
+        # tie_off() stub that stood here made the whole module fail to parse.
+
     def apply(self, model):
         # ensure non-relative readmemh .dat files
         model = model.transform(ReplaceVerilogRelPaths())
@@ -353,6 +401,9 @@ class CreateStitchedIP(Transformation):
             checksum_layers = model.get_nodes_by_op_type("checksum")
             self.insert_signature(len(checksum_layers))
 
+        if self.accl_interface:
+            self.setup_accl_interface(model)
+
         # create a temporary folder for the project
         prjname = "finn_vivado_stitch_proj"
         vivado_stitch_proj_dir = make_build_dir(prefix="vivado_stitch_proj_")
@@ -392,6 +443,7 @@ class CreateStitchedIP(Transformation):
         tcl.append("add_files -norecurse %s" % wrapper_filename)
         model.set_metadata_prop("wrapper_filename", wrapper_filename)
         tcl.append("set_property top %s_wrapper [current_fileset]" % block_name)
+
         # synthesize to DCP and export stub, DCP and constraints
         if self.vitis:
             tcl.append(
@@ -415,6 +467,7 @@ class CreateStitchedIP(Transformation):
                 "report_utilization -hierarchical -hierarchical_depth 5 "
                 "-file %s_partition_util.rpt" % block_name
             )
+
         # export block design itself as an IP core
         block_vendor = "xilinx_finn"
         block_library = "finn"
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 05f748d3b..86a06b045 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -60,12 +60,14 @@ alveo_part_map["U50"] = "xcu50-fsvh2104-2L-e"
 alveo_part_map["U200"] = "xcu200-fsgd2104-2-e"
 alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
 alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
+alveo_part_map["U55C"] = "xcu55c-fsvh2892-2L-e"
 
 alveo_default_platform = dict()
 alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_5_202210_1"
 alveo_default_platform["U200"] = "xilinx_u200_gen3x16_xdma_2_202110_1"
 alveo_default_platform["U250"] = "xilinx_u250_gen3x16_xdma_4_1_202210_1"
 alveo_default_platform["U280"] = "xilinx_u280_gen3x16_xdma_1_202211_1"
+alveo_default_platform["U55C"] = "xilinx_u55c_gen3x16_xdma_3_202210_1"
 
 
 def get_rtlsim_trace_depth():
diff --git a/src/finn/util/platforms.py b/src/finn/util/platforms.py
index 77dc59144..2ffe3b82a 100644
--- a/src/finn/util/platforms.py
+++ b/src/finn/util/platforms.py
@@ -461,11 +461,45 @@ class Alveo_NxU280_Platform(Platform):
         ]
 
 
+class Alveo_NxU55C_Platform(Platform):
+    """Alveo U55C platform: 3 SLRs, no DDR SLRs, hbm_slr=0, 100 Gbps Ethernet on eth_slr=2."""
+
+    def __init__(
+        self, ndevices=1, limits=DEFAULT_RES_LIMITS, avg_constraints=DEFAULT_AVG_CONSTRAINTS
+    ):
+        # 3x3 table passed as sll_count; presumably indexed by SLR pair (TODO confirm)
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        super(Alveo_NxU55C_Platform, self).__init__(
+            nslr=3,  # agrees with the 3x3 sll_counts, eth_slr=2 and the 3 resource rows
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[],
+            hbm_slr=0,
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # according to UG1120
+        # return [[369000, 746000, 2*507, 320, 2733],
+        #        [333000, 675000, 2*468, 320, 2877],
+        #        [367000, 729000, 2*512, 320, 2880]]
+        # observed from Vivado (presumably one row per SLR; TODO confirm column meaning):
+        return [
+            [400800, 2 * 400800, 2 * 600, 320, 2736],
+            [382080, 2 * 382080, 2 * 576, 320, 2880],
+            [380640, 2 * 380640, 2 * 576, 320, 2880],
+        ]
+
 platforms = dict()
 platforms["U50"] = Alveo_NxU50_Platform
 platforms["U200"] = Alveo_NxU200_Platform
 platforms["U250"] = Alveo_NxU250_Platform
 platforms["U280"] = Alveo_NxU280_Platform
+platforms["U55C"] = Alveo_NxU55C_Platform
 platforms["Pynq-Z1"] = Zynq7020_Platform
 platforms["Pynq-Z2"] = Zynq7020_Platform
 platforms["Ultra96"] = ZU3EG_Platform
-- 
GitLab