From 8af685dfab4ada64def37917c920623ea7d9e5d3 Mon Sep 17 00:00:00 2001
From: Felix Jentzsch <45395194+fpjentzsch@users.noreply.github.com>
Date: Thu, 4 Mar 2021 15:07:51 +0100
Subject: [PATCH] Fix AXI addressing for large runtime-writable weight arrays
 (#294)

* Assign BD address segments based on actual range

* Change minimum address range to 4k

* [Test] lfc end2end tests use runtime-wr weights instead of tfc

Co-authored-by: Yaman Umuroglu <yamanu@xilinx.com>
---
 .../fpgadataflow/make_zynq_proj.py              | 10 ++++++++++
 .../transformation/fpgadataflow/templates.py    | 17 +++++++++++++++++
 tests/end2end/test_end2end_bnn_pynq.py          |  2 +-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/finn/transformation/fpgadataflow/make_zynq_proj.py b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
index 99c08dc05..1ac7ee178 100644
--- a/src/finn/transformation/fpgadataflow/make_zynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_zynq_proj.py
@@ -173,6 +173,11 @@ class MakeZYNQProject(Transformation):
                     "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]"
                     % (instance_names[node.name], axilite_intf_name, axilite_idx)
                 )
+                # assign_bd_address with appropriate range/offset
+                config.append(
+                    "assign_axi_addr_proc %s/%s"
+                    % (instance_names[node.name], axilite_intf_name)
+                )
                 idma_idx += 1
                 aximm_idx += 1
                 axilite_idx += 1
@@ -188,6 +193,11 @@ class MakeZYNQProject(Transformation):
                         "[get_bd_intf_pins axi_interconnect_0/M%02d_AXI]"
                         % (instance_names[node.name], axilite_intf_name, axilite_idx)
                     )
+                    # assign_bd_address with appropriate range/offset
+                    config.append(
+                        "assign_axi_addr_proc %s/%s"
+                        % (instance_names[node.name], axilite_intf_name)
+                    )
                     axilite_idx += 1
 
             config.append(
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 73beb62f0..9c0169a98 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -153,6 +153,7 @@ set_property -dict [list CONFIG.NUM_MI $NUM_AXILITE] [get_bd_cells axi_interconn
 
 #create reset controller and connect interconnects to PS
 if {$ZYNQ_TYPE == "zynq_us+"} {
+    set axi_peripheral_base 0xA0000000
     connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0_FPD]
     connect_bd_intf_net [get_bd_intf_pins zynq_ps/M_AXI_HPM0_FPD] -boundary_type upper [get_bd_intf_pins axi_interconnect_0/S00_AXI]
     #connect interconnect clocks and resets
@@ -160,6 +161,7 @@ if {$ZYNQ_TYPE == "zynq_us+"} {
     apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins axi_interconnect_0/S00_ACLK]
     apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/pl_clk0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins zynq_ps/saxihp0_fpd_aclk]
 } elseif {$ZYNQ_TYPE == "zynq_7000"} {
+    set axi_peripheral_base 0x40000000
     connect_bd_intf_net -boundary_type upper [get_bd_intf_pins zynq_ps/M_AXI_GP0] [get_bd_intf_pins axi_interconnect_0/S00_AXI]
     connect_bd_intf_net [get_bd_intf_pins smartconnect_0/M00_AXI] [get_bd_intf_pins zynq_ps/S_AXI_HP0]
     apply_bd_automation -rule xilinx.com:bd_rule:clkrst -config { Clk {/zynq_ps/FCLK_CLK0} Freq {} Ref_Clk0 {} Ref_Clk1 {} Ref_Clk2 {}}  [get_bd_pins axi_interconnect_0/ACLK]
@@ -168,6 +170,21 @@ if {$ZYNQ_TYPE == "zynq_us+"} {
 }
 connect_bd_net [get_bd_pins axi_interconnect_0/ARESETN] [get_bd_pins smartconnect_0/aresetn]
 
+#procedure used by below IP instantiations to map BD address segments based on the axi interface aperture
+proc assign_axi_addr_proc {axi_intf_path} {
+    #global variable holds current base address
+    global axi_peripheral_base
+    #infer range
+    set range [expr 2**[get_property CONFIG.ADDR_WIDTH [get_bd_intf_pins $axi_intf_path]]]
+    set range [expr $range < 4096 ? 4096 : $range]
+    #align base address to range
+    set offset [expr ($axi_peripheral_base + ($range-1)) & ~($range-1)]
+    #perform assignment
+    assign_bd_address [get_bd_addr_segs $axi_intf_path/Reg] -offset $offset -range $range
+    #advance base address
+    set axi_peripheral_base [expr $offset + $range]
+}
+
 #custom IP instantiations/connections start here
 %s
 
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index 5f54eeacf..ddea2dafc 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -140,7 +140,6 @@ def fold_tfc(model):
         fcl_inst.set_nodeattr("PE", pe)
         fcl_inst.set_nodeattr("SIMD", simd)
         fcl_inst.set_nodeattr("ram_style", ramstyle)
-        fcl_inst.set_nodeattr("runtime_writeable_weights", 1)
     # set parallelism for input quantizer to be same as first layer's SIMD
     inp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
     inp_qnt = getCustomOp(inp_qnt_node)
@@ -164,6 +163,7 @@ def fold_lfc(model):
         fcl_inst.set_nodeattr("PE", pe)
         fcl_inst.set_nodeattr("SIMD", simd)
         fcl_inst.set_nodeattr("ram_style", ramstyle)
+        fcl_inst.set_nodeattr("runtime_writeable_weights", 1)
     # set parallelism for input quantizer to be same as first layer's SIMD
     inp_qnt_node = model.get_nodes_by_op_type("Thresholding_Batch")[0]
     inp_qnt = getCustomOp(inp_qnt_node)
-- 
GitLab