diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 65c898a8c453420ed96ca22715ef2595c5840288..7de6cce936ee54d58d9a526e926ff79dcd35b90d 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -308,6 +308,12 @@ class HLSCustomOp(CustomOp): f.close() self.code_gen_dict.clear() + def code_generation_ipi(self): + """Constructs and returns the TCL for node instantiation in Vivado IPI.""" + vlnv = self.get_nodeattr("ip_vlnv") + cmd = ["create_bd_cell -type ip -vlnv %s %s" % (vlnv, self.onnx_node.name)] + return cmd + def compile_singlenode_code(self): """Builds the bash script for compilation using the CppBuilder from finn.util.basic and executes the script to produce the executable.""" diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py index f666becdbcceca6ca202907610595f8c0069c5a0..5f1697f819d229d6d7c3b8907abcb541061ecbb3 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -51,6 +51,10 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): "outWidth": ("i", True, 0), # FINN DataTypes for inputs/outputs "dataType": ("s", True, ""), + # Toggle between hls or IPI implementation + # hls - use the hls generated IP during stitching + # vivado - use the AXI Infrastructure DWC + "impl_style": ("s", False, "hls"), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -381,3 +385,65 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): exp_shape ), """Output shape doesn't match expected shape, should be same as input shape""" + + def code_generation_ipi(self): + impl_style = self.get_nodeattr("impl_style") + if impl_style == "hls": + return super().code_generation_ipi() + elif impl_style == "vivado": + cmd = [] + node_name = self.onnx_node.name + # create a hierarchy for this layer, with the same port names + clk_name = self.get_verilog_top_module_intf_names()["clk"][0] + rst_name = self.get_verilog_top_module_intf_names()["rst"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + cmd.append("create_bd_cell -type hier %s" % node_name) + cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) + cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) + cmd.append( + "create_bd_intf_pin -mode Master " + "-vlnv xilinx.com:interface:axis_rtl:1.0 /%s/%s" + % (node_name, dout_name) + ) + cmd.append( + "create_bd_intf_pin -mode Slave " + "-vlnv xilinx.com:interface:axis_rtl:1.0 /%s/%s" % (node_name, din_name) + ) + # instantiate and configure DWC + cmd.append( + "create_bd_cell -type ip " + "-vlnv xilinx.com:ip:axis_dwidth_converter:1.1 /%s/dwc" % node_name + ) + cmd.append( + "set_property -dict " + "[list CONFIG.S_TDATA_NUM_BYTES.VALUE_SRC PROPAGATED] " + "[get_bd_cells /%s/dwc]" % node_name + ) + cmd.append( + "set_property -dict " + "[list CONFIG.M_TDATA_NUM_BYTES {%d}] [get_bd_cells /%s/dwc]" + % (np.ceil(self.get_outstream_width() / 8), node_name) + ) + cmd.append( + "connect_bd_intf_net [get_bd_intf_pins %s/dwc/M_AXIS] " + "[get_bd_intf_pins %s/%s]" % (node_name, node_name, dout_name) + ) + cmd.append( + "connect_bd_intf_net [get_bd_intf_pins %s/dwc/S_AXIS] " + "[get_bd_intf_pins %s/%s]" % (node_name, node_name, din_name) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/dwc/aresetn]" + % (node_name, rst_name, node_name) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/dwc/aclk]" + % (node_name, clk_name, node_name) + ) + return cmd + else: + raise Exception( + "DWC implementation style %s not supported, please use hls or vivado" + % impl_style + ) diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index de640f22ae87c89721f8b7ed2b3b270b54000ebb..87f52eeea591ba42bf5374df3c93bcc3e4f8e944 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -28,9 +28,6 @@ import math import os -import subprocess -from shutil import copy - import numpy as np from onnx import TensorProto, helper @@ -100,23 +97,6 @@ class StreamingFCLayer_Batch(HLSCustomOp): my_attrs.update(super().get_nodeattr_types()) return my_attrs - def get_verilog_top_module_name(self): - "Return the Verilog top module name for this node." - - node = self.onnx_node - # set top name depending on mem_mode - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "const" or mem_mode == "external": - prefixed_top_name = "%s_%s" % (node.name, node.name) - elif mem_mode == "decoupled": - prefixed_top_name = "%s_memstream" % (node.name) - else: - raise Exception( - """Please set mem_mode to "const", "decoupled", or "external", - currently no other parameter value is supported!""" - ) - return prefixed_top_name - def calc_wmem(self): """Calculates and returns WMEM.""" mw = self.get_nodeattr("MW") @@ -705,7 +685,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - if mem_mode == "external": + if mem_mode == "external" or mem_mode == "decoupled": wnbits = self.get_weightstream_width() export_wdt = self.get_weight_datatype() # we have converted bipolar weights to binary for export, @@ -1008,112 +988,96 @@ class StreamingFCLayer_Batch(HLSCustomOp): ) ) - def code_generation_ipgen(self, model, fpgapart, clk): - # generate code for all mem_mode of MVAU/FCLayer unit - super().code_generation_ipgen(model, fpgapart, clk) - - # if mem_mode = "decoupled" generate code for verilog wrapper + def code_generation_ipi(self): + cmd = [] + # add streamer if needed mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "decoupled": - # empty code gen dictionary for new entries - self.code_gen_dict.clear() - self.code_gen_dict["$TOPNAME$"] = [ - "{}_memstream".format(self.onnx_node.name) - ] - self.code_gen_dict["$LAYER_NAME$"] = [ - "{}_{}".format(self.onnx_node.name, self.onnx_node.name) - ] - # make instream width a multiple of 8 for AXI stream interface - in_width = self.get_instream_width_padded() - self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)] - self.code_gen_dict["$OUT_RANGE$"] = [ - "[{}:0]".format(self.get_outstream_width_padded() - 1) - ] - # make weight stream width a multiple of 8 for AXI stream interface - weight_width = self.get_weightstream_width_padded() - self.code_gen_dict["$WEIGHT_RANGE$"] = ["[{}:0]".format(weight_width - 1)] - self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)] - self.code_gen_dict["$WSTREAM_DEPTH$"] = [str(self.calc_wmem())] - self.code_gen_dict["$MEM_DEPTH$"] = [str(self.calc_wmem())] - self.code_gen_dict["$RAM_STYLE$"] = [self.get_nodeattr("ram_style")] - - template = self.decoupled_wrapper - - for key in self.code_gen_dict: - # transform list into long string separated by '\n' - code_gen_line = "\n".join(self.code_gen_dict[key]) - template = template.replace(key, code_gen_line) - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - f = open( - os.path.join( - code_gen_dir, "{}_memstream.v".format(self.onnx_node.name) - ), - "w", + node_name = self.onnx_node.name + # create a hierarchy for this layer, with the same port names + clk_name = self.get_verilog_top_module_intf_names()["clk"][0] + rst_name = self.get_verilog_top_module_intf_names()["rst"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + cmd.append("create_bd_cell -type hier %s" % node_name) + cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) + cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) + cmd.append( + "create_bd_intf_pin -mode Master " + "-vlnv xilinx.com:interface:axis_rtl:1.0 /%s/%s" + % (node_name, dout_name) ) - f.write(template) - f.close() - self.code_gen_dict.clear() - - def ipgen_singlenode_code(self): - # generate ip block of MVAU/FCLayer unit for all mem modes - super().ipgen_singlenode_code() - - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "decoupled": - # copy necessary verilog and .dat files - # into verilog folder in code generation folder - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_folder = "{}/project_{}/sol1/impl/verilog/".format( - code_gen_dir, self.onnx_node.name + cmd.append( + "create_bd_intf_pin -mode Slave " + "-vlnv xilinx.com:interface:axis_rtl:1.0 /%s/%s" % (node_name, din_name) ) - # copy memstream components from finn-rtllib - memstream_dir = "/workspace/finn/finn-rtllib/memstream/hdl/" - for file in os.listdir(memstream_dir): - if file.endswith(".v"): - verilog_file = os.path.join(memstream_dir, file) - copy(verilog_file, verilog_folder) - # copy .dat files of weights - for file in os.listdir(code_gen_dir): - if file.endswith(".dat"): - dat_file = os.path.join(code_gen_dir, file) - copy(dat_file, verilog_folder) - # copy verilog wrapper - verilog_wrapper = "{}/{}_memstream.v".format( - code_gen_dir, self.onnx_node.name + # instantiate the hls ip + cmd.append( + "create_bd_cell -type ip -vlnv %s /%s/%s" + % (self.get_nodeattr("ip_vlnv"), node_name, node_name) ) - copy(verilog_wrapper, verilog_folder) - # prepare the IP packaging tcl template - template = templates.ip_package_tcl - self.code_gen_dict["$TOPNAME$"] = [ - "{}_memstream".format(self.onnx_node.name) - ] - self.code_gen_dict["$VERILOG_DIR$"] = [verilog_folder] - for key in self.code_gen_dict: - # transform list into long string separated by '\n' - code_gen_line = "\n".join(self.code_gen_dict[key]) - template = template.replace(key, code_gen_line) - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - f = open(os.path.join(verilog_folder, "package_ip.tcl"), "w") - f.write(template) - f.close() - # create a shell script and call Vivado to invoke the IP pkg script - make_project_sh = verilog_folder + "/make_ip.sh" - working_dir = os.environ["PWD"] - with open(make_project_sh, "w") as f: - f.write("#!/bin/bash \n") - f.write("cd {}\n".format(verilog_folder)) - f.write("vivado -mode batch -source package_ip.tcl\n") - f.write("cd {}\n".format(working_dir)) - bash_command = ["bash", make_project_sh] - process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) - process_compile.communicate() - # re-set ip_path to point to the new packaged IP - self.set_nodeattr("ip_path", verilog_folder) - vlnv = "xilinx.com:hls:%s:1.0" % ( - "{}_memstream".format(self.onnx_node.name) + # instantiate a streamer and connect it to the HLS IP + strm_vlnv = "xilinx.com:user:memstream:1.0" + strm_inst = node_name + "_wstrm" + cmd.append( + "create_bd_cell -type ip -vlnv %s /%s/%s" + % (strm_vlnv, node_name, strm_inst) + ) + cmd.append( + "set_property -dict [list " + "CONFIG.NSTREAMS {1} " + "CONFIG.MEM_DEPTH {%d} " + "CONFIG.MEM_WIDTH {%d} " + "CONFIG.MEM_INIT {%s} " + "CONFIG.RAM_STYLE {%s} " + "CONFIG.STRM0_DEPTH {%d} " + "CONFIG.STRM0_WIDTH {%d} " + "CONFIG.STRM0_OFFSET {0} " + "] [get_bd_cells /%s/%s]" + % ( + self.calc_wmem(), + self.get_weightstream_width_padded(), + self.get_nodeattr("code_gen_dir_ipgen") + "/", + self.get_nodeattr("ram_style"), + self.calc_wmem(), + self.get_weightstream_width_padded(), + node_name, + strm_inst, + ) + ) + cmd.append( + "connect_bd_intf_net [get_bd_intf_pins %s/%s/m_axis_0] " + "[get_bd_intf_pins %s/%s/weights_V_V]" + % (node_name, strm_inst, node_name, node_name) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/%s/aresetn]" + % (node_name, rst_name, node_name, strm_inst) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/%s/aclk]" + % (node_name, clk_name, node_name, strm_inst) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/%s/%s]" + % (node_name, rst_name, node_name, node_name, rst_name) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] [get_bd_pins %s/%s/%s]" + % (node_name, clk_name, node_name, node_name, clk_name) + ) + cmd.append( + "connect_bd_intf_net [get_bd_intf_pins %s/%s] " + "[get_bd_intf_pins %s/%s/%s]" + % (node_name, din_name, node_name, node_name, din_name) + ) + cmd.append( + "connect_bd_intf_net [get_bd_intf_pins %s/%s] " + "[get_bd_intf_pins %s/%s/%s]" + % (node_name, dout_name, node_name, node_name, dout_name) ) - self.set_nodeattr("ip_vlnv", vlnv) - self.code_gen_dict.clear() + cmd.append("save_bd_design") + return cmd def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py index 1f734b548f923341687843c538d1887fcc069bee..c9011b50d06a55c34bdd49c8ea374bdf81ea5f4f 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfifo.py +++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py @@ -51,6 +51,16 @@ class StreamingFIFO(HLSCustomOp): "folded_shape": ("ints", True, []), # FINN DataTypes for inputs/outputs "dataType": ("s", True, ""), + # Toggle between hls or IPI implementation + # rtl - use the hls generated IP during stitching + # vivado - use the AXI Infrastructure FIFO + "impl_style": ("s", False, "rtl"), + # FPGA resource type for FIFOs when impl_style is vivado + # auto -- let Vivado decide + # block -- use BRAM + # distributed -- use LUTRAM + # ultra -- use URAM (on UltraScale+) + "ram_style": ("s", False, "auto"), } my_attrs.update(super().get_nodeattr_types()) @@ -306,3 +316,71 @@ class StreamingFIFO(HLSCustomOp): def pragmas(self): pass + + def code_generation_ipi(self): + impl_style = self.get_nodeattr("impl_style") + if impl_style == "rtl": + return super().code_generation_ipi() + elif impl_style == "vivado": + cmd = [] + node_name = self.onnx_node.name + depth = self.get_nodeattr("depth") + ram_style = self.get_nodeattr("ram_style") + # create a hierarchy for this layer, with the same port names + clk_name = self.get_verilog_top_module_intf_names()["clk"][0] + rst_name = self.get_verilog_top_module_intf_names()["rst"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + cmd.append("create_bd_cell -type hier %s" % node_name) + cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) + cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) + cmd.append( + "create_bd_intf_pin -mode Master " + "-vlnv xilinx.com:interface:axis_rtl:1.0 /%s/%s" + % (node_name, dout_name) + ) + cmd.append( + "create_bd_intf_pin -mode Slave " + "-vlnv xilinx.com:interface:axis_rtl:1.0 /%s/%s" % (node_name, din_name) + ) + # instantiate and configure DWC + cmd.append( + "create_bd_cell -type ip " + "-vlnv xilinx.com:ip:axis_data_fifo:2.0 /%s/fifo" % node_name + ) + cmd.append( + "set_property -dict [list CONFIG.FIFO_DEPTH {%d}] " + "[get_bd_cells /%s/fifo]" % (depth, node_name) + ) + cmd.append( + "set_property -dict [list CONFIG.FIFO_MEMORY_TYPE {%s}] " + "[get_bd_cells /%s/fifo]" % (ram_style, node_name) + ) + cmd.append( + "set_property -dict [list CONFIG.TDATA_NUM_BYTES {%d}] " + "[get_bd_cells /%s/fifo]" + % (np.ceil(self.get_outstream_width() / 8), node_name) + ) + cmd.append( + "connect_bd_intf_net [get_bd_intf_pins %s/fifo/M_AXIS] " + "[get_bd_intf_pins %s/%s]" % (node_name, node_name, dout_name) + ) + cmd.append( + "connect_bd_intf_net [get_bd_intf_pins %s/fifo/S_AXIS] " + "[get_bd_intf_pins %s/%s]" % (node_name, node_name, din_name) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] " + "[get_bd_pins %s/fifo/s_axis_aresetn]" + % (node_name, rst_name, node_name) + ) + cmd.append( + "connect_bd_net [get_bd_pins %s/%s] " + "[get_bd_pins %s/fifo/s_axis_aclk]" % (node_name, clk_name, node_name) + ) + return cmd + else: + raise Exception( + "FIFO implementation style %s not supported, please use rtl or vivado" + % impl_style + ) diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index 90b4b6c47e6e353c1b606d6918eb271e9c0619c5..6e9ce35634760c06c7a409fb5befdb94a08e9c7d 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -182,6 +182,8 @@ class CreateStitchedIP(Transformation): def apply(self, model): ip_dirs = ["list"] + # add RTL streamer IP + ip_dirs.append("/workspace/finn/finn-rtllib/memstream") # ensure that all nodes are fpgadataflow, and that IPs are generated for node in model.graph.node: assert node.domain == "finn", 'Node domain is not set to "finn"' @@ -196,10 +198,7 @@ class CreateStitchedIP(Transformation): ip_dir_value = node_inst.get_nodeattr("ip_path") assert os.path.isdir(ip_dir_value), "IP generation directory doesn't exist." ip_dirs += [ip_dir_value] - vlnv = node_inst.get_nodeattr("ip_vlnv") - inst_name = node.name - create_cmd = "create_bd_cell -type ip -vlnv %s %s" % (vlnv, inst_name) - self.create_cmds += [create_cmd] + self.create_cmds += node_inst.code_generation_ipi() my_producer = model.find_producer(node.input[0]) self.connect_clk_rst(node) self.connect_axi(node) @@ -223,6 +222,7 @@ class CreateStitchedIP(Transformation): # find index of producer output connected to our target input # get names of hdl interfaces for input and producer output # issue a TCL directive to connect input to output + # if FC layer with mode "decoupled", add a streamer on input 1 for i in range(len(node.input)): producer = model.find_producer(node.input[i]) if producer is None: