diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index d093a410e5b30a039688ec1a5264e59b92edfd8a..a0c10f08c017db78c8aff284a7e07fa1c26d466e 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -118,8 +118,8 @@ class HLSCustomOp(CustomOp): intf_names = {} intf_names["clk"] = ["ap_clk"] intf_names["rst"] = ["ap_rst_n"] - intf_names["s_axis"] = ["in0_V_V"] - intf_names["m_axis"] = ["out_V_V"] + intf_names["s_axis"] = [("in0_V_V", self.get_instream_width_padded())] + intf_names["m_axis"] = [("out_V_V", self.get_outstream_width_padded())] intf_names["aximm"] = [] intf_names["axilite"] = [] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py index 14fb65739dab4208edd0c61bb7ca8ae2d114baab..593f9f4fdf574aa2a2b4e70de5fe6ece2ce2085d 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py @@ -63,7 +63,7 @@ class AddStreams_Batch(HLSCustomOp): ishape = tuple(vecs + [ich]) return ishape - def get_folded_input_shape(self): + def get_folded_input_shape(self, ind=0): ich = self.get_nodeattr("NumChannels") pe = self.get_nodeattr("PE") assert ich % pe == 0, "PE must divide NumChannels" @@ -363,5 +363,5 @@ class AddStreams_Batch(HLSCustomOp): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() - intf_names["s_axis"] = ["in0_V_V", "in1_V_V"] + intf_names["s_axis"].append(("in1_V_V", self.get_instream_width_padded())) return intf_names diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py index 044cfddaab51a5f9bf7aa25e9123247b10de8529..603fef78df561b301ffd20725febdc35daa78f6f 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py +++ 
b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py @@ -309,7 +309,8 @@ class DuplicateStreams_Batch(HLSCustomOp): def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [ """DuplicateStreams_Batch<{}, {}> (in0, out0, out1, 1);""".format( - self.get_outstream_width(), self.get_number_output_values() // 2, + self.get_outstream_width(), + self.get_number_output_values() // 2, ) ] @@ -375,5 +376,8 @@ class DuplicateStreams_Batch(HLSCustomOp): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() - intf_names["m_axis"] = ["out0_V_V", "out1_V_V"] + intf_names["m_axis"] = [ + ("out0_V_V", self.get_outstream_width_padded()), + ("out1_V_V", self.get_outstream_width_padded()), + ] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py index 7d0374445d816f1e8d49ed92cf7aa67b024f9ac1..67af0c5cb409c6deea9bacf247f803d119aa1b17 100644 --- a/src/finn/custom_op/fpgadataflow/iodma.py +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -350,11 +350,9 @@ class IODMA(HLSCustomOp): def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() if self.get_nodeattr("direction") == "out": - intf_names["s_axis"] = ["in0_V_V"] intf_names["m_axis"] = [] else: intf_names["s_axis"] = [] - intf_names["m_axis"] = ["out_V_V"] intf_names["axilite"] = ["s_axi_control"] - intf_names["aximm"] = ["m_axi_gmem"] + intf_names["aximm"] = [("m_axi_gmem", self.get_nodeattr("intfWidth"))] return intf_names diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index 37c6ad4894a1a82878f68c92501844d7fd45d353..3b557d084797432e7551a1e6c83d5f772bf7ccd0 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -319,12 +319,24 @@ class StreamingFCLayer_Batch(HLSCustomOp): weightstream = 
self.get_weightstream_width() return max([weightstream, temp_value]) - def get_folded_input_shape(self): + def get_folded_input_shape(self, ind=0): mw = self.get_nodeattr("MW") + mh = self.get_nodeattr("MH") simd = self.get_nodeattr("SIMD") + pe = self.get_nodeattr("PE") sf = mw // simd + nf = mh // pe vecs = list(self.get_nodeattr("numInputVectors")) - folded_input_shape = tuple(vecs + [sf, simd]) + + if ind == 0: + # calculate shape of input 0 + folded_input_shape = tuple(vecs + [sf, simd]) + elif ind == 1 and self.get_nodeattr("mem_mode") == "external": + # calculate shape of input 1 (weights) + folded_input_shape = tuple(vecs + [sf * nf, simd * pe]) + else: + raise Exception("Undefined input shape for requested input") + return folded_input_shape def get_folded_output_shape(self): @@ -1046,8 +1058,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): # create a hierarchy for this layer, with the same port names clk_name = self.get_verilog_top_module_intf_names()["clk"][0] rst_name = self.get_verilog_top_module_intf_names()["rst"][0] - dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0] - din_name = self.get_verilog_top_module_intf_names()["s_axis"][0] + dout_name = self.get_verilog_top_module_intf_names()["m_axis"][0][0] + din_name = self.get_verilog_top_module_intf_names()["s_axis"][0][0] cmd.append("create_bd_cell -type hier %s" % node_name) cmd.append("create_bd_pin -dir I -type clk /%s/%s" % (node_name, clk_name)) cmd.append("create_bd_pin -dir I -type rst /%s/%s" % (node_name, rst_name)) @@ -1126,8 +1138,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): % (node_name, dout_name, node_name, node_name, dout_name) ) cmd.append("save_bd_design") - elif mem_mode == "const": - # base class impl sufficient for const mode + elif mem_mode == "const" or mem_mode == "external": + # base class impl sufficient for const/external modes return super().code_generation_ipi() else: raise Exception("Unrecognized mem_mode for StreamingFCLayer") @@ -1137,5 +1149,7 @@ 
class StreamingFCLayer_Batch(HLSCustomOp): intf_names = super().get_verilog_top_module_intf_names() mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "external": - intf_names["s_axis"] = ["in0_V_V", "weights_V_V"] + intf_names["s_axis"].append( + ("weights_V_V", self.get_weightstream_width_padded()) + ) return intf_names diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index 3470e9525d590303eca1f8700fe4b79c4e03d38f..2bcb4a89a4610c64c53947fdb7e8093a2d050821 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -29,6 +29,7 @@ import os import warnings import subprocess +import json from finn.transformation.base import Transformation from finn.util.basic import get_by_name, make_build_dir @@ -40,6 +41,31 @@ from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( ) +def is_external_input(model, node, i): + # indicate whether input i of node should be made external + # True only if input is unconnected and has no initializer + # Only exception is second input of FC layers when mem_mode is external + node_inst = getCustomOp(node) + producer = model.find_producer(node.input[i]) + if producer is None: + if model.get_initializer(node.input[i]) is None: + return True + else: + if node.op_type == "StreamingFCLayer_Batch": + if node_inst.get_nodeattr("mem_mode") == "external": + return True + return False + + +def is_external_output(model, node, i): + # indicate whether output i of node should be made external + # True only if output is unconnected + consumers = model.find_consumers(node.output[i]) + if consumers is None: + return True + return False + + +class CreateStitchedIP(Transformation): """Create a Vivado IP Block Design project from all the generated IPs of a graph. 
All nodes in the graph must have the fpgadataflow backend attribute, @@ -138,21 +164,24 @@ class CreateStitchedIP(Transformation): if len(aximm_intf_name) != 0: self.connect_cmds.append( "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" - % (inst_name, aximm_intf_name[0]) + % (inst_name, aximm_intf_name[0][0]) ) self.connect_cmds.append( "set_property name m_axi_gmem0 [get_bd_intf_ports m_axi_gmem_0]" ) - self.intf_names["aximm"] = ["m_axi_gmem0"] + self.intf_names["aximm"] = [("m_axi_gmem0", aximm_intf_name[0][1])] assert self.has_aximm is False, "Currently limited to one AXI-MM interface" self.has_aximm = True - def connect_m_axis_external(self, node): + def connect_m_axis_external(self, node, idx=None): inst_name = node.name node_inst = getCustomOp(node) output_intf_names = node_inst.get_verilog_top_module_intf_names()["m_axis"] # make output axis external - for output_intf_name in output_intf_names: + for i in range(len(output_intf_names)): + if idx is not None and idx != i: + continue + output_intf_name = output_intf_names[i][0] self.connect_cmds.append( "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" % (inst_name, output_intf_name) @@ -162,15 +191,20 @@ class CreateStitchedIP(Transformation): % (self.m_axis_idx, output_intf_name) ) self.has_m_axis = True - self.intf_names["m_axis"].append("m_axis_%d" % self.m_axis_idx) + self.intf_names["m_axis"].append( + ("m_axis_%d" % self.m_axis_idx, output_intf_names[i][1]) + ) self.m_axis_idx += 1 - def connect_s_axis_external(self, node): + def connect_s_axis_external(self, node, idx=None): inst_name = node.name node_inst = getCustomOp(node) input_intf_names = node_inst.get_verilog_top_module_intf_names()["s_axis"] # make input axis external - for input_intf_name in input_intf_names: + for i in range(len(input_intf_names)): + if idx is not None and idx != i: + continue + input_intf_name = input_intf_names[i][0] self.connect_cmds.append( "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" % (inst_name, 
input_intf_name) @@ -180,7 +214,9 @@ class CreateStitchedIP(Transformation): % (self.s_axis_idx, input_intf_name) ) self.has_s_axis = True - self.intf_names["s_axis"].append("s_axis_%d" % self.s_axis_idx) + self.intf_names["s_axis"].append( + ("s_axis_%d" % self.s_axis_idx, input_intf_names[i][1]) + ) self.s_axis_idx += 1 def apply(self, model): @@ -204,57 +240,30 @@ class CreateStitchedIP(Transformation): assert os.path.isdir(ip_dir_value), "IP generation directory doesn't exist." ip_dirs += [ip_dir_value] self.create_cmds += node_inst.code_generation_ipi() - my_producer = model.find_producer(node.input[0]) self.connect_clk_rst(node) self.connect_axi(node) - if my_producer is None: - # first node in graph - self.connect_s_axis_external(node) - if node.op_type == "TLastMarker": - assert ( - node_inst.get_nodeattr("Direction") == "in" - ), """Output TLastMarker incorrect direction""" - elif node.op_type == "IODMA" and len(model.graph.node) != 1: - # don't apply this check for a 1-node partition - assert ( - node_inst.get_nodeattr("direction") == "in" - ), """Input DMA incorrect direction""" - else: - # intermediate node - # wire up input(s) to previous node output(s) - # foreach input - # find producer - # find index of producer output connected to our target input - # get names of hdl interfaces for input and producer output - # issue a TCL directive to connect input to output - # if FC layer with mode "decoupled", add a streamer on input 1 - for i in range(len(node.input)): + for i in range(len(node.input)): + if is_external_input(model, node, i): + self.connect_s_axis_external(node, idx=i) + else: producer = model.find_producer(node.input[i]) if producer is None: continue j = list(producer.output).index(node.input[i]) src_intf_name = getCustomOp( producer - ).get_verilog_top_module_intf_names()["m_axis"][j] + ).get_verilog_top_module_intf_names()["m_axis"][j][0] dst_intf_name = node_inst.get_verilog_top_module_intf_names()[ "s_axis" - ][i] + ][i][0] 
self.connect_cmds.append( "connect_bd_intf_net [get_bd_intf_pins %s/%s] " "[get_bd_intf_pins %s/%s]" % (producer.name, src_intf_name, node.name, dst_intf_name) ) - if model.find_consumers(node.output[0]) is None: - # last node in graph - self.connect_m_axis_external(node) - if node.op_type == "TLastMarker": - assert ( - node_inst.get_nodeattr("Direction") == "out" - ), """Output TLastMarker incorrect direction""" - elif node.op_type == "IODMA" and len(model.graph.node) != 1: - assert ( - node_inst.get_nodeattr("direction") == "out" - ), """Output DMA incorrect direction""" + for i in range(len(node.output)): + if is_external_output(model, node, i): + self.connect_m_axis_external(node, idx=i) # create a temporary folder for the project prjname = "finn_vivado_stitch_proj" @@ -319,7 +328,7 @@ class CreateStitchedIP(Transformation): block_library = "finn" block_vlnv = "%s:%s:%s:1.0" % (block_vendor, block_library, block_name) model.set_metadata_prop("vivado_stitch_vlnv", block_vlnv) - model.set_metadata_prop("vivado_stitch_ifnames", str(self.intf_names)) + model.set_metadata_prop("vivado_stitch_ifnames", json.dumps(self.intf_names)) tcl.append( ( "ipx::package_project -root_dir %s/ip -vendor %s " diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py index feaa534e1e9d2fb527293a617cc622a5f71c24cb..603d828a532b8afa4ec364dd0487a200608719ee 100644 --- a/src/finn/transformation/fpgadataflow/insert_iodma.py +++ b/src/finn/transformation/fpgadataflow/insert_iodma.py @@ -48,6 +48,39 @@ class InsertIODMA(Transformation): ), "max_intfwidth must be a power of 2" self.max_intfwidth = max_intfwidth + def get_mem_init(self, weights, pe, simd): + """ + Returns matrix ready for pack_innermost_dim_as_hex_string with + reverse=False (finn.util.data_packing) to return the memory init file + little endian packed. 
+ That is, get_mem_init returns: + elem(pe,simd) + addr = 0: [(pe-1,simd-1),(pe-1,simd-2),...(0,1),(0,0)] + addr = 1: [(pe-1,simd*2-1),.......(0,simd+1),(0,simd)] + . + """ + w_shape = weights.shape + assert len(w_shape) == 2, "weights withincorrect number of dims" + inp_w, out_w = w_shape + + assert out_w % pe == 0, "Malformed weight matrix" + assert inp_w % simd == 0, "Malformed weight matrix" + reshaped_w = np.zeros(inp_w * out_w).reshape(-1, pe * simd) + + addr = 0 + for fr in range(out_w // pe): + for fc in range(inp_w // simd): + tile = weights[ + (fc * simd) : ((fc + 1) * simd), (fr * pe) : ((fr + 1) * pe) + ] + for p in range(pe): + reshaped_w[addr, (p * simd) : ((p + 1) * simd)] = tile[ + :, p + ].transpose() + addr += 1 + reshaped_w = np.flip(reshaped_w, axis=-1) + return reshaped_w + def apply(self, model): # only makes sense for a pure fpgadataflow graph -- so we check! all_nodes = list(model.graph.node) @@ -171,21 +204,24 @@ class InsertIODMA(Transformation): # calculate width of stream output from DMA pe = get_by_name(fc_node.attribute, "PE").i simd = get_by_name(fc_node.attribute, "SIMD").i - assert pe * simd == w_shape[0], "Malformed weight matrix" streamWidth = simd * pe * w_dtype.bitwidth() # make new buffer + W = model.get_initializer(fc_w_name) + iodma_mem = self.get_mem_init(W, pe, simd) + model.set_initializer(fc_w_name, iodma_mem) + fc_node_in = oh.make_tensor_value_info( - model.make_new_valueinfo_name(), TensorProto.FLOAT, w_shape + model.make_new_valueinfo_name(), TensorProto.FLOAT, iodma_mem.shape ) model.graph.value_info.append(fc_node_in) model.set_tensor_datatype(fc_node_in.name, w_dtype) - model.set_initializer(fc_node_in.name, model.get_initializer(fc_w_name)) + model.set_initializer(fc_node_in.name, W) dma_node = oh.make_node( "IODMA", [fc_w_name], [fc_node_in.name], - numInputVectors=[w_shape[1]], - NumChannels=w_shape[0], + numInputVectors=[iodma_mem.shape[0]], + NumChannels=pe * simd, dataType=str(w_dtype.name), 
intfWidth=intfwidth, streamWidth=streamWidth, diff --git a/src/finn/transformation/fpgadataflow/vitis_build.py b/src/finn/transformation/fpgadataflow/vitis_build.py index b6fc62f57f539d69c2d2a0cfa26fb4574f1d7747..e4da0d631b8f8bb1cc21799bba00c454eba528ae 100644 --- a/src/finn/transformation/fpgadataflow/vitis_build.py +++ b/src/finn/transformation/fpgadataflow/vitis_build.py @@ -28,6 +28,7 @@ import os import subprocess +import json from finn.core.modelwrapper import ModelWrapper from finn.transformation.base import Transformation @@ -38,14 +39,17 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import ( ) from finn.transformation.fpgadataflow.insert_dwc import InsertDWC from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO -from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker from finn.transformation.fpgadataflow.insert_iodma import InsertIODMA from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.floorplan import Floorplan from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver -from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.general import ( + GiveReadableTensorNames, + GiveUniqueNodeNames, + RemoveUnusedTensors, +) from finn.util.basic import make_build_dir from finn.transformation.infer_data_layouts import InferDataLayouts from . 
import templates @@ -89,51 +93,40 @@ class CreateVitisXO(Transformation): _check_vitis_envvars() vivado_proj_dir = model.get_metadata_prop("vivado_stitch_proj") stitched_ip_dir = vivado_proj_dir + "/ip" + interfaces = json.loads(model.get_metadata_prop("vivado_stitch_ifnames")) args_string = [] - m_axis_idx = 0 - s_axis_idx = 0 + arg_id = 0 # NOTE: this assumes the graph is Vitis-compatible: max one axi lite interface # developed from instructions in UG1393 (v2019.2) and package_xo documentation # package_xo is responsible for generating the kernel xml - for node in model.graph.node: - node_inst = getCustomOp(node) - arg_id = 0 - if node.op_type == "TLastMarker": - stream_width = node_inst.get_nodeattr("StreamWidth") - # add a stream input or output port, based on direction - if node_inst.get_nodeattr("Direction") == "in": - args_string.append( - "{in:4:%s:s_axis_%d:0x0:0x0:ap_uint<%s>:0}" - % (str(arg_id), s_axis_idx, str(stream_width)) - ) - s_axis_idx += 1 - else: - args_string.append( - "{out:4:%s:m_axis_%d:0x0:0x0:ap_uint<%s>:0}" - % (str(arg_id), m_axis_idx, str(stream_width)) + if len(interfaces["axilite"]) > 0: + if len(interfaces["aximm"]) > 0: + args_string.append( + "{addr:1:%s:%s:0x8:0x10:ap_uint<%s>*:0}" + % ( + str(arg_id), + interfaces["aximm"][0][0], + str(interfaces["aximm"][0][1]), ) - m_axis_idx += 1 + ) arg_id += 1 - # add a axilite port if dynamic - # add a count parameter if dynamic - if node_inst.get_nodeattr("DynIters") == 1: - args_string.append( - "{numReps:0:%s:s_axi_control:0x4:0x10:uint:0}" % str(arg_id) - ) - arg_id += 1 - elif node.op_type == "IODMA": - port_width = node_inst.get_nodeattr("intfWidth") - # add an address parameter - # add a count parameter args_string.append( - "{addr:1:%s:m_axi_gmem0:0x8:0x10:ap_uint<%s>*:0}" - % (str(arg_id), str(port_width)) + "{numReps:0:%s:s_axi_control:0x4:0x1C:uint:0}" % str(arg_id) ) arg_id += 1 + else: args_string.append( - "{numReps:0:%s:s_axi_control:0x4:0x1C:uint:0}" % str(arg_id) + 
"{numReps:0:%s:s_axi_control:0x4:0x10:uint:0}" % str(arg_id) ) arg_id += 1 + for intf in interfaces["s_axis"] + interfaces["m_axis"]: + stream_width = intf[1] + stream_name = intf[0] + args_string.append( + "{%s:4:%s:%s:0x0:0x0:ap_uint<%s>:0}" + % (stream_name, str(arg_id), stream_name, str(stream_width)) + ) + arg_id += 1 # save kernel xml then run package_xo xo_name = self.ip_name + ".xo" @@ -175,8 +168,11 @@ class VitisLink(Transformation): """ def __init__( - self, platform, f_mhz=200, strategy=VitisOptStrategy.PERFORMANCE, - enable_debug=False + self, + platform, + f_mhz=200, + strategy=VitisOptStrategy.PERFORMANCE, + enable_debug=False, ): super().__init__() self.platform = platform @@ -316,9 +312,12 @@ class VitisBuild(Transformation): """Best-effort attempt at building the accelerator with Vitis.""" def __init__( - self, fpga_part, period_ns, platform, + self, + fpga_part, + period_ns, + platform, strategy=VitisOptStrategy.PERFORMANCE, - enable_debug=False + enable_debug=False, ): super().__init__() self.fpga_part = fpga_part @@ -350,9 +349,7 @@ class VitisBuild(Transformation): dataflow_model_filename = sdp_node.get_nodeattr("model") kernel_model = ModelWrapper(dataflow_model_filename) kernel_model = kernel_model.transform(InsertFIFO()) - kernel_model = kernel_model.transform( - InsertTLastMarker(both=True, external=False, dynamic=False) - ) + kernel_model = kernel_model.transform(RemoveUnusedTensors()) kernel_model = kernel_model.transform(GiveUniqueNodeNames()) kernel_model.save(dataflow_model_filename) kernel_model = kernel_model.transform( @@ -372,8 +369,10 @@ class VitisBuild(Transformation): # Assemble design from kernels model = model.transform( VitisLink( - self.platform, round(1000 / self.period_ns), strategy=self.strategy, - enable_debug=self.enable_debug + self.platform, + round(1000 / self.period_ns), + strategy=self.strategy, + enable_debug=self.enable_debug, ) ) # set platform attribute for correct remote execution diff --git 
a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index 7a428b8592e0e67dd8561f1425482a006a79479a..88833a65b4dbd88e1bdc807515eeda538104fc39 100644 --- a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -560,6 +560,39 @@ class TestEnd2End: update_dashboard_data(topology, wbits, abits, "board", cfg["board"]) model.save(get_checkpoint_name(topology, wbits, abits, "build_" + kind)) + @pytest.mark.slow + @pytest.mark.vivado + @pytest.mark.vitis + @pytest.mark.parametrize("kind", ["zynq", "alveo"]) + def test_build_extweights(self, topology, wbits, abits, kind): + if "VITIS_PATH" not in os.environ: + pytest.skip("VITIS_PATH not set") + prev_chkpt_name = get_checkpoint_name( + topology, wbits, abits, "fifodepth_" + kind + ) + model = load_test_checkpoint_or_skip(prev_chkpt_name) + # select some FC layers, erase their implementation + # and set them to external weights + num_extw_layers = 0 + for node in model.graph.node: + if node.op_type == "StreamingFCLayer_Batch": + node_inst = getCustomOp(node) + simd = node_inst.get_nodeattr("SIMD") + pe = node_inst.get_nodeattr("PE") + # skip layers which require very large IODMA DWCs + if (512 % simd) != 0 or ((pe * simd) % 32) != 0: + continue + node_inst.set_nodeattr("code_gen_dir_ipgen", "") + node_inst.set_nodeattr("ipgen_path", "") + node_inst.set_nodeattr("mem_mode", "external") + num_extw_layers += 1 + if num_extw_layers == 0: + pytest.skip("No layers suitable for external weights") + # build + cfg = get_build_env(kind, target_clk_ns) + model = model.transform(cfg["build_fxn"]) + # check list of interfaces + @pytest.mark.parametrize("kind", ["zynq", "alveo"]) def test_deploy(self, topology, wbits, abits, kind): prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "build_" + kind)