diff --git a/src/finn/custom_op/streamingdataflowpartition.py b/src/finn/custom_op/streamingdataflowpartition.py index b63326d676f4ded5ec1dd62f5cc7f02d7acb82ad..bce4dde426b8838d6c86638a3641d51ab259a6db 100644 --- a/src/finn/custom_op/streamingdataflowpartition.py +++ b/src/finn/custom_op/streamingdataflowpartition.py @@ -83,7 +83,7 @@ class StreamingDataflowPartition(CustomOp): ) # verify the number of inputs - if len(self.onnx_node.input) == 1: + if len(self.onnx_node.input) >= 1: info_messages.append("The number of inputs is correct") else: - info_messages.append("StreamingDataflowPartition needs 1 data input") + info_messages.append("StreamingDataflowPartition needs at least 1 data input") diff --git a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py index e0f990600d9ca4be748b662b47ce8296d3d462ce..7197e68be2fbdf5fc39b7ed202e88672614514ec 100644 --- a/src/finn/transformation/fpgadataflow/create_dataflow_partition.py +++ b/src/finn/transformation/fpgadataflow/create_dataflow_partition.py @@ -45,58 +45,89 @@ class CreateDataflowPartition(Transformation): super().__init__() def apply(self, model): - # TODO we currently assume that all dataflow nodes are connected to - # each other, forming a single partition. check the assumption and/or - # improve this. 
- all_nodes = list(model.graph.node) - df_nodes = filter( - lambda x: get_by_name(x.attribute, "backend") is not None, all_nodes - ) - df_nodes = filter( - lambda x: get_by_name(x.attribute, "backend").s.decode("UTF-8") - == "fpgadataflow", - df_nodes, - ) - df_nodes = list(df_nodes) - non_df_nodes = filter(lambda x: x not in df_nodes, all_nodes) - non_df_nodes = list(non_df_nodes) - - if len(df_nodes) == 0: - # no changes if no dataflow nodes are present - return (model, False) - else: - # partition the model into two models - df_model = copy.deepcopy(model) - non_df_model = model - # remove all non-dataflow nodes from the dataflow model - for node_to_remove in non_df_nodes: - df_model.graph.node.remove(node_to_remove) - # identify the entry and exit points for the dataflow part - df_in = df_model.graph.node[0].input[0] - df_out = df_model.graph.node[-1].output[0] - df_in_vi = df_model.get_tensor_valueinfo(df_in) - df_out_vi = df_model.get_tensor_valueinfo(df_out) - # set df graph in/out to be df_in/df_out - df_model.graph.input.remove(df_model.graph.input[0]) - df_model.graph.input.insert(0, df_in_vi) - df_model.graph.output.remove(df_model.graph.output[0]) - df_model.graph.output.insert(0, df_out_vi) - df_model_dir = make_build_dir("dataflow_partition_") - df_model_filename = df_model_dir + "/df_model.onnx" - df_model.save(df_model_filename) - # remove all dataflow nodes from the non-dataflow model - # keep track of where the dataflow part starts - df_start_ind = all_nodes.index(df_nodes[0]) - for node_to_remove in df_nodes: - non_df_model.graph.node.remove(node_to_remove) - # create StreamingDataflow node with df_in/df_out io - df_node = helper.make_node( - "StreamingDataflowPartition", - [df_in], - [df_out], - # use the model attribute to mark the df model - model=df_model_filename, + target_partition_id = 0 + # we currently assume that all dataflow nodes belonging to the same partition + # are connected to each other and there is a single input/output to/from 
each. + # NOTE: all dataflow nodes with no partition_id set are moved to partition 0 + # TODO: check the assumption and/or improve this. + while True: + all_nodes = list(model.graph.node) + df_nodes = filter( + lambda x: get_by_name(x.attribute, "backend") is not None, all_nodes + ) + df_nodes = filter( + lambda x: get_by_name(x.attribute, "backend").s.decode("UTF-8") + == "fpgadataflow" + and ( + get_by_name(x.attribute, "partition_id") is None + or get_by_name(x.attribute, "partition_id").i == target_partition_id + ) + and x.op_type != "StreamingDataflowPartition", + df_nodes, ) - non_df_model.graph.node.insert(df_start_ind, df_node) + df_nodes = list(df_nodes) + non_df_nodes = filter(lambda x: x not in df_nodes, all_nodes) + non_df_nodes = list(non_df_nodes) + + if len(df_nodes) == 0: + # no changes if no dataflow nodes are present + break + else: + # partition the model into two models + df_model = copy.deepcopy(model) + non_df_model = model + # remove all non-dataflow nodes from the dataflow model + for node_to_remove in non_df_nodes: + df_model.graph.node.remove(node_to_remove) + # identify the entry and exit points for the dataflow part + df_in = df_model.graph.node[0].input[0] + df_out = df_model.graph.node[-1].output[0] + df_in_vi = df_model.get_tensor_valueinfo(df_in) + df_out_vi = df_model.get_tensor_valueinfo(df_out) + # set df graph in/out to be df_in/df_out + df_model.graph.input.remove(df_model.graph.input[0]) + df_model.graph.input.insert(0, df_in_vi) + df_model.graph.output.remove(df_model.graph.output[0]) + df_model.graph.output.insert(0, df_out_vi) + # parse StreamingFCLayers looking for external weight memories + fc_extw_nodes = filter( + lambda x: x.op_type == "StreamingFCLayer_Batch" + and get_by_name(x.attribute, "mem_mode") is not None + and get_by_name(x.attribute, "mem_mode").s.decode("UTF-8") + == "external", + df_nodes, + ) + fc_extw_nodes = list(fc_extw_nodes) + extra_df_inputs = [] + + for i in range(len(fc_extw_nodes)): + fc_weight_vi 
= df_model.get_tensor_valueinfo( + fc_extw_nodes[i].input[1] + ) + df_model.graph.input.insert(i + 1, fc_weight_vi) + extra_df_inputs.append(fc_extw_nodes[i].input[1]) + + # save model + df_model_dir = make_build_dir( + "dataflow_partition" + str(target_partition_id) + "_" + ) + df_model_filename = df_model_dir + "/df_model.onnx" + df_model.save(df_model_filename) + # remove all dataflow nodes from the non-dataflow model + # keep track of where the dataflow part starts + df_start_ind = all_nodes.index(df_nodes[0]) + for node_to_remove in df_nodes: + non_df_model.graph.node.remove(node_to_remove) + # create StreamingDataflow node with df_in/df_out io + df_node = helper.make_node( + "StreamingDataflowPartition", + [df_in] + extra_df_inputs, + [df_out], + # use the model attribute to mark the df model + model=df_model_filename, + ) + non_df_model.graph.node.insert(df_start_ind, df_node) + model = non_df_model + target_partition_id += 1 - return (non_df_model, False) + return (model, False) diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index b01f8cbe5c48db6c5288b2db1a8b009ea09ce6c0..85a2d47be0599a852b223f1a65d3ec04efe9bda7 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -118,8 +118,11 @@ class InsertFIFO(Transformation): graph_modified = True if graph_modified is False: - # insert FIFO as first node - if graph.node[0].op_type != "StreamingFIFO": + # insert FIFO as first node, except when first node is DMA + if ( + graph.node[0].op_type != "StreamingFIFO" + and graph.node[0].op_type != "IODMA" + ): n = graph.node[0] n_input = n.input[0] n0 = getCustomOp(n) @@ -153,8 +156,11 @@ class InsertFIFO(Transformation): # set fifo output tensor as new input tensor of second node n.input[0] = fifo_output_tensor.name - # insert FIFO as last node - if graph.node[-1].op_type != "StreamingFIFO": + # insert FIFO as last node, except when 
last node is DMA +            if ( +                graph.node[-1].op_type != "StreamingFIFO" +                and graph.node[-1].op_type != "IODMA" +            ):                  n = graph.node[-1]                  assert (                      n.op_type != "TLastMarker" diff --git a/src/finn/transformation/fpgadataflow/insert_iodma.py b/src/finn/transformation/fpgadataflow/insert_iodma.py index e4368edea717f7499481e9b1c6ac20f7d5bb5f58..0cd7c0d4d41accf8cdba8adfaf4dbb00fc0cab7a 100644 --- a/src/finn/transformation/fpgadataflow/insert_iodma.py +++ b/src/finn/transformation/fpgadataflow/insert_iodma.py @@ -171,6 +171,7 @@ class InsertIODMA(Transformation):              # calculate width of stream output from DMA              pe = get_by_name(fc_node.attribute, "PE").i              simd = get_by_name(fc_node.attribute, "SIMD").i +            assert pe * simd == w_shape[0], "Malformed weight matrix"              streamWidth = simd * pe * w_dtype.bitwidth()              # make new buffer              fc_node_in = oh.make_tensor_value_info( @@ -178,12 +179,13 @@ class InsertIODMA(Transformation):              )              model.graph.value_info.append(fc_node_in)              model.set_tensor_datatype(fc_node_in.name, w_dtype) +            model.set_initializer(fc_node_in.name, model.get_initializer(fc_w_name))              dma_node = oh.make_node(                  "IODMA",                  [fc_w_name],                  [fc_node_in.name], -                numInputVectors=w_shape[:-1], -                NumChannels=w_shape[-1], +                numInputVectors=[w_shape[1]], +                NumChannels=w_shape[0],                  dataType=str(w_dtype.name),                  intfWidth=intfwidth,                  streamWidth=streamWidth, diff --git a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py index 04dd437af27b9fbe18b2255c20a8e4acda03b3d0..bbb0e43fda464e919a7d8c9dcd25e08a49b33cec 100644 --- a/src/finn/transformation/fpgadataflow/insert_tlastmarker.py +++ b/src/finn/transformation/fpgadataflow/insert_tlastmarker.py @@ -38,7 +38,8 @@ import numpy as np  class InsertTLastMarker(Transformation):     """Ensure that the graph is started/terminated with a TLastMarker node, inserting -    one if necessary. Use constructor args to determine type of TLastMarker to be inserted. +    one if necessary. 
+ Use constructor args to determine type of TLastMarker to be inserted. More information available on the TLastMarker documentation. """ @@ -90,41 +91,78 @@ class InsertTLastMarker(Transformation): graph_modified = True # if both is True, also insert marker on input if self.both: - graph_in_name = model.graph.input[0].name - first_node = model.find_consumer(graph_in_name) - if first_node.op_type != "TLastMarker" and not ( - first_node.op_type == "IODMA" - and get_by_name(first_node.attribute, "direction").s.decode("UTF-8") - == "in" - ): + # detect and parse graph inputs + insert_idx = 0 + graph_in_names = [x.name for x in model.graph.input] + for graph_in_name in graph_in_names: + first_node = model.find_consumers(graph_in_name) + # skip if no consumers (this may be the case for unused initializers) + # TODO: fix this with a cleanup transform + if first_node is None: + continue + assert len(first_node) == 1, "Input fans out to multiple nodes" + first_node = first_node[0] + # several scenarios exclude the node: + # 1. node is a FC layer with internal weights, in which case + # the input is in the list of graph inputs because it has an + # initializer (TODO: fix this with a clean-up transform) + if ( + first_node.op_type == "StreamingFCLayer_Batch" + and get_by_name(first_node.attribute, "mem_mode").s.decode("UTF-8") + != "external" + ): + continue + # 2. 
node is either a TLastMarker or an input IODMA + if first_node.op_type != "TLastMarker" and not ( + first_node.op_type == "IODMA" + and get_by_name(first_node.attribute, "direction").s.decode("UTF-8") + == "in" + ): - custom_op = getCustomOp(first_node) - num_iters = np.prod(custom_op.get_folded_input_shape()[1:-1]) - stream_width = int(custom_op.get_instream_width()) - in_shape = model.get_tensor_shape(graph_in_name) - in_dtype = model.get_tensor_datatype(graph_in_name) - elem_width = in_dtype.bitwidth() - # make new buffer - first_node_in = oh.make_tensor_value_info( - model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape - ) - model.graph.value_info.append(first_node_in) - model.set_tensor_datatype(first_node_in.name, in_dtype) - # reroute final node output to first_node_in_name - first_node.input[0] = first_node_in.name - tlast_node = oh.make_node( - "TLastMarker", - [graph_in_name], - [first_node_in.name], - NumIters=num_iters, - StreamWidth=stream_width, - ElemWidth=elem_width, - DynIters=(1 if self.dyniters else 0), - Direction="in", - Protocol=("external" if self.external else "internal"), - domain="finn", - backend="fpgadataflow", - ) - model.graph.node.insert(0, tlast_node) - graph_modified = True + custom_op = getCustomOp(first_node) + num_iters = np.prod(custom_op.get_folded_input_shape()[1:-1]) + inp_idx = list(first_node.input).index(graph_in_name) + if inp_idx > 0: + if ( + first_node.op_type == "StreamingFCLayer_Batch" + and inp_idx == 1 + ): + stream_width = int(custom_op.get_weightstream_width()) + elif first_node.op_type == "AddStreams_Batch" and inp_idx == 1: + stream_width = int(custom_op.get_instream_width()) + else: + raise Exception("No method to determine stream width") + else: + stream_width = int(custom_op.get_instream_width()) + in_shape = model.get_tensor_shape(graph_in_name) + in_dtype = model.get_tensor_datatype(graph_in_name) + elem_width = in_dtype.bitwidth() + # make new buffer + first_node_in = oh.make_tensor_value_info( + 
model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape + ) + model.graph.value_info.append(first_node_in) + model.set_tensor_datatype(first_node_in.name, in_dtype) + ini = model.get_initializer(graph_in_name) + # copy initializer if it exists + if ini is not None: + model.set_initializer(first_node_in.name, ini) + # reroute final node output to first_node_in_name + first_node.input[inp_idx] = first_node_in.name + tlast_node = oh.make_node( + "TLastMarker", + [graph_in_name], + [first_node_in.name], + NumIters=num_iters, + StreamWidth=stream_width, + ElemWidth=elem_width, + DynIters=(1 if self.dyniters else 0), + Direction="in", + Protocol=("external" if self.external else "internal"), + domain="finn", + backend="fpgadataflow", + ) + model.graph.node.insert(insert_idx, tlast_node) + graph_modified = True + insert_idx += 1 return (model, graph_modified)