Skip to content
Snippets Groups Projects
Unverified commit 0559625b, authored by Yaman Umuroglu, committed by GitHub
Browse files

Merge pull request #190 from quetric/feature_transforms_nonlinear_iodma

Updated multiple build-related transforms to recognize IODMAs and nonlinear graphs
parents ef693dda 21e480e7
No related branches found
No related tags found
No related merge requests found
......@@ -83,7 +83,7 @@ class StreamingDataflowPartition(CustomOp):
)
# verify the number of inputs
if len(self.onnx_node.input) == 1:
if len(self.onnx_node.input) >= 1:
info_messages.append("The number of inputs is correct")
else:
info_messages.append("StreamingDataflowPartition needs 1 data input")
......
......@@ -45,58 +45,89 @@ class CreateDataflowPartition(Transformation):
super().__init__()
def apply(self, model):
# TODO we currently assume that all dataflow nodes are connected to
# each other, forming a single partition. check the assumption and/or
# improve this.
all_nodes = list(model.graph.node)
df_nodes = filter(
lambda x: get_by_name(x.attribute, "backend") is not None, all_nodes
)
df_nodes = filter(
lambda x: get_by_name(x.attribute, "backend").s.decode("UTF-8")
== "fpgadataflow",
df_nodes,
)
df_nodes = list(df_nodes)
non_df_nodes = filter(lambda x: x not in df_nodes, all_nodes)
non_df_nodes = list(non_df_nodes)
if len(df_nodes) == 0:
# no changes if no dataflow nodes are present
return (model, False)
else:
# partition the model into two models
df_model = copy.deepcopy(model)
non_df_model = model
# remove all non-dataflow nodes from the dataflow model
for node_to_remove in non_df_nodes:
df_model.graph.node.remove(node_to_remove)
# identify the entry and exit points for the dataflow part
df_in = df_model.graph.node[0].input[0]
df_out = df_model.graph.node[-1].output[0]
df_in_vi = df_model.get_tensor_valueinfo(df_in)
df_out_vi = df_model.get_tensor_valueinfo(df_out)
# set df graph in/out to be df_in/df_out
df_model.graph.input.remove(df_model.graph.input[0])
df_model.graph.input.insert(0, df_in_vi)
df_model.graph.output.remove(df_model.graph.output[0])
df_model.graph.output.insert(0, df_out_vi)
df_model_dir = make_build_dir("dataflow_partition_")
df_model_filename = df_model_dir + "/df_model.onnx"
df_model.save(df_model_filename)
# remove all dataflow nodes from the non-dataflow model
# keep track of where the dataflow part starts
df_start_ind = all_nodes.index(df_nodes[0])
for node_to_remove in df_nodes:
non_df_model.graph.node.remove(node_to_remove)
# create StreamingDataflow node with df_in/df_out io
df_node = helper.make_node(
"StreamingDataflowPartition",
[df_in],
[df_out],
# use the model attribute to mark the df model
model=df_model_filename,
target_partition_id = 0
# we currently assume that all dataflow nodes belonging to the same partition
# are connected to each other and there is a single input/output to/from each.
# NOTE: all dataflow nodes with no partition_id set are moved to partition 0
# TODO: check the assumption and/or improve this.
while True:
all_nodes = list(model.graph.node)
df_nodes = filter(
lambda x: get_by_name(x.attribute, "backend") is not None, all_nodes
)
df_nodes = filter(
lambda x: get_by_name(x.attribute, "backend").s.decode("UTF-8")
== "fpgadataflow"
and (
get_by_name(x.attribute, "partition_id") is None
or get_by_name(x.attribute, "partition_id").i == target_partition_id
)
and x.op_type != "StreamingDataflowPartition",
df_nodes,
)
non_df_model.graph.node.insert(df_start_ind, df_node)
df_nodes = list(df_nodes)
non_df_nodes = filter(lambda x: x not in df_nodes, all_nodes)
non_df_nodes = list(non_df_nodes)
if len(df_nodes) == 0:
# no changes if no dataflow nodes are present
break
else:
# partition the model into two models
df_model = copy.deepcopy(model)
non_df_model = model
# remove all non-dataflow nodes from the dataflow model
for node_to_remove in non_df_nodes:
df_model.graph.node.remove(node_to_remove)
# identify the entry and exit points for the dataflow part
df_in = df_model.graph.node[0].input[0]
df_out = df_model.graph.node[-1].output[0]
df_in_vi = df_model.get_tensor_valueinfo(df_in)
df_out_vi = df_model.get_tensor_valueinfo(df_out)
# set df graph in/out to be df_in/df_out
df_model.graph.input.remove(df_model.graph.input[0])
df_model.graph.input.insert(0, df_in_vi)
df_model.graph.output.remove(df_model.graph.output[0])
df_model.graph.output.insert(0, df_out_vi)
# parse StreamingFCLayers looking for external weight memories
fc_extw_nodes = filter(
lambda x: x.op_type == "StreamingFCLayer_Batch"
and get_by_name(x.attribute, "mem_mode") is not None
and get_by_name(x.attribute, "mem_mode").s.decode("UTF-8")
== "external",
df_nodes,
)
fc_extw_nodes = list(fc_extw_nodes)
extra_df_inputs = []
for i in range(len(fc_extw_nodes)):
fc_weight_vi = df_model.get_tensor_valueinfo(
fc_extw_nodes[i].input[1]
)
df_model.graph.input.insert(i + 1, fc_weight_vi)
extra_df_inputs.append(fc_extw_nodes[i].input[1])
# save model
df_model_dir = make_build_dir(
"dataflow_partition" + str(target_partition_id) + "_"
)
df_model_filename = df_model_dir + "/df_model.onnx"
df_model.save(df_model_filename)
# remove all dataflow nodes from the non-dataflow model
# keep track of where the dataflow part starts
df_start_ind = all_nodes.index(df_nodes[0])
for node_to_remove in df_nodes:
non_df_model.graph.node.remove(node_to_remove)
# create StreamingDataflow node with df_in/df_out io
df_node = helper.make_node(
"StreamingDataflowPartition",
[df_in] + extra_df_inputs,
[df_out],
# use the model attribute to mark the df model
model=df_model_filename,
)
non_df_model.graph.node.insert(df_start_ind, df_node)
model = non_df_model
target_partition_id += 1
return (non_df_model, False)
return (model, False)
......@@ -118,8 +118,11 @@ class InsertFIFO(Transformation):
graph_modified = True
if graph_modified is False:
# insert FIFO as first node
if graph.node[0].op_type != "StreamingFIFO":
# insert FIFO as first node, except when first node is DMA
if (
graph.node[0].op_type != "StreamingFIFO"
and graph.node[0].op_type != "IODMA"
):
n = graph.node[0]
n_input = n.input[0]
n0 = getCustomOp(n)
......@@ -153,8 +156,11 @@ class InsertFIFO(Transformation):
# set fifo output tensor as new input tensor of second node
n.input[0] = fifo_output_tensor.name
# insert FIFO as last node
if graph.node[-1].op_type != "StreamingFIFO":
# insert FIFO as last node, except when last node is DMA
if (
graph.node[-1].op_type != "StreamingFIFO"
and graph.node[0].op_type != "IODMA"
):
n = graph.node[-1]
assert (
n.op_type != "TLastMarker"
......
......@@ -171,6 +171,7 @@ class InsertIODMA(Transformation):
# calculate width of stream output from DMA
pe = get_by_name(fc_node.attribute, "PE").i
simd = get_by_name(fc_node.attribute, "SIMD").i
assert pe * simd == w_shape[0], "Malformed weight matrix"
streamWidth = simd * pe * w_dtype.bitwidth()
# make new buffer
fc_node_in = oh.make_tensor_value_info(
......@@ -178,12 +179,13 @@ class InsertIODMA(Transformation):
)
model.graph.value_info.append(fc_node_in)
model.set_tensor_datatype(fc_node_in.name, w_dtype)
model.set_initializer(fc_node_in.name, model.get_initializer(fc_w_name))
dma_node = oh.make_node(
"IODMA",
[fc_w_name],
[fc_node_in.name],
numInputVectors=w_shape[:-1],
NumChannels=w_shape[-1],
numInputVectors=[w_shape[1]],
NumChannels=w_shape[0],
dataType=str(w_dtype.name),
intfWidth=intfwidth,
streamWidth=streamWidth,
......
......@@ -38,7 +38,8 @@ import numpy as np
class InsertTLastMarker(Transformation):
"""Ensure that the graph is started/terminated with a TLastMarker node, inserting
one if necessary. Use constructor args to determine type of TLastMarker to be inserted.
one if necessary.
Use constructor args to determine type of TLastMarker to be inserted.
More information available on the TLastMarker documentation.
"""
......@@ -90,41 +91,78 @@ class InsertTLastMarker(Transformation):
graph_modified = True
# if both is True, also insert marker on input
if self.both:
graph_in_name = model.graph.input[0].name
first_node = model.find_consumer(graph_in_name)
if first_node.op_type != "TLastMarker" and not (
first_node.op_type == "IODMA"
and get_by_name(first_node.attribute, "direction").s.decode("UTF-8")
== "in"
):
# detect and parse graph inputs
insert_idx = 0
graph_in_names = [x.name for x in model.graph.input]
for graph_in_name in graph_in_names:
first_node = model.find_consumers(graph_in_name)
# skip if no consumers (this may be the case for unused initializers)
# TODO: fix this with a cleanup transform
if first_node is None:
continue
assert len(first_node) == 1, "Input fans out to multiple nodes"
first_node = first_node[0]
# several scenarios exclude the node:
# 1. node is a FC layer with internal weights, in which case
# the input is in the list of graph inputs because it has an
# initializer (TODO: fix this with a clean-up transform)
if (
first_node.op_type == "StreamingFCLayer_Batch"
and get_by_name(first_node.attribute, "mem_mode").s.decode("UTF-8")
!= "external"
):
continue
# 2. node is either a TLastMarker or an input IODMA
if first_node.op_type != "TLastMarker" and not (
first_node.op_type == "IODMA"
and get_by_name(first_node.attribute, "direction").s.decode("UTF-8")
== "in"
):
custom_op = getCustomOp(first_node)
num_iters = np.prod(custom_op.get_folded_input_shape()[1:-1])
stream_width = int(custom_op.get_instream_width())
in_shape = model.get_tensor_shape(graph_in_name)
in_dtype = model.get_tensor_datatype(graph_in_name)
elem_width = in_dtype.bitwidth()
# make new buffer
first_node_in = oh.make_tensor_value_info(
model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape
)
model.graph.value_info.append(first_node_in)
model.set_tensor_datatype(first_node_in.name, in_dtype)
# reroute final node output to first_node_in_name
first_node.input[0] = first_node_in.name
tlast_node = oh.make_node(
"TLastMarker",
[graph_in_name],
[first_node_in.name],
NumIters=num_iters,
StreamWidth=stream_width,
ElemWidth=elem_width,
DynIters=(1 if self.dyniters else 0),
Direction="in",
Protocol=("external" if self.external else "internal"),
domain="finn",
backend="fpgadataflow",
)
model.graph.node.insert(0, tlast_node)
graph_modified = True
custom_op = getCustomOp(first_node)
num_iters = np.prod(custom_op.get_folded_input_shape()[1:-1])
inp_idx = list(first_node.input).index(graph_in_name)
if inp_idx > 0:
if (
first_node.op_type == "StreamingFCLayer_Batch"
and inp_idx == 1
):
stream_width = int(custom_op.get_weightstream_width())
elif first_node.op_type == "AddStreams_Batch" and inp_idx == 1:
stream_width = int(custom_op.get_instream_width())
else:
raise Exception("No method to determine stream width")
else:
stream_width = int(custom_op.get_instream_width())
in_shape = model.get_tensor_shape(graph_in_name)
in_dtype = model.get_tensor_datatype(graph_in_name)
elem_width = in_dtype.bitwidth()
# make new buffer
first_node_in = oh.make_tensor_value_info(
model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape
)
model.graph.value_info.append(first_node_in)
model.set_tensor_datatype(first_node_in.name, in_dtype)
ini = model.get_initializer(graph_in_name)
# copy initializer if it exists
if ini is not None:
model.set_initializer(first_node_in.name, ini)
# reroute final node output to first_node_in_name
first_node.input[inp_idx] = first_node_in.name
tlast_node = oh.make_node(
"TLastMarker",
[graph_in_name],
[first_node_in.name],
NumIters=num_iters,
StreamWidth=stream_width,
ElemWidth=elem_width,
DynIters=(1 if self.dyniters else 0),
Direction="in",
Protocol=("external" if self.external else "internal"),
domain="finn",
backend="fpgadataflow",
)
model.graph.node.insert(insert_idx, tlast_node)
graph_modified = True
insert_idx += 1
return (model, graph_modified)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment