Unverified commit ef3d4c9d authored by Yaman Umuroglu, committed by GitHub

Merge pull request #147 from quetric/feature_tlastmarker

Added features to tlastmarker and updated the insertion transform
parents 2be5dcdd 2250a368
@@ -30,20 +30,30 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
class TLastMarker(HLSCustomOp):
    """Class that corresponds to the TLastMarker node that needs to be
    inserted at the end of the model for rtlsim with stitched IP.
    It marks the end of the current image/input sample."""
    """Node that adds/removes AXI stream TLAST signals where needed. Its behavior
    is transparent in node-by-node execution, only visible in IP-stitched rtlsim or
    actual hardware.
    This node may be needed at the end of the network to signal a DMA write (needed by the
    FINN PYNQ shell) or at the beginning to remove the end-of-burst from DMA read."""

    def __init__(self, onnx_node):
        super().__init__(onnx_node)

    def get_nodeattr_types(self):
        my_attrs = {
            # number of (static) iterations until TLAST=1 is generated for Direction=out
            "NumIters": ("i", True, 0),
            # whether static or dynamic (from AXI lite) number of iterations are used
            "DynIters": ("i", False, 1),
            # direction: whether to insert or remove TLAST
            "Direction": ("s", False, "out"),
            # width of input-output data streams, in bits
            "StreamWidth": ("i", True, 0),
            # width of individual element in stream, in bits
            "ElemWidth": ("i", True, 0),
            # Protocol: external or internal
            # Vitis docs recommend using qdma_axis for external, ap_axiu for internal
            "Protocol": ("s", False, "external"),
        }
        my_attrs.update(super().get_nodeattr_types())
        return my_attrs
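
For reference, a minimal sketch of how a TLastMarker node carrying these attributes could be built by hand with onnx.helper; the tensor names and attribute values below are illustrative, not taken from this commit:

from onnx import helper as oh

# illustrative values: a 64-bit stream of 8-bit elements, 512 beats per image;
# the tensor names "dma_out" and "global_out" are hypothetical
tlast_node = oh.make_node(
    "TLastMarker",
    ["dma_out"],
    ["global_out"],
    NumIters=512,
    DynIters=1,              # numIters can also be overridden at runtime via AXI lite
    Direction="out",         # insert TLAST on the output stream
    StreamWidth=64,
    ElemWidth=8,
    Protocol="external",     # qdma_axis, as recommended for external streams
    domain="finn",
    backend="fpgadataflow",
)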
@@ -76,12 +86,33 @@ class TLastMarker(HLSCustomOp):
    def defines(self, var):
        stream_width = self.get_nodeattr("StreamWidth")
        direction = self.get_nodeattr("Direction")
        protocol = self.get_nodeattr("Protocol")
        # output stream must have TLAST, so we use this stream data type:
        # qdma_axis<stream_data_width,0,0,0 >
        out_stream_dtype = "qdma_axis<%d,0,0,0>" % stream_width
        if direction == "out":
            if protocol == "external":
                out_stream_dtype = "qdma_axis<%d,0,0,0>" % stream_width
            elif protocol == "internal":
                out_stream_dtype = "ap_axiu<%d,0,0,0>" % stream_width
            else:
                raise Exception("Unrecognized Protocol in TLastMarker")
            in_stream_dtype = "ap_uint<%d>" % stream_width
        elif direction == "in":
            out_stream_dtype = "ap_uint<%d>" % stream_width
            if protocol == "external":
                in_stream_dtype = "qdma_axis<%d,0,0,0>" % stream_width
            elif protocol == "internal":
                in_stream_dtype = "ap_axiu<%d,0,0,0>" % stream_width
            else:
                raise Exception("Unrecognized Protocol in TLastMarker")
        else:
            raise Exception("Unrecognized Direction in TLastMarker")

        self.code_gen_dict["$DEFINES$"] = [
            "#define StreamWidth %d" % stream_width,
            "#define OutDType %s" % out_stream_dtype,
            "#define InDType %s" % in_stream_dtype,
            "#define NumItersPerImg %d" % self.get_nodeattr("NumIters"),
        ]
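
For a concrete configuration, say Direction="out", Protocol="external", StreamWidth=64 and NumIters=512, the generated define block would resolve to the following (a sketch derived from the code above; all values illustrative):

# sketch: expected self.code_gen_dict["$DEFINES$"] contents for Direction="out",
# Protocol="external", StreamWidth=64, NumIters=512 (all values illustrative)
expected_defines = [
    "#define StreamWidth 64",
    "#define OutDType qdma_axis<64,0,0,0>",
    "#define InDType ap_uint<64>",
    "#define NumItersPerImg 512",
]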
@@ -89,27 +120,60 @@ class TLastMarker(HLSCustomOp):
        self.code_gen_dict["$READNPYDATA$"] = []

    def docompute(self):
        self.code_gen_dict["$DOCOMPUTE$"] = [
            "unsigned int n = 1;",
            "OutDType t;",
            "t.set_keep(-1);",
            "io_section: { // start of cycle accurate region",
            "#pragma HLS protocol fixed",
            "// do a first read from stream before we decide on numIters",
            "// giving software a chance to set up the numIters prior to startup",
            "t.set_data(in0.read());",
            "n = (numIters == 0 ? NumItersPerImg : numIters);",
            "t.set_last(n==1);",
            "out.write(t);",
            "} // end of cycle accurate region",
            "// do one less iteration than spec since we already did one",
            "for(unsigned int i=1; i<n; i++) {",
            "#pragma HLS PIPELINE II=1",
            "t.set_data(in0.read());",
            "t.set_last(i==(n-1));",
            "out.write(t);",
            "}",
        ]
        dyn_iters = self.get_nodeattr("DynIters")
        direction = self.get_nodeattr("Direction")
        use_qdma_axis = self.get_nodeattr("Protocol") == "external"

        if direction == "in":
            # read from input and just pass data along; ignore tlast
            # no dyn iters on input, it doesn't make sense
            self.code_gen_dict["$DOCOMPUTE$"] = [
                "for(unsigned int i=0; i<NumItersPerImg; i++) {",
                "#pragma HLS PIPELINE II=1",
                "out.write(in0.read().get_data());"
                if use_qdma_axis
                else "out.write(in0.read().data);",
                "}",
            ]
        elif dyn_iters == 1:
            # output, with dynamic iteration counts
            self.code_gen_dict["$DOCOMPUTE$"] = [
                "unsigned int n = 1;",
                "OutDType t;",
                "t.set_keep(-1);" if use_qdma_axis else "t.keep = -1;",
                "io_section: { // start of cycle accurate region",
                "#pragma HLS protocol fixed",
                "// do a first read from stream before we decide on numIters",
                "// giving software a chance to set up the numIters prior to startup",
                "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();",
                "n = (numIters == 0 ? NumItersPerImg : numIters);",
                "t.set_last(n==1);" if use_qdma_axis else "t.last = (n==1);",
                "out.write(t);",
                "} // end of cycle accurate region",
                "// do one less iteration than spec since we already did one",
                "for(unsigned int i=1; i<n; i++) {",
                "#pragma HLS PIPELINE II=1",
                "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();",
                "t.set_last(i==(n-1));" if use_qdma_axis else "t.last = (i==(n-1));",
                "out.write(t);",
                "}",
            ]
        else:
            # output, with static iteration counts
            self.code_gen_dict["$DOCOMPUTE$"] = [
                "unsigned int n = 1;",
                "OutDType t;",
                "t.set_keep(-1);" if use_qdma_axis else "t.keep = -1;",
                "for(unsigned int i=0; i<NumItersPerImg; i++) {",
                "#pragma HLS PIPELINE II=1",
                "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();",
                "t.set_last(i==(NumItersPerImg-1));"
                if use_qdma_axis
                else "t.last = (i==(NumItersPerImg-1));",
                "out.write(t);",
                "}",
            ]
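
To make the new branch structure concrete, this is the body the generator would emit for the TLAST-stripping case (a sketch assuming Direction="in" and Protocol="internal"):

# sketch: expected self.code_gen_dict["$DOCOMPUTE$"] contents for Direction="in",
# Protocol="internal" (ap_axiu, so the .data member is read); values illustrative
expected_docompute = [
    "for(unsigned int i=0; i<NumItersPerImg; i++) {",
    "#pragma HLS PIPELINE II=1",
    "out.write(in0.read().data);",
    "}",
]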
    def dataoutstrm(self):
        self.code_gen_dict["$DATAOUTSTREAM$"] = []

@@ -118,18 +182,30 @@ class TLastMarker(HLSCustomOp):
        self.code_gen_dict["$SAVEASCNPY$"] = []

    def blackboxfunction(self):
        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
            """void %s(hls::stream<ap_uint<StreamWidth> > &in0,
                hls::stream<OutDType> &out, unsigned int numIters)"""
            % self.onnx_node.name
        ]
        dyn_iters = self.get_nodeattr("DynIters")
        if dyn_iters == 1:
            self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
                """void %s(hls::stream<InDType> &in0,
                    hls::stream<OutDType> &out, unsigned int numIters)"""
                % self.onnx_node.name
            ]
        else:
            self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
                """void %s(hls::stream<InDType> &in0, hls::stream<OutDType> &out)"""
                % self.onnx_node.name
            ]

    def pragmas(self):
        self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
        self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
        self.code_gen_dict["$PRAGMAS$"].append(
            "#pragma HLS INTERFACE s_axilite port=numIters bundle=control"
        )
        dyn_iters = self.get_nodeattr("DynIters")
        if dyn_iters == 1:
            self.code_gen_dict["$PRAGMAS$"].append(
                "#pragma HLS INTERFACE s_axilite port=numIters bundle=control"
            )
        self.code_gen_dict["$PRAGMAS$"].append(
            "#pragma HLS INTERFACE ap_ctrl_none port=return"
        )
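
The effect of guarding the AXI-lite pragma is easiest to see on the resulting pragma list; for a static-iteration marker (DynIters=0) it reduces to the following (sketch):

# sketch: expected self.code_gen_dict["$PRAGMAS$"] contents when DynIters=0;
# the s_axilite interface for numIters is omitted, leaving a purely streaming block
expected_pragmas = [
    "#pragma HLS INTERFACE axis port=in0",
    "#pragma HLS INTERFACE axis port=out",
    "#pragma HLS INTERFACE ap_ctrl_none port=return",
]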
@@ -158,7 +234,7 @@ class TLastMarker(HLSCustomOp):
    def strm_decl(self):
        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
            'hls::stream<InDType> in0 ("in0");'
        )
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
            'hls::stream<OutDType> out ("out");'
        )

@@ -31,23 +31,34 @@ from onnx import helper as oh
from finn.custom_op.registry import getCustomOp
from finn.transformation import Transformation
from finn.util.basic import get_by_name
import numpy as np


class InsertTLastMarker(Transformation):
    """Ensure that the graph is terminated with a TLastMarker node, inserting
    one if necessary."""
    """Ensure that the graph is started/terminated with a TLastMarker node, inserting
    one if necessary. Use constructor args to determine the type of TLastMarker to be
    inserted. More information is available in the TLastMarker documentation.
    """

    def __init__(self):
    def __init__(self, both=False, external=True, dynamic=True):
        super().__init__()
        self.dyniters = dynamic
        self.external = external
        self.both = both

    def apply(self, model):
        # TODO only makes sense for a pure fpgadataflow graph -- check!
        graph_out_name = model.graph.output[0].name
        final_node = model.find_producer(graph_out_name)
        if final_node.op_type == "TLastMarker":
            # TODO maybe check the correctness of properties
            return (model, False)
        else:
        graph_modified = False
        if final_node.op_type != "TLastMarker" and not (
            final_node.op_type == "IODMA"
            and get_by_name(final_node.attribute, "direction").s.decode("UTF-8")
            == "out"
        ):
            custom_op = getCustomOp(final_node)
            num_iters = int(custom_op.get_number_output_values())
            stream_width = int(custom_op.get_outstream_width())
@@ -69,8 +80,51 @@ class InsertTLastMarker(Transformation):
                NumIters=num_iters,
                StreamWidth=stream_width,
                ElemWidth=elem_width,
                DynIters=(1 if self.dyniters else 0),
                Direction="out",
                Protocol=("external" if self.external else "internal"),
                domain="finn",
                backend="fpgadataflow",
            )
            model.graph.node.append(tlast_node)
            return (model, True)
            graph_modified = True
        # if both is True, also insert marker on input
        if self.both:
            graph_in_name = model.graph.input[0].name
            first_node = model.find_consumer(graph_in_name)
            if first_node.op_type != "TLastMarker" and not (
                first_node.op_type == "IODMA"
                and get_by_name(first_node.attribute, "direction").s.decode("UTF-8")
                == "in"
            ):
                custom_op = getCustomOp(first_node)
                num_iters = np.prod(custom_op.get_folded_input_shape()[1:-1])
                stream_width = int(custom_op.get_instream_width())
                in_shape = model.get_tensor_shape(graph_in_name)
                in_dtype = model.get_tensor_datatype(graph_in_name)
                elem_width = in_dtype.bitwidth()
                # make new buffer
                first_node_in = oh.make_tensor_value_info(
                    model.make_new_valueinfo_name(), TensorProto.FLOAT, in_shape
                )
                model.graph.value_info.append(first_node_in)
                model.set_tensor_datatype(first_node_in.name, in_dtype)
                # reroute the first node's input to first_node_in
                first_node.input[0] = first_node_in.name
                tlast_node = oh.make_node(
                    "TLastMarker",
                    [graph_in_name],
                    [first_node_in.name],
                    NumIters=num_iters,
                    StreamWidth=stream_width,
                    ElemWidth=elem_width,
                    DynIters=(1 if self.dyniters else 0),
                    Direction="in",
                    Protocol=("external" if self.external else "internal"),
                    domain="finn",
                    backend="fpgadataflow",
                )
                model.graph.node.insert(0, tlast_node)
                graph_modified = True
        return (model, graph_modified)
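
A minimal usage sketch of the extended transformation; the model file name is hypothetical and the import paths are assumed to follow the FINN source layout:

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker

# "dataflow_model.onnx" is a placeholder for an fpgadataflow-partitioned model
model = ModelWrapper("dataflow_model.onnx")
# insert markers on both input and output, with static iteration counts and the
# internal (ap_axiu) protocol; the defaults would give a dynamic, external output marker
model = model.transform(InsertTLastMarker(both=True, external=False, dynamic=False))
model.save("dataflow_model_with_tlast.onnx")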