Skip to content
Snippets Groups Projects
Commit 20e4613f authored by Yaman Umuroglu
Browse files

[FIFO] instead of fixed-depth large FIFO in sim, use tensor size

parent 81f010b8
No related branches found
No related tags found
No related merge requests found
...@@ -192,10 +192,11 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -192,10 +192,11 @@ class InsertAndSetFIFODepths(Transformation):
- max_qsrl_depth : FIFOs deeper than this will use Vivado IP instead of - max_qsrl_depth : FIFOs deeper than this will use Vivado IP instead of
Verilog FIFOs (Q_srl.v) Verilog FIFOs (Q_srl.v)
- max_depth : how deep the "max"-sized FIFOs initially inserted will be - max_depth : how deep the "max"-sized FIFOs initially inserted will be
if set to None, use the tensor size as the depth
- swg_exception : call CapConvolutionFIFODepths to make convolution FIFOs - swg_exception : call CapConvolutionFIFODepths to make convolution FIFOs
smaller where appropriate smaller where appropriate
- vivado_ram_style : the StreamingFIFO.ram_style attribute to be used for - vivado_ram_style : the StreamingFIFO.ram_style attribute to be used for
large FIFOs implemented by Vivado large FIFOs implemented by Vivado afterwards
Assumed input graph properties: Assumed input graph properties:
- all nodes are fpgadataflow nodes - all nodes are fpgadataflow nodes
...@@ -210,7 +211,7 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -210,7 +211,7 @@ class InsertAndSetFIFODepths(Transformation):
necessary to insert FIFOs between them to prevent stalls due to bursty necessary to insert FIFOs between them to prevent stalls due to bursty
behavior. The sizes of those FIFOs are hard to predict analytically, so behavior. The sizes of those FIFOs are hard to predict analytically, so
we do the following: we do the following:
- insert very deep (default 16k deep) FIFOs between all fpgadataflow nodes - insert deep (=tensor size) FIFOs between all fpgadataflow nodes
- create stitched design - create stitched design
- run through rtlsim with stream of multiple random input images (to fill pipeline) - run through rtlsim with stream of multiple random input images (to fill pipeline)
- keep track of observed maximum occupancy for each FIFO during rtlsim - keep track of observed maximum occupancy for each FIFO during rtlsim
...@@ -223,7 +224,7 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -223,7 +224,7 @@ class InsertAndSetFIFODepths(Transformation):
fpgapart, fpgapart,
clk_ns=10.0, clk_ns=10.0,
max_qsrl_depth=256, max_qsrl_depth=256,
max_depth=2**14, max_depth=None,
swg_exception=True, swg_exception=True,
vivado_ram_style="auto", vivado_ram_style="auto",
): ):
...@@ -236,6 +237,9 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -236,6 +237,9 @@ class InsertAndSetFIFODepths(Transformation):
self.vivado_ram_style = vivado_ram_style self.vivado_ram_style = vivado_ram_style
def apply(self, model): def apply(self, model):
# these optypes may potentially use external weights
# we'll temporarily change them to use decoupled mode for FIFO sizing
extw_optypes = ["MatrixVectorActivation", "VectorVectorActivation"]
# change external to decoupled and warn user # change external to decoupled and warn user
# this way we are sure we have exactly one input/output # this way we are sure we have exactly one input/output
modified_fc_nodes = [] modified_fc_nodes = []
...@@ -246,9 +250,15 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -246,9 +250,15 @@ class InsertAndSetFIFODepths(Transformation):
) )
assert node.op_type != "StreamingFIFO", "Found existing StreamingFIFO node" assert node.op_type != "StreamingFIFO", "Found existing StreamingFIFO node"
node = getCustomOp(node) node = getCustomOp(node)
node.set_nodeattr("inFIFODepth", self.max_depth) if self.max_depth is not None:
node.set_nodeattr("outFIFODepth", self.max_depth) node.set_nodeattr("inFIFODepth", self.max_depth)
if node.onnx_node.op_type == "MatrixVectorActivation": node.set_nodeattr("outFIFODepth", self.max_depth)
else:
i_depth = np.prod(node.get_folded_input_shape()[:-1])
o_depth = np.prod(node.get_folded_output_shape()[:-1])
node.set_nodeattr("inFIFODepth", i_depth)
node.set_nodeattr("outFIFODepth", o_depth)
if node.onnx_node.op_type in extw_optypes:
mmode = node.get_nodeattr("mem_mode") mmode = node.get_nodeattr("mem_mode")
if mmode == "external": if mmode == "external":
modified_fc_nodes.append(node.onnx_node.name) modified_fc_nodes.append(node.onnx_node.name)
...@@ -370,9 +380,9 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -370,9 +380,9 @@ class InsertAndSetFIFODepths(Transformation):
else: else:
getCustomOp(node).set_nodeattr("inFIFODepth", 0) getCustomOp(node).set_nodeattr("inFIFODepth", 0)
getCustomOp(node).set_nodeattr("outFIFODepth", 0) getCustomOp(node).set_nodeattr("outFIFODepth", 0)
# for every FC node we changed from external to decoupled, # for every extw node we changed from external to decoupled,
# change back and reset implementation # change back and reset implementation
if node.op_type == "MatrixVectorActivation": if node.op_type in extw_optypes:
if node.name in modified_fc_nodes: if node.name in modified_fc_nodes:
node_inst = getCustomOp(node) node_inst = getCustomOp(node)
node_inst.set_nodeattr("mem_mode", "external") node_inst.set_nodeattr("mem_mode", "external")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment