Skip to content
Snippets Groups Projects
Commit 56057926 authored by Yaman Umuroglu's avatar Yaman Umuroglu
Browse files

[FIFO] add new build option to enable C++-based FIFO sizing

parent f9d86f40
No related branches found
No related tags found
No related merge requests found
...@@ -259,6 +259,10 @@ class DataflowBuildConfig: ...@@ -259,6 +259,10 @@ class DataflowBuildConfig:
AutoFIFOSizingMethod AutoFIFOSizingMethod
] = AutoFIFOSizingMethod.LARGEFIFO_RTLSIM ] = AutoFIFOSizingMethod.LARGEFIFO_RTLSIM
#: If set to True, avoid using C++ rtlsim for auto FIFO sizing and the rtlsim
#: throughput test, and always use Python rtlsim instead
force_python_rtlsim: Optional[bool] = False
#: Memory resource type for large FIFOs #: Memory resource type for large FIFOs
#: Only relevant when `auto_fifo_depths = True` #: Only relevant when `auto_fifo_depths = True`
large_fifo_mem_style: Optional[LargeFIFOMemStyle] = LargeFIFOMemStyle.AUTO large_fifo_mem_style: Optional[LargeFIFOMemStyle] = LargeFIFOMemStyle.AUTO
......
...@@ -487,6 +487,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): ...@@ -487,6 +487,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
cfg._resolve_fpga_part(), cfg._resolve_fpga_part(),
cfg._resolve_hls_clk_period(), cfg._resolve_hls_clk_period(),
vivado_ram_style=cfg.large_fifo_mem_style, vivado_ram_style=cfg.large_fifo_mem_style,
force_python_sim=cfg.force_python_rtlsim,
) )
) )
else: else:
......
...@@ -42,7 +42,7 @@ from finn.transformation.fpgadataflow.insert_dwc import InsertDWC ...@@ -42,7 +42,7 @@ from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.util.fpgadataflow import is_fpgadataflow_node from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.util.pyverilator import pyverilate_stitched_ip from finn.util.pyverilator import pyverilate_stitched_ip, verilator_fifosim
def reset_implementation(node): def reset_implementation(node):
...@@ -227,6 +227,7 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -227,6 +227,7 @@ class InsertAndSetFIFODepths(Transformation):
max_depth=None, max_depth=None,
swg_exception=True, swg_exception=True,
vivado_ram_style="auto", vivado_ram_style="auto",
force_python_sim=False,
): ):
super().__init__() super().__init__()
self.fpgapart = fpgapart self.fpgapart = fpgapart
...@@ -235,6 +236,7 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -235,6 +236,7 @@ class InsertAndSetFIFODepths(Transformation):
self.max_depth = max_depth self.max_depth = max_depth
self.swg_exception = swg_exception self.swg_exception = swg_exception
self.vivado_ram_style = vivado_ram_style self.vivado_ram_style = vivado_ram_style
self.force_python_sim = force_python_sim
def apply(self, model): def apply(self, model):
# these optypes may potentially use external weights # these optypes may potentially use external weights
...@@ -306,57 +308,75 @@ class InsertAndSetFIFODepths(Transformation): ...@@ -306,57 +308,75 @@ class InsertAndSetFIFODepths(Transformation):
model = model.transform(CreateStitchedIP(self.fpgapart, self.clk_ns)) model = model.transform(CreateStitchedIP(self.fpgapart, self.clk_ns))
model.set_metadata_prop("exec_mode", "rtlsim") model.set_metadata_prop("exec_mode", "rtlsim")
# calculate input frequency (number of cycles for each input word) if self.force_python_sim:
first_node = getCustomOp(model.graph.node[0]) # do rtlsim in Python for FIFO sizing
ncycles_per_input = max( # calculate input frequency (number of cycles for each input word)
1, first_node = getCustomOp(model.graph.node[0])
int( ncycles_per_input = max(
math.ceil( 1,
perf["max_cycles"] int(
/ ( math.ceil(
np.prod(first_node.get_folded_input_shape()) perf["max_cycles"]
/ first_node.get_folded_input_shape()[-1] / (
np.prod(first_node.get_folded_input_shape())
/ first_node.get_folded_input_shape()[-1]
)
) )
) ),
), )
)
# set sufficiently large threshold for 1 image to fully execute and exit # set sufficiently large threshold for 1 image to fully execute and exit
ncycles = int(latency + max_cycles) ncycles = int(latency + max_cycles)
# prepare pyverilator model # prepare pyverilator model
sim = pyverilate_stitched_ip(model) sim = pyverilate_stitched_ip(model)
reset_rtlsim(sim) reset_rtlsim(sim)
toggle_clk(sim) toggle_clk(sim)
# set all input valids to 0 and output readies to 1 # set all input valids to 0 and output readies to 1
# set input data to some constant # set input data to some constant
set_signal(sim, "tvalid", 0) set_signal(sim, "tvalid", 0)
set_signal(sim, "tready", 1) set_signal(sim, "tready", 1)
set_signal(sim, "tdata", 0) set_signal(sim, "tdata", 0)
output_detected = False
while ncycles > 0:
toggle_clk(sim)
# set/unset valids
if ncycles % ncycles_per_input == 0:
set_signal(sim, "tvalid", 1)
else:
set_signal(sim, "tvalid", 0)
output_detected = False # since latency estimation is very pessimistic, detect first output
while ncycles > 0: # and fast-forward the sim
toggle_clk(sim) if get_signal(sim, "tvalid") != 0 and not output_detected:
# set/unset valids ncycles = max_cycles
if ncycles % ncycles_per_input == 0: output_detected = True
set_signal(sim, "tvalid", 1) else:
else: ncycles = ncycles - 1
set_signal(sim, "tvalid", 0)
# since latency estimation is very pessimistic, detect first output if not output_detected:
# and fast-forward the sim warnings.warn(
if get_signal(sim, "tvalid") != 0 and not output_detected: "No output detected, calculated FIFO depths may not be correct"
ncycles = max_cycles )
output_detected = True else:
# do rtlsim in C++ for FIFO sizing
# determine # inputs for FIFO sizing according to topology type
swg_nodes = [
x for x in model.graph.node if "ConvolutionInputGenerator" in x.op_type
]
if len(swg_nodes) == 0:
# MLP, no layer overlap
# assuming half the nodes are now FIFOs, use half the # of
# nodes as # inputs to drive the simulation
n_inputs = int(len(model.graph.node) / 2)
else: else:
ncycles = ncycles - 1 # convnet, single input is typically enough to fill entire
# layer pipeline due to overlaps
if not output_detected: n_inputs = 1
warnings.warn( sim = verilator_fifosim(model, n_inputs)
"No output detected, calculated FIFO depths may not be correct"
)
for ind, node in enumerate(fifo_nodes): for ind, node in enumerate(fifo_nodes):
maxcount_name = "maxcount_%d" % ind maxcount_name = "maxcount_%d" % ind
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment