Skip to content
Snippets Groups Projects
Commit 29f696fe authored by auphelia's avatar auphelia
Browse files

Merge branch 'dev' into fix/documentation

parents 888621af a06dc5bd
No related branches found
No related tags found
No related merge requests found
......@@ -259,6 +259,10 @@ class DataflowBuildConfig:
AutoFIFOSizingMethod
] = AutoFIFOSizingMethod.LARGEFIFO_RTLSIM
#: If set to True, never use the C++ rtlsim for auto FIFO sizing or the rtlsim
#: throughput test; always use the Python rtlsim instead
force_python_rtlsim: Optional[bool] = False
#: Memory resource type for large FIFOs
#: Only relevant when `auto_fifo_depths = True`
large_fifo_mem_style: Optional[LargeFIFOMemStyle] = LargeFIFOMemStyle.AUTO
......
......@@ -30,6 +30,7 @@ import json
import numpy as np
import os
import shutil
import warnings
from copy import deepcopy
from distutils.dir_util import copy_tree
from qonnx.core.modelwrapper import ModelWrapper
......@@ -113,6 +114,7 @@ from finn.util.basic import (
get_rtlsim_trace_depth,
pyverilate_get_liveness_threshold_cycles,
)
from finn.util.pyverilator import verilator_fifosim
from finn.util.test import execute_parent
......@@ -531,11 +533,20 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig):
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
elif cfg.auto_fifo_strategy == "largefifo_rtlsim":
# multi-in/out streams currently not supported in our C++ verilator driver
model_multi_io = len(model.graph.input) > 1 or len(model.graph.output) > 1
force_python_sim = model_multi_io or cfg.force_python_rtlsim
if model_multi_io:
warnings.warn(
"Multi-in/out streams currently not supported "
+ "in FINN C++ verilator driver, falling back to Python"
)
model = model.transform(
InsertAndSetFIFODepths(
cfg._resolve_fpga_part(),
cfg._resolve_hls_clk_period(),
vivado_ram_style=cfg.large_fifo_mem_style,
force_python_sim=force_python_sim,
)
)
else:
......@@ -632,20 +643,48 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi
# prepare ip-stitched rtlsim
rtlsim_model = deepcopy(model)
rtlsim_model = prepare_for_stitched_ip_rtlsim(rtlsim_model, cfg)
# run with single input to get latency
orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
# multi-in/out streams currently not supported in our C++ verilator driver
model_multi_io = (
len(rtlsim_model.graph.input) > 1 or len(rtlsim_model.graph.output) > 1
)
force_python_rtlsim = cfg.force_python_rtlsim or model_multi_io
if model_multi_io:
warnings.warn(
"Multi-in/out streams currently not supported "
+ "in FINN C++ verilator driver, falling back to Python"
)
rtlsim_bs = int(cfg.rtlsim_batch_size)
assert rtlsim_bs > 0, "rtlsim batch size must be >0"
if cfg.verify_save_rtlsim_waveforms:
# set depth to 3 for layer-by-layer visibility
os.environ["RTLSIM_TRACE_DEPTH"] = "3"
if force_python_rtlsim:
# run with single input to get latency
orig_rtlsim_trace_depth = get_rtlsim_trace_depth()
assert rtlsim_bs > 0, "rtlsim batch size must be >0"
if cfg.verify_save_rtlsim_waveforms:
# set depth to 3 for layer-by-layer visibility
os.environ["RTLSIM_TRACE_DEPTH"] = "3"
rtlsim_model.set_metadata_prop(
"rtlsim_trace",
"%s/rtlsim_perf_batch_%d.vcd" % (report_dir, rtlsim_bs),
)
rtlsim_model.set_metadata_prop(
"rtlsim_trace", "%s/rtlsim_perf_batch_%d.vcd" % (report_dir, rtlsim_bs)
"extra_verilator_args", str(["-CFLAGS", "-O3"])
)
rtlsim_model.set_metadata_prop("extra_verilator_args", str(["-CFLAGS", "-O3"]))
rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs)
rtlsim_latency = rtlsim_perf_dict["cycles"]
rtlsim_perf_dict["latency_cycles"] = rtlsim_latency
rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs)
rtlsim_latency = rtlsim_perf_dict["cycles"]
rtlsim_perf_dict["latency_cycles"] = rtlsim_latency
else:
rtlsim_perf_dict = verilator_fifosim(model, rtlsim_bs)
# keep keys consistent between the Python and C++-styles
cycles = rtlsim_perf_dict["cycles"]
clk_ns = float(model.get_metadata_prop("clk_ns"))
fclk_mhz = 1 / (clk_ns * 0.001)
runtime_s = (cycles * clk_ns) * (10**-9)
rtlsim_perf_dict["runtime[ms]"] = runtime_s * 1000
rtlsim_perf_dict["throughput[images/s]"] = rtlsim_bs / runtime_s
rtlsim_perf_dict["fclk[mhz]"] = fclk_mhz
# Remove the per-FIFO occupancy counters that the C++ driver writes into its
# results (keys "maxcount", "maxcount_1", ...) so they do not end up in the
# performance report. The keys are collected first because deleting from a
# dict while iterating over it raises RuntimeError.
fifo_count_keys = [
    key
    for key in rtlsim_perf_dict
    if "maxcount" in key or "max_count" in key
]
for key in fifo_count_keys:
    del rtlsim_perf_dict[key]
with open(report_dir + "/rtlsim_performance.json", "w") as f:
json.dump(rtlsim_perf_dict, f, indent=2)
if cfg.verify_save_rtlsim_waveforms:
......
......@@ -43,6 +43,7 @@ from finn.util.basic import (
pyverilate_get_liveness_threshold_cycles,
)
from finn.util.hls import CallHLS
from finn.util.pyverilator import make_single_source_file
from . import templates
......@@ -174,7 +175,7 @@ class HLSCustomOp(CustomOp):
# default impl only returns the HLS verilog codegen dir
return [verilog_path]
def get_all_verilog_filenames(self):
def get_all_verilog_filenames(self, abspath=False):
"Return list of all Verilog files used for this node."
verilog_files = []
......@@ -182,7 +183,10 @@ class HLSCustomOp(CustomOp):
for verilog_path in verilog_paths:
for f in os.listdir(verilog_path):
if f.endswith(".v"):
verilog_files += [f]
if abspath:
verilog_files += [verilog_path + "/" + f]
else:
verilog_files += [f]
return verilog_files
def prepare_rtlsim(self):
......@@ -192,13 +196,18 @@ class HLSCustomOp(CustomOp):
if PyVerilator is None:
raise ImportError("Installation of PyVerilator is required.")
verilog_paths = self.get_all_verilog_paths()
verilog_files = self.get_all_verilog_filenames()
verilog_files = self.get_all_verilog_filenames(abspath=True)
single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_")
tmp_build_dir = make_build_dir("pyverilator_" + self.onnx_node.name + "_")
target_file = single_src_dir + "/" + self.get_verilog_top_module_name() + ".v"
make_single_source_file(verilog_files, target_file)
# build the Verilator emu library
sim = PyVerilator.build(
verilog_files,
build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"),
verilog_path=verilog_paths,
self.get_verilog_top_module_name() + ".v",
build_dir=tmp_build_dir,
verilog_path=[single_src_dir],
trace_depth=get_rtlsim_trace_depth(),
top_module_name=self.get_verilog_top_module_name(),
)
......
/* Copyright (C) 2022, Advanced Micro Devices, Inc.
All rights reserved.
#
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
#
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
#
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
#
* Neither the name of FINN nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
#
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#include <iostream>
#include <fstream>
#include <cstddef>
#include <chrono>
#include "verilated.h"
#include "verilated_vcd_c.h"
#include "Vfinn_design_wrapper.h"
#ifdef DEBUG
#define TRACE(x) x
#else
#define TRACE(x) ;
#endif
using namespace std;
Vfinn_design_wrapper * top;
// code taken from pyverilator_wrapper.cpp generated by PyVerilator
// this is required by verilator for verilog designs using $time
// main_time is incremented in eval
// Simulation time counter; eval() adds one to it on every call.
double main_time = 0;
// Time callback for Verilator: reports the current value of main_time.
double sc_time_stamp() {
return main_time;
}
// function definitions
// helper functions for basic verilator tasks
extern "C" { //Open an extern C closed below
Vfinn_design_wrapper* construct() {
    // Initialise the Verilated runtime without any command-line arguments.
    const char** no_args = nullptr;
    Verilated::commandArgs(0, no_args);
    // Tracing is only enabled when TRACE is active, i.e. in DEBUG builds.
    TRACE(Verilated::traceEverOn(true));
    // Hand a freshly allocated model instance to the caller.
    return new Vfinn_design_wrapper();
}
int eval(Vfinn_design_wrapper* top) {
    // Evaluate the model once, then advance the simulation time by one step.
    top->eval();
    main_time += 1;
    return 0;
}
int destruct(Vfinn_design_wrapper* top) {
    // Deleting a null pointer is a no-op, so no explicit check is required.
    // The pointer is passed by value; the caller's copy is not modified.
    delete top;
    return 0;
}
// VCD tracing helpers. They are wrapped in TRACE so that they are only
// compiled when DEBUG is defined; otherwise the whole group expands to a
// lone ';'.
TRACE(
// Global trace handle used by toggle_clk and main.
VerilatedVcdC* tfp;
// Create a VCD writer, attach it to the model and open the output file.
// The local tfp below shadows the global one; main stores the returned
// pointer into the global.
VerilatedVcdC* start_vcd_trace(Vfinn_design_wrapper* top, const char* filename) {
VerilatedVcdC* tfp = new VerilatedVcdC;
top->trace(tfp, 99);
tfp->open(filename);
return tfp;
}
// Dump the current signal values to the trace at the given time.
int add_to_vcd_trace(VerilatedVcdC* tfp, int time) {
tfp->dump(time);
return 0;
}
// Flush buffered trace data.
int flush_vcd_trace(VerilatedVcdC* tfp) {
tfp->flush();
return 0;
}
// Close the trace file. The VerilatedVcdC object itself is not deleted here.
int stop_vcd_trace(VerilatedVcdC* tfp) {
tfp->close();
return 0;
}
)
}
// end of code taken from pyverilator_wrapper.cpp generated by PyVerilator
// Advance the simulation by one clock period.
// The first eval runs with ap_clk as it was on entry, the second one runs
// after ap_clk has been driven high; ap_clk is driven low again before
// returning. In DEBUG builds a VCD sample is dumped after each clock
// assignment, time-stamped with main_time.
inline void toggle_clk() {
eval(top);
top->ap_clk = 1;
TRACE(add_to_vcd_trace(tfp, main_time));
eval(top);
top->ap_clk = 0;
TRACE(add_to_vcd_trace(tfp, main_time));
}
void reset() {
    // Drive ap_rst_n low for ten clock periods, then drive it high again.
    const unsigned reset_cycles = 10;
    top->ap_rst_n = 0;
    unsigned remaining = reset_cycles;
    while (remaining > 0) {
        toggle_clk();
        remaining--;
    }
    top->ap_rst_n = 1;
}
// Simulation driver: feeds the single input stream, counts input and output
// transactions, and writes the collected statistics to results.txt.
// The @...@ tokens are placeholders that are substituted with concrete
// values before this file is compiled.
int main(int argc, char *argv[]) {
top = construct();
TRACE(tfp = start_vcd_trace(top, "trace.vcd"));
unsigned n_iters_per_input = @ITERS_PER_INPUT@;
unsigned n_iters_per_output = @ITERS_PER_OUTPUT@;
unsigned n_inputs = @N_INPUTS@;
unsigned max_iters = @MAX_ITERS@;

reset();

// The output is always ready to accept data and input data is offered from
// the very first cycle.
top->m_axis_0_tready = 1;
top->s_axis_0_tvalid = 1;

unsigned n_in_txns = 0, n_out_txns = 0, iters = 0, last_output_at = 0;
unsigned latency = 0;

bool exit_criterion = false;

cout << "Simulation starting" << endl;
cout << "Number of inputs to write " << n_iters_per_input * n_inputs << endl;
cout << "Number of outputs to expect " << n_iters_per_output * n_inputs << endl;
cout << "No-output timeout clock cycles " << max_iters << endl;

chrono::steady_clock::time_point begin = chrono::steady_clock::now();

while(!exit_criterion) {
toggle_clk();
iters++;
// Progress report every 1000 iterations, including the wall-clock time
// elapsed since the previous report.
if(iters % 1000 == 0) {
cout << "Elapsed iters " << iters << " inps " << n_in_txns << " outs " << n_out_txns << endl;
chrono::steady_clock::time_point end = chrono::steady_clock::now();
cout << "Elapsed since last report = " << chrono::duration_cast<chrono::seconds>(end - begin).count() << "[s]" << endl;
begin = end;
}
// An input transaction is counted when both tready and tvalid are set.
if(top->s_axis_0_tready == 1 && top->s_axis_0_tvalid == 1) {
n_in_txns++;
// Stop offering input data once the requested amount has been written.
if(n_in_txns == n_iters_per_input * n_inputs) {
top->s_axis_0_tvalid = 0;
cout << "All inputs written at cycle " << iters << endl;
}
}
// m_axis_0_tready is held at 1, so every cycle with tvalid set is counted
// as an output transaction.
if(top->m_axis_0_tvalid == 1) {
n_out_txns++;
last_output_at = iters;
// Latency is the iteration at which the first n_iters_per_output
// output transactions have been observed.
if(n_out_txns == n_iters_per_output) {
latency = iters;
}
}

// Stop when all expected inputs and outputs have been transferred, or when
// more than max_iters iterations have passed since the last output.
exit_criterion = ((n_in_txns >= n_iters_per_input * n_inputs) && (n_out_txns >= n_iters_per_output * n_inputs)) || ((iters-last_output_at) > max_iters);
}

TRACE(flush_vcd_trace(tfp));
TRACE(stop_vcd_trace(tfp));

cout << "Simulation finished" << endl;
cout << "Number of inputs consumed " << n_in_txns << endl;
cout << "Number of outputs produced " << n_out_txns << endl;
cout << "Number of clock cycles " << iters << endl;

// Results are written as one tab-separated key/value pair per line.
ofstream results_file;
results_file.open("results.txt", ios::out | ios::trunc);
results_file << "N_IN_TXNS" << "\t" << n_in_txns << endl;
results_file << "N_OUT_TXNS" << "\t" << n_out_txns << endl;
results_file << "cycles" << "\t" << iters << endl;
results_file << "N" << "\t" << n_inputs << endl;
results_file << "latency_cycles" << "\t" << latency << endl;
// Placeholder for additional generated result lines (FIFO depth logging).
@FIFO_DEPTH_LOGGING@
results_file.close();



destruct(top);

return 0;
}
......@@ -42,7 +42,7 @@ from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.util.fpgadataflow import is_fpgadataflow_node
from finn.util.pyverilator import pyverilate_stitched_ip
from finn.util.pyverilator import pyverilate_stitched_ip, verilator_fifosim
def reset_implementation(node):
......@@ -72,8 +72,9 @@ def optimize_depth(depth):
# Q_srl FIFOs do not benefit from size < 32
# add some slack
return 32
# round to nearest power of two for Vivado IP FIFO implementation
return int(2 ** math.ceil(math.log2(depth)))
# otherwise leave as is
# will be rounded to nearest power of two for Vivado-style FIFO
return int(depth)
class RemoveShallowFIFOs(Transformation):
......@@ -235,6 +236,7 @@ class InsertAndSetFIFODepths(Transformation):
max_depth=None,
swg_exception=True,
vivado_ram_style="auto",
force_python_sim=False,
):
super().__init__()
self.fpgapart = fpgapart
......@@ -243,6 +245,7 @@ class InsertAndSetFIFODepths(Transformation):
self.max_depth = max_depth
self.swg_exception = swg_exception
self.vivado_ram_style = vivado_ram_style
self.force_python_sim = force_python_sim
def apply(self, model):
# these optypes may potentially use external weights
......@@ -314,57 +317,75 @@ class InsertAndSetFIFODepths(Transformation):
model = model.transform(CreateStitchedIP(self.fpgapart, self.clk_ns))
model.set_metadata_prop("exec_mode", "rtlsim")
# calculate input frequency (number of cycles for each input word)
first_node = getCustomOp(model.graph.node[0])
ncycles_per_input = max(
1,
int(
math.ceil(
perf["max_cycles"]
/ (
np.prod(first_node.get_folded_input_shape())
/ first_node.get_folded_input_shape()[-1]
if self.force_python_sim:
# do rtlsim in Python for FIFO sizing
# calculate input frequency (number of cycles for each input word)
first_node = getCustomOp(model.graph.node[0])
ncycles_per_input = max(
1,
int(
math.ceil(
perf["max_cycles"]
/ (
np.prod(first_node.get_folded_input_shape())
/ first_node.get_folded_input_shape()[-1]
)
)
)
),
)
),
)
# set sufficiently large threshold for 1 image to fully execute and exit
ncycles = int(latency + max_cycles)
# set sufficiently large threshold for 1 image to fully execute and exit
ncycles = int(latency + max_cycles)
# prepare pyverilator model
sim = pyverilate_stitched_ip(model)
# prepare pyverilator model
sim = pyverilate_stitched_ip(model)
reset_rtlsim(sim)
toggle_clk(sim)
reset_rtlsim(sim)
toggle_clk(sim)
# set all input valids to 0 and output readies to 1
# set input data to some constant
set_signal(sim, "tvalid", 0)
set_signal(sim, "tready", 1)
set_signal(sim, "tdata", 0)
# set all input valids to 0 and output readies to 1
# set input data to some constant
set_signal(sim, "tvalid", 0)
set_signal(sim, "tready", 1)
set_signal(sim, "tdata", 0)
output_detected = False
while ncycles > 0:
toggle_clk(sim)
# set/unset valids
if ncycles % ncycles_per_input == 0:
set_signal(sim, "tvalid", 1)
else:
set_signal(sim, "tvalid", 0)
output_detected = False
while ncycles > 0:
toggle_clk(sim)
# set/unset valids
if ncycles % ncycles_per_input == 0:
set_signal(sim, "tvalid", 1)
else:
set_signal(sim, "tvalid", 0)
# since latency estimation is very pessimistic, detect first output
# and fast-forward the sim
if get_signal(sim, "tvalid") != 0 and not output_detected:
ncycles = max_cycles
output_detected = True
else:
ncycles = ncycles - 1
# since latency estimation is very pessimistic, detect first output
# and fast-forward the sim
if get_signal(sim, "tvalid") != 0 and not output_detected:
ncycles = max_cycles
output_detected = True
if not output_detected:
warnings.warn(
"No output detected, calculated FIFO depths may not be correct"
)
else:
# do rtlsim in C++ for FIFO sizing
# determine # inputs for FIFO sizing according to topology type
swg_nodes = [
x for x in model.graph.node if "ConvolutionInputGenerator" in x.op_type
]
if len(swg_nodes) == 0:
# MLP, no layer overlap
# assuming half the nodes are now FIFOs, use half the # of
# nodes as # inputs to drive the simulation
n_inputs = int(len(model.graph.node) / 2)
else:
ncycles = ncycles - 1
if not output_detected:
warnings.warn(
"No output detected, calculated FIFO depths may not be correct"
)
# convnet, single input is typically enough to fill entire
# layer pipeline due to overlaps
n_inputs = 1
sim = verilator_fifosim(model, n_inputs)
for ind, node in enumerate(fifo_nodes):
maxcount_name = "maxcount_%d" % ind
......
......@@ -28,33 +28,41 @@
import pkg_resources as pk
import numpy as np
import os
import shutil
from pyverilator import PyVerilator
from qonnx.custom_op.registry import getCustomOp
from finn.util.basic import get_rtlsim_trace_depth, make_build_dir
from finn.util.basic import (
get_rtlsim_trace_depth,
launch_process_helper,
make_build_dir,
)
def pyverilate_stitched_ip(
model,
read_internal_signals=True,
disable_common_warnings=True,
extra_verilator_args=[],
):
"""Given a model with stitched IP, return a PyVerilator sim object.
Trace depth is also controllable, see get_rtlsim_trace_depth()
def make_single_source_file(filtered_verilog_files, target_file):
"""Dump all Verilog code used by stitched IP into a single file.
This is because large models with many files require a verilator
command line too long for bash on most systems"""
:param read_internal_signals If set, it will be possible to examine the
internal (not only port) signals of the Verilog module, but this may
slow down compilation and emulation.
# concatenate all verilog code into a single file
with open(target_file, "w") as wf:
for vfile in filtered_verilog_files:
with open(vfile) as rf:
wf.write("//Added from " + vfile + "\n\n")
lines = rf.read()
for line in lines.split("\n"):
# break down too-long lines, Verilator complains otherwise
if len(line) > 20000:
line = line.replace("&", "\n&")
wf.write("\n" + line)
:param disable_common_warnings If set, disable the set of warnings that
Vivado-HLS-generated Verilog typically triggers in Verilator
(which can be very verbose otherwise)
"""
if PyVerilator is None:
raise ImportError("Installation of PyVerilator is required.")
def prepare_stitched_ip_for_verilator(model):
"""Prepare sources from given stitched IP for verilator simulation, including
generating a single source file and replacing certain Vivado infrastructure
headers with Verilator-compatible ones"""
vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f:
......@@ -67,8 +75,6 @@ def pyverilate_stitched_ip(
return os.path.basename(os.path.realpath(x))
top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename"))
top_module_name = top_module_file_name.strip(".v")
build_dir = make_build_dir("pyverilator_ipstitched_")
# dump all Verilog code to a single file
# this is because large models with many files require
......@@ -79,7 +85,7 @@ def pyverilate_stitched_ip(
# remove duplicates from list by doing list -> set -> list
src_exts = [".v", ".sv"]
all_verilog_src_files = list(
all_verilog_files = list(
set(
filter(
lambda x: any(map(lambda y: x.endswith(y), src_exts)), all_verilog_srcs
......@@ -87,7 +93,9 @@ def pyverilate_stitched_ip(
)
)
verilog_header_dir = make_build_dir("pyverilator_vh_")
verilog_header_dir = vivado_stitch_proj_dir + "/pyverilator_vh"
os.makedirs(verilog_header_dir, exist_ok=True)
# use custom version of axis infrastructure vh
# to enable Verilator to simulate AMD/Xilinx components (e.g DWC)
custom_vh = pk.resource_filename(
......@@ -105,7 +113,7 @@ def pyverilate_stitched_ip(
# remove all but one instances of regslice_core.v
filtered_verilog_files = []
remove_entry = False
for vfile in all_verilog_src_files:
for vfile in all_verilog_files:
if "regslice_core" in vfile:
if not remove_entry:
filtered_verilog_files.append(vfile)
......@@ -113,17 +121,159 @@ def pyverilate_stitched_ip(
else:
filtered_verilog_files.append(vfile)
# concatenate all verilog code into a single file
with open(vivado_stitch_proj_dir + "/" + top_module_file_name, "w") as wf:
for vfile in filtered_verilog_files:
with open(vfile) as rf:
wf.write("//Added from " + vfile + "\n\n")
lines = rf.read()
for line in lines.split("\n"):
# break down too-long lines, Verilator complains otherwise
if len(line) > 20000:
line = line.replace("&", "\n&")
wf.write("\n" + line)
target_file = vivado_stitch_proj_dir + "/" + top_module_file_name
make_single_source_file(filtered_verilog_files, target_file)
return vivado_stitch_proj_dir
def verilator_fifosim(model, n_inputs, max_iters=100000000):
    """Create a Verilator model of stitched IP and use a simple C++
    driver to drive the input stream. Useful for FIFO sizing, latency
    and throughput measurement.

    :param model: model with stitched IP; must have exactly one graph input
        and exactly one graph output
    :param n_inputs: value substituted for N_INPUTS in the C++ driver template
    :param max_iters: value substituted for MAX_ITERS in the C++ driver
        template (the driver's no-output timeout)
    :return: dict mapping every key found in the driver's results.txt to its
        integer value
    """
    # function-scope imports keep this block self-contained
    import shlex
    import warnings

    # fail fast, before any sources are prepared or compiled
    assert len(model.graph.input) == 1, "Only a single input stream is supported"
    assert len(model.graph.output) == 1, "Only a single output stream is supported"

    vivado_stitch_proj_dir = prepare_stitched_ip_for_verilator(model)
    build_dir = make_build_dir("verilator_fifosim_")
    fifosim_cpp_fname = pk.resource_filename(
        "finn.qnn-data", "cpp/verilator_fifosim.cpp"
    )
    with open(fifosim_cpp_fname, "r") as f:
        fifosim_cpp_template = f.read()

    iname = model.graph.input[0].name
    first_node = model.find_consumer(iname)
    oname = model.graph.output[0].name
    last_node = model.find_producer(oname)
    assert (first_node is not None) and (
        last_node is not None
    ), "Failed to find first/last nodes"
    fnode_inst = getCustomOp(first_node)
    lnode_inst = getCustomOp(last_node)
    ishape_folded = fnode_inst.get_folded_input_shape()
    oshape_folded = lnode_inst.get_folded_output_shape()
    # np.prod of an empty slice is the float 1.0; cast so that the value
    # substituted into the C++ template is always an integer literal
    iters_per_input = int(np.prod(ishape_folded[:-1]))
    iters_per_output = int(np.prod(oshape_folded[:-1]))

    # generate one results-logging statement per FIFO with a depth monitor;
    # the first one has no suffix, later ones are suffixed _1, _2, ...
    fifo_log = []
    fifo_log_templ = ' results_file << "maxcount%s" << "\\t" '
    fifo_log_templ += "<< to_string(top->maxcount%s) << endl;"
    fifo_nodes = model.get_nodes_by_op_type("StreamingFIFO")
    fifo_ind = 0
    for fifo_node in fifo_nodes:
        fifo_node = getCustomOp(fifo_node)
        if fifo_node.get_nodeattr("depth_monitor") == 1:
            suffix = "" if fifo_ind == 0 else "_%d" % fifo_ind
            fifo_log.append(fifo_log_templ % (suffix, suffix))
            fifo_ind += 1
    fifo_log = "\n".join(fifo_log)

    # fill in the @KEY@ placeholders of the C++ driver template
    template_dict = {
        "ITERS_PER_INPUT": iters_per_input,
        "ITERS_PER_OUTPUT": iters_per_output,
        "N_INPUTS": n_inputs,
        "MAX_ITERS": max_iters,
        "FIFO_DEPTH_LOGGING": fifo_log,
    }
    for key, val in template_dict.items():
        fifosim_cpp_template = fifosim_cpp_template.replace(f"@{key}@", str(val))

    with open(build_dir + "/verilator_fifosim.cpp", "w") as f:
        f.write(fifosim_cpp_template)

    which_verilator = shutil.which("verilator")
    if which_verilator is None:
        raise Exception("'verilator' executable not found")

    verilator_args = [
        "perl",
        which_verilator,
        "-Wno-fatal",
        "-Mdir",
        build_dir,
        "-y",
        vivado_stitch_proj_dir,
        "--CFLAGS",
        "--std=c++11",
        "-O3",
        "--x-assign",
        "fast",
        "--x-initial",
        "fast",
        "--noassert",
        "--cc",
        "finn_design_wrapper.v",
        "--top-module",
        "finn_design_wrapper",
        "--exe",
        "verilator_fifosim.cpp",
        "--threads",
        "4",
    ]

    proc_env = os.environ.copy()
    gcc_args = "-O3 -march=native"
    proc_env["OPT_FAST"] = gcc_args
    make_args = [
        "make",
        "-j4",
        "-C",
        build_dir,
        "-f",
        "Vfinn_design_wrapper.mk",
        "Vfinn_design_wrapper",
    ]

    # record the build commands in a script next to the build outputs;
    # arguments are shell-quoted so that paths containing spaces or other
    # special characters still yield a valid script
    with open(build_dir + "/compile.sh", "w") as f:
        f.write("#!/bin/bash" + "\n")
        f.write("export OPT_FAST='%s'\n" % gcc_args)
        f.write(" ".join(shlex.quote(arg) for arg in verilator_args) + "\n")
        f.write(" ".join(shlex.quote(arg) for arg in make_args) + "\n")

    # generate the C++ model, build it and run the resulting simulation binary
    launch_process_helper(verilator_args, cwd=build_dir)
    launch_process_helper(make_args, proc_env=proc_env, cwd=build_dir)

    sim_launch_args = ["./Vfinn_design_wrapper"]
    launch_process_helper(sim_launch_args, cwd=build_dir)

    # the driver writes one tab-separated key/value pair per line
    with open(build_dir + "/results.txt", "r") as f:
        results = f.read().strip().split("\n")
    ret_dict = {}
    for result_line in results:
        key, val = result_line.split("\t")
        ret_dict[key] = int(val)

    # the driver also stops when no output was seen for max_iters iterations;
    # make that visible instead of silently returning partial numbers
    expected_outputs = iters_per_output * n_inputs
    if ret_dict.get("N_OUT_TXNS", expected_outputs) < expected_outputs:
        warnings.warn(
            "verilator_fifosim produced %d of %d expected output transactions, "
            "results may be incomplete" % (ret_dict["N_OUT_TXNS"], expected_outputs)
        )
    return ret_dict
def pyverilate_stitched_ip(
model,
read_internal_signals=True,
disable_common_warnings=True,
extra_verilator_args=[],
):
"""Given a model with stitched IP, return a PyVerilator sim object.
Trace depth is also controllable, see get_rtlsim_trace_depth()
:param read_internal_signals If set, it will be possible to examine the
internal (not only port) signals of the Verilog module, but this may
slow down compilation and emulation.
:param disable_common_warnings If set, disable the set of warnings that
Vivado-HLS-generated Verilog typically triggers in Verilator
(which can be very verbose otherwise)
"""
if PyVerilator is None:
raise ImportError("Installation of PyVerilator is required.")
vivado_stitch_proj_dir = prepare_stitched_ip_for_verilator(model)
verilog_header_dir = vivado_stitch_proj_dir + "/pyverilator_vh"
def file_to_basename(x):
return os.path.basename(os.path.realpath(x))
top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename"))
# drop the file extension to get the module name; str.strip(".v") would
# remove any leading/trailing '.' and 'v' characters instead of the suffix,
# mangling names that start or end with 'v'
top_module_name = os.path.splitext(top_module_file_name)[0]
build_dir = make_build_dir("pyverilator_ipstitched_")
verilator_args = []
# disable common verilator warnings that should be harmless but commonly occur
......
......@@ -49,14 +49,19 @@ def fetch_test_model(topology, wbits=2, abits=2):
@pytest.mark.slow
@pytest.mark.vivado
@pytest.mark.fpgadataflow
def test_fifosizing_linear():
@pytest.mark.parametrize(
"method", ["largefifo_rtlsim_python", "largefifo_rtlsim_cpp", "characterize"]
)
def test_fifosizing_linear(method):
force_python_rtlsim = "python" in method
method_key = "largefifo_rtlsim" if "largefifo_rtlsim" in method else "characterize"
tmp_output_dir = fetch_test_model("tfc")
cfg = build_cfg.DataflowBuildConfig(
output_dir=tmp_output_dir,
auto_fifo_depths=True,
auto_fifo_strategy="characterize",
auto_fifo_strategy=method_key,
target_fps=10000,
force_python_rtlsim=force_python_rtlsim,
synth_clk_period_ns=10.0,
board="Pynq-Z1",
rtlsim_batch_size=100,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment