Skip to content
Snippets Groups Projects
Commit 5d1b1740 authored by Lucian Petrica's avatar Lucian Petrica
Browse files

Merge remote-tracking branch 'origin/dev' into optimize_streamer

parents f78176a6 dda60a00
No related branches found
No related tags found
No related merge requests found
Showing
with 163 additions and 18 deletions
......@@ -81,3 +81,6 @@ MANIFEST
# SSH key dir mounted into Docker
/ssh_keys/
# PYNQ board files
/board_files/
......@@ -37,7 +37,7 @@ RUN apt-get update
RUN apt-get -y upgrade
RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
RUN apt-get install -y verilator zsh
RUN apt-get -y install sshpass
RUN apt-get -y install sshpass wget unzip
RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
# cloning dependency repos
......
......@@ -43,7 +43,7 @@ RUN apt-get update
RUN apt-get -y upgrade
RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev
RUN apt-get install -y verilator nano zsh rsync
RUN apt-get -y install sshpass
RUN apt-get -y install sshpass wget unzip
RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
COPY requirements.txt .
......
......@@ -57,4 +57,19 @@ if [ ! -z "$VITIS_PATH" ];then
export XILINX_VITIS=$VITIS_PATH
source $VITIS_PATH/settings64.sh
fi
# download PYNQ board files if not already there
if [ ! -d "/workspace/finn/board_files" ]; then
    gecho "Downloading PYNQ board files for Vivado"
    # The existence check and mkdir use the absolute FINN root, while the
    # download/unzip/mv/rm steps use relative paths. Run them from the FINN
    # root so both refer to the same location no matter where the container
    # was started, and restore the caller's directory afterwards so the
    # command launched by the entrypoint still runs where it expects to.
    OLD_PWD=$(pwd)
    cd /workspace/finn
    wget -q https://github.com/cathalmccabe/pynq-z1_board_files/raw/master/pynq-z1.zip
    wget -q https://d2m32eurp10079.cloudfront.net/Download/pynq-z2.zip
    unzip -q pynq-z1.zip
    unzip -q pynq-z2.zip
    mkdir /workspace/finn/board_files
    mv pynq-z1/ board_files/
    mv pynq-z2/ board_files/
    rm pynq-z1.zip
    rm pynq-z2.zip
    cd "$OLD_PWD"
fi
exec "$@"
......@@ -574,7 +574,7 @@
"target_dir = os.getenv(\"PYNQ_TARGET_DIR\", \"/home/xilinx/finn\")\n",
"\n",
"model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_synth.onnx\")\n",
"model = model.transform(MakePYNQDriver())\n",
"model = model.transform(MakePYNQDriver(platform=\"zynq\"))\n",
"model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))\n",
"model.save(build_dir + \"/end2end_cnv_w1a1_pynq_deploy.onnx\")"
]
......
......@@ -730,7 +730,7 @@
" 'ip_path': ('s', False, ''),\n",
" 'ip_vlnv': ('s', False, ''),\n",
" 'exec_mode': ('s', False, ''),\n",
" 'sim_cycles': ('i', False, 0),\n",
" 'cycles_rtlsim': ('i', False, 0),\n",
" 'rtlsim_trace': ('s', False, ''),\n",
" 'res_estimate': ('s', False, ''),\n",
" 'res_hls': ('s', False, ''),\n",
......@@ -1422,7 +1422,7 @@
"source": [
"from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver\n",
"model = ModelWrapper(build_dir + \"/tfc_w1_a1_post_synthesis.onnx\")\n",
"model = model.transform(MakePYNQDriver())"
"model = model.transform(MakePYNQDriver(platform=\"zynq\"))"
]
},
{
......
# Copyright (c) 2020, Xilinx
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of FINN nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import finn.custom_op.registry as registry
from finn.util.fpgadataflow import is_fpgadataflow_node
def exp_cycles_per_layer(model):
    """Collects the expected cycle count of every fpgadataflow node in the model.

    Each node accepted by is_fpgadataflow_node is instantiated through the
    custom op registry and asked for its get_exp_cycles() value. Results are
    keyed by node name, so call the GiveUniqueNodeNames transformation before
    this analysis pass to ensure all nodes are visible in the results.

    Returns {node name : cycle estimation}."""
    estimates = {}
    # lazily filter the graph so nodes are checked and processed one at a time
    dataflow_nodes = (n for n in model.graph.node if is_fpgadataflow_node(n) is True)
    for dataflow_node in dataflow_nodes:
        custom_op_cls = registry.custom_op[dataflow_node.op_type]
        estimates[dataflow_node.name] = custom_op_cls(dataflow_node).get_exp_cycles()
    return estimates
......@@ -35,6 +35,9 @@ from finn.util.fpgadataflow import is_fpgadataflow_node
def hls_synth_res_estimation(model):
"""Extracts the FPGA resource results from the Vivado HLS synthesis estimates.
Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
transformation) prior to calling this analysis pass to ensure all nodes are
visible in the results.
Returns {node name : resources_dict}."""
......
......@@ -30,15 +30,23 @@ import os
import xml.etree.ElementTree as ET
from finn.transformation.move_reshape import _is_fpgadataflow_node
from finn.core.modelwrapper import ModelWrapper
from finn.custom_op.registry import getCustomOp
def post_synth_res(model):
def post_synth_res(model, override_synth_report_filename=None):
"""Extracts the FPGA resource results from the Vivado synthesis.
Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
transformation) prior to calling this analysis pass to ensure all nodes are
visible in the results.
Returns {node name : resources_dict}."""
res_dict = {}
synth_report_filename = model.get_metadata_prop("vivado_synth_rpt")
if override_synth_report_filename is not None:
synth_report_filename = override_synth_report_filename
else:
synth_report_filename = model.get_metadata_prop("vivado_synth_rpt")
if os.path.isfile(synth_report_filename):
tree = ET.parse(synth_report_filename)
root = tree.getroot()
......@@ -50,7 +58,11 @@ def post_synth_res(model):
raise Exception("Please run synthesis first")
for node in model.graph.node:
if _is_fpgadataflow_node(node):
if node.op_type == "StreamingDataflowPartition":
sdp_model = ModelWrapper(getCustomOp(node).get_nodeattr("model"))
sdp_res_dict = post_synth_res(sdp_model, synth_report_filename)
res_dict.update(sdp_res_dict)
elif _is_fpgadataflow_node(node):
row = root.findall(".//*[@contents='%s']/.." % node.name)
if row != []:
node_dict = {}
......
......@@ -32,6 +32,9 @@ from finn.util.fpgadataflow import is_fpgadataflow_node
def res_estimation(model):
"""Estimates the resources needed for the given model.
Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
transformation) prior to calling this analysis pass to ensure all nodes are
visible in the results.
Returns {node name : resource estimation}."""
......
......@@ -51,8 +51,20 @@ def execute_node(node, context, graph):
if node.op_type == "StreamingDataflowPartition":
sdp_node = getCustomOp(node)
model = ModelWrapper(sdp_node.get_nodeattr("model"))
ret = execute_onnx(model, context, True)
context.update(ret)
inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items()))
# input may have been renamed in partition
assert len(inp_ctx) == 1
old_iname = node.input[0]
new_iname = model.graph.input[0].name
if old_iname != new_iname:
inp_ctx[new_iname] = inp_ctx[old_iname]
del inp_ctx[old_iname]
ret = execute_onnx(model, inp_ctx, False)
# output may have been renamed in partition
assert len(ret) == 1
node_oname = node.output[0]
model_oname = model.graph.output[0].name
context[node_oname] = ret[model_oname]
else:
if node.domain == "finn":
......
......@@ -62,11 +62,15 @@ def remote_exec(model, execution_context):
bash_command = ["/bin/bash", "-c", cmd]
process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
process_compile.communicate()
# set platform attribute for correct remote execution
platform = model.get_metadata_prop("platform")
assert platform in ["alveo", "zynq", "zynq-iodma"]
cmd = (
"sshpass -p {} ssh {}@{} -p {} "
'"cd {}/{}; echo "{}" | '
'sudo -S python3.6 driver.py --exec_mode="execute" --batchsize=1" '
'--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy"'
'--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy" '
'--platform="{}" '
).format(
pynq_password,
pynq_username,
......@@ -75,6 +79,7 @@ def remote_exec(model, execution_context):
pynq_target_dir,
deployment_folder,
pynq_password,
platform,
)
bash_command = ["/bin/bash", "-c", cmd]
process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
......
......@@ -102,7 +102,7 @@ def rtlsim_exec(model, execution_context):
sim = PyVerilator(rtlsim_so, auto_eval=False)
ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file)
packed_output = ret[0]
model.set_metadata_prop("sim_cycles", str(ret[1]))
model.set_metadata_prop("cycles_rtlsim", str(ret[1]))
# unpack output and put into context
o_folded_tensor = rtlsim_output_to_npy(
packed_output, None, o_dt, o_folded_shape, packedBits, targetBits
......@@ -171,7 +171,7 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True):
no_change_count = no_change_count + 1
if len(outputs) == num_out_values:
sim_cycles = observation_count
cycles_rtlsim = observation_count
output_observed = True
if no_change_count == liveness_threshold:
......@@ -191,4 +191,4 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True):
sim.flush_vcd_trace()
sim.stop_vcd_trace()
return (outputs, sim_cycles)
return (outputs, cycles_rtlsim)
......@@ -125,7 +125,7 @@ def throughput_test_rtlsim(model, batchsize=100):
os.environ["LIVENESS_THRESHOLD"] = "-1"
rtlsim_exec(model, ctx)
# extract metrics
cycles = int(model.get_metadata_prop("sim_cycles"))
cycles = int(model.get_metadata_prop("cycles_rtlsim"))
clk_ns = float(model.get_metadata_prop("clk_ns"))
fclk_mhz = 1 / (clk_ns * 0.001)
runtime_s = (cycles * clk_ns) * (10 ** -9)
......
......@@ -82,7 +82,8 @@ class HLSCustomOp(CustomOp):
"ip_path": ("s", False, ""),
"ip_vlnv": ("s", False, ""),
"exec_mode": ("s", False, ""),
"sim_cycles": ("i", False, 0),
"cycles_rtlsim": ("i", False, 0),
"cycles_estimate": ("i", False, 0),
"rtlsim_trace": ("s", False, ""),
"res_estimate": ("s", False, ""),
"res_hls": ("s", False, ""),
......@@ -209,6 +210,12 @@ class HLSCustomOp(CustomOp):
HLSCustomOp class but has to be filled by every node"""
return 0
def get_exp_cycles(self):
    """Returns the expected number of cycles for the currently set folding.

    This HLSCustomOp version is only a placeholder that reports 0; every
    node has to fill in its own estimate."""
    placeholder_estimate = 0
    return placeholder_estimate
def code_generation_ipgen(self, model, fpgapart, clk):
"""Generates c++ code and tcl script for ip generation."""
node = self.onnx_node
......@@ -436,7 +443,7 @@ compilation transformations?
no_change_count = no_change_count + 1
if len(outputs) == num_out_values:
self.set_nodeattr("sim_cycles", observation_count)
self.set_nodeattr("cycles_rtlsim", observation_count)
output_observed = True
if no_change_count == liveness_threshold:
......@@ -465,7 +472,7 @@ compilation transformations?
trace_file = self.onnx_node.name + ".vcd"
num_out_values = self.get_number_output_values()
total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file)
self.set_nodeattr("sim_cycles", total_cycle_count)
self.set_nodeattr("cycles_rtlsim", total_cycle_count)
def execute_node(self, context, graph):
"""Executes single node using cppsim or rtlsim."""
......
......@@ -170,6 +170,10 @@ class AddStreams_Batch(HLSCustomOp):
def get_number_output_values(self):
    """Returns the product of all folded output shape dimensions except the
    last one."""
    folded_oshape = self.get_folded_output_shape()
    return np.prod(folded_oshape[:-1])
def get_exp_cycles(self):
    """Returns the expected cycle count, taken as the product of all folded
    output shape dimensions except the last one (noted in the original code
    as Channels/PE * batch size * fmdim * fmdim)."""
    folded_oshape = self.get_folded_output_shape()
    outer_dims = folded_oshape[:-1]
    return np.prod(outer_dims)
def execute_node(self, context, graph):
mode = self.get_nodeattr("exec_mode")
node = self.onnx_node
......
......@@ -224,6 +224,10 @@ class ChannelwiseOp_Batch(HLSCustomOp):
nf = np.prod(self.get_folded_output_shape()[:-1])
return nf
def get_exp_cycles(self):
    """Returns the expected cycle count, taken as the product of all folded
    output shape dimensions except the last one (noted in the original code
    as Channels/PE * batch size * fmdim * fmdim)."""
    folded_oshape = self.get_folded_output_shape()
    outer_dims = folded_oshape[:-1]
    return np.prod(outer_dims)
def get_template_param_values(self):
"""Returns the template parameter values according to input, output and weight
data types."""
......
......@@ -177,6 +177,23 @@ class ConvolutionInputGenerator(HLSCustomOp):
num_output_elems = np.prod(folded_oshape[:-1])
return num_output_elems
def get_exp_cycles(self):
    """Returns the expected cycle count for the current SIMD folding.

    The estimate is computed from the SIMD, IFMChannels, ConvKernelDim,
    IFMDim, OFMDim and Stride node attributes and truncated to an int.
    See https://github.com/Xilinx/finn-hlslib/blob/master/slidingwindow.h"""
    attr = self.get_nodeattr
    simd = attr("SIMD")
    ifm_ch = attr("IFMChannels")
    k = attr("ConvKernelDim")
    ifm_dim = attr("IFMDim")
    ofm_dim = attr("OFMDim")
    stride = attr("Stride")
    # mmv != 1 is not supported yet, so it is fixed to 1 for now
    mmv = 1
    ch_folds = ifm_ch / simd
    write_cycles = (ofm_dim * k * k * ch_folds) / mmv
    read_cycles = stride * ifm_dim * ch_folds
    # the slower of the write and read block cycles is paid once per ofm_dim
    slower_block = max(write_cycles, read_cycles)
    return int(ifm_dim * k * ch_folds + ofm_dim * slower_block)
def execute_node(self, context, graph):
mode = self.get_nodeattr("exec_mode")
node = self.onnx_node
......
......@@ -36,6 +36,14 @@ class DownSampler(HLSCustomOp):
stride = self.get_nodeattr("Stride")
return int(np.floor((idim - 1) / stride) + 1)
def get_exp_cycles(self):
    """Returns the expected cycle count, computed from the ImgDim,
    NumChannels, SIMD and numInputVectors node attributes as
    NumChannels / SIMD * numInputVectors * ImgDim * ImgDim, truncated to an
    int."""
    img_dim = self.get_nodeattr("ImgDim")
    num_ch = self.get_nodeattr("NumChannels")
    fold_simd = self.get_nodeattr("SIMD")
    n_vectors = self.get_nodeattr("numInputVectors")
    return int(num_ch / fold_simd * n_vectors * img_dim * img_dim)
def get_normal_input_shape(self):
idim = self.get_nodeattr("ImgDim")
num_ch = self.get_nodeattr("NumChannels")
......
......@@ -164,6 +164,10 @@ class DuplicateStreams_Batch(HLSCustomOp):
def get_number_output_values(self):
    """Returns twice the product of the folded output shape dimensions,
    leaving out the first and the last dimension."""
    folded_oshape = self.get_folded_output_shape()
    inner_dims = folded_oshape[1:-1]
    return 2 * np.prod(inner_dims)
def get_exp_cycles(self):
    """Returns the expected cycle count, taken as the product of all folded
    output shape dimensions except the last one (noted in the original code
    as Channels/PE * batch size * fmdim * fmdim)."""
    folded_oshape = self.get_folded_output_shape()
    outer_dims = folded_oshape[:-1]
    return np.prod(outer_dims)
def execute_node(self, context, graph):
mode = self.get_nodeattr("exec_mode")
node = self.onnx_node
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment