From 520e5d2693ef83f8f9b4590e545ae5d897925795 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <yamanu@xilinx.com>
Date: Wed, 1 Jun 2022 19:08:27 +0200
Subject: [PATCH] [Refactor] move files over from finn-base

---
 src/finn/core/onnx_exec.py | 120 +++++++++
 src/finn/core/remote_exec.py | 119 +++++++++
 src/finn/core/rtlsim_exec.py | 156 +++++++++++
 src/finn/util/basic.py | 211 +++++++++++++++
 src/finn/util/data_packing.py | 455 ++++++++++++++++++++++++++++++++
 src/finn/util/fpgadataflow.py | 43 +++
 src/finn/util/hls.py | 74 ++++++
 src/finn/util/platforms.py | 480 ++++++++++++++++++++++++++++++++++
 src/finn/util/vivado.py | 83 ++++++
 9 files changed, 1741 insertions(+)
 create mode 100644 src/finn/core/onnx_exec.py
 create mode 100644 src/finn/core/remote_exec.py
 create mode 100644 src/finn/core/rtlsim_exec.py
 create mode 100644 src/finn/util/basic.py
 create mode 100644 src/finn/util/data_packing.py
 create mode 100644 src/finn/util/fpgadataflow.py
 create mode 100644 src/finn/util/hls.py
 create mode 100644 src/finn/util/platforms.py
 create mode 100644 src/finn/util/vivado.py

diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py
new file mode 100644
index 000000000..d305ac9a6
--- /dev/null
+++ b/src/finn/core/onnx_exec.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2022, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import qonnx.analysis.topology as ta
+from qonnx.core.onnx_exec import execute_onnx as execute_onnx_base
+
+from finn.core.remote_exec import remote_exec
+from finn.core.rtlsim_exec import rtlsim_exec
+
+
+def execute_onnx(
+    model, input_dict, return_full_exec_context=False, start_node=None, end_node=None
+):
+    """Executes the given ONNX ModelWrapper with the given named inputs.
+    If return_full_exec_context is False, a dict of named outputs is returned
+    as indicated by model.graph.output.
+    If return_full_exec_context is True, the full set of tensors used by
+    the execution (including inputs, weights, activations and final outputs)
+    will be returned as a dict.
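+
+    Whether execution is delegated to the QONNX reference implementation,
+    to a remote PYNQ board or to rtlsim is determined by the model's
+    "exec_mode" metadata property.
+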
+    When start_node and end_node are set to None, the whole graph is executed.
+    If they are set to particular ONNX nodes, only the subgraph between (and
+    including) those nodes is executed.
+    """
+
+    # check if model has an execution mode set
+    # if None, execute model using the QONNX-provided execute_onnx impl
+    # if set to "remote_pynq" execute model on PYNQ board
+    # if set to "rtlsim" execute model using pyverilator
+    model_exec_mode = model.get_metadata_prop("exec_mode")
+    if (model_exec_mode is None) or (model_exec_mode == ""):
+        return execute_onnx_base(
+            model, input_dict, return_full_exec_context, start_node, end_node
+        )
+
+    if not model.check_all_tensor_shapes_specified():
+        raise Exception("Found unspecified tensor shapes, try infer_shapes")
+    ret = model.analysis(ta.nodes_topologically_sorted)
+    assert (
+        ret["nodes_topologically_sorted"] is True
+    ), """Nodes must be
+    topologically sorted."""
+
+    graph = model.graph
+    # first, we need to make sure that every variable required by the graph has
+    # some buffer associated with it. this includes graph inputs (which includes
+    # the input data as well as the trained parameters) and the graph ValueInfo
+    # (intermediate tensors between layers)
+    # this is provided by the execution_context, which is a dict of np.ndarray
+    execution_context = model.make_empty_exec_context()
+    # fill in any inputs provided to this function
+    for inp_name in input_dict.keys():
+        if inp_name in execution_context:
+            if execution_context[inp_name].shape == input_dict[inp_name].shape:
+                execution_context[inp_name] = input_dict[inp_name]
+            else:
+                raise Exception(
+                    "Shape mismatch for provided input %s: expected %s, got %s"
+                    % (
+                        inp_name,
+                        str(execution_context[inp_name].shape),
+                        str(input_dict[inp_name].shape),
+                    )
+                )
+        # else:
+        # raise Exception("Provided input not found in graph context: %s" % inp_name)
+
+    # dispatch to the right executor for the exec_mode checked above:
+    # "remote_pynq" executes the model on a remote PYNQ board,
+    # "rtlsim" executes the stitched-IP model using pyverilator
+    if model_exec_mode == "remote_pynq":
+        # use remote exec metadata built into model to execute on a remote PYNQ
+        remote_exec(model, execution_context)
+    elif model_exec_mode == "rtlsim":
+        # use stitched IP for rtlsim
+        rtlsim_exec(model, execution_context)
+    else:
+        raise Exception(
+            """Metadata property "exec_mode" is set to an unknown value.
+        Can be left unset or has to be set to "remote_pynq" for remote execution
+        on PYNQ board or "rtlsim" for execution using pyverilator!"""
+        )
+
+    if return_full_exec_context:
+        return execution_context
+    else:
+        # provide outputs as dict
+        output_dict = dict()
+        for out_tensor in graph.output:
+            out_name = out_tensor.name
+            output_dict[out_name] = execution_context[out_name]
+        return output_dict
diff --git a/src/finn/core/remote_exec.py b/src/finn/core/remote_exec.py
new file mode 100644
index 000000000..f487b48f8
--- /dev/null
+++ b/src/finn/core/remote_exec.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os +import subprocess +import warnings + + +def remote_exec(model, execution_context): + """Executes the given model remotely on the pynq board. The metadata properties + related to the pynq board have to be set. The execution context contains the + input values.""" + # TODO fix for multi input-output + pynq_ip = model.get_metadata_prop("pynq_ip") + pynq_port = int(model.get_metadata_prop("pynq_port")) + pynq_username = model.get_metadata_prop("pynq_username") + pynq_password = model.get_metadata_prop("pynq_password") + pynq_target_dir = model.get_metadata_prop("pynq_target_dir") + deployment_dir = model.get_metadata_prop("pynq_deploy_dir") + platform = model.get_metadata_prop("platform") + assert platform in ["alveo", "zynq-iodma"] + bitfile = model.get_metadata_prop("bitfile") + bitfile = os.path.basename(bitfile) + if pynq_password == "": + if "zynq" in platform: + raise Exception("PYNQ board remote exec needs password for sudo") + else: + local_prefix = "" # assume we are using an ssh key + warnings.warn("Empty password, make sure you've set up an ssh key") + else: + local_prefix = "sshpass -p %s " % pynq_password + + if platform == "alveo": + # Alveo can run without sudo + remote_prefix = "" + elif "zynq" in platform: + # PYNQ Zynq boards need to execute with sudo + remote_prefix = "echo %s | sudo -S " % pynq_password + + inp = execution_context[model.graph.input[0].name] + # make copy of array before saving it + inp = inp.copy() + batchsize = inp.shape[0] + np.save(os.path.join(deployment_dir, "input.npy"), inp) + # extracting last folder of absolute path (deployment_dir) + deployment_folder = os.path.basename(os.path.normpath(deployment_dir)) + # copy input to PYNQ board + cmd = local_prefix + "scp -P{} -r {}/input.npy {}@{}:{}/{}".format( + pynq_port, + deployment_dir, + pynq_username, + pynq_ip, + pynq_target_dir, + deployment_folder, + ) + bash_command = ["/bin/bash", "-c", cmd] + process_scp_in = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_scp_in.communicate() + + # use platform attribute for correct remote execution + if platform == "alveo": + remote_cmd = "bash -ic 'bash alveo_run.sh execute %d' \"" % batchsize + else: + remote_cmd = ( + "python3.6 driver.py --exec_mode=execute --batchsize={} " + "--bitfile={} --inputfile=input.npy --outputfile=output.npy 
" + '--platform={} "' + ).format(batchsize, bitfile, platform) + cmd = ( + local_prefix + 'ssh {}@{} -p {} "cd {}/{}; ' + remote_prefix + remote_cmd + ).format(pynq_username, pynq_ip, pynq_port, pynq_target_dir, deployment_folder) + bash_command = ["/bin/bash", "-c", cmd] + process_exec_accel = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_exec_accel.communicate() + # remove stale output file from local dir, if any + try: + os.remove("{}/output.npy".format(deployment_dir)) + except FileNotFoundError: + pass + # copy generated output to local + cmd = local_prefix + "scp -P{} {}@{}:{}/{}/output.npy {}".format( + pynq_port, + pynq_username, + pynq_ip, + pynq_target_dir, + deployment_folder, + deployment_dir, + ) + bash_command = ["/bin/bash", "-c", cmd] + process_scp_out = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_scp_out.communicate() + outp = np.load("{}/output.npy".format(deployment_dir)) + execution_context[model.graph.output[0].name] = outp diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py new file mode 100644 index 000000000..231a5ea7c --- /dev/null +++ b/src/finn/core/rtlsim_exec.py @@ -0,0 +1,156 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +from pyverilator.util.axi_utils import ( + pyverilate_stitched_ip, + reset_rtlsim, + rtlsim_multi_io, +) +from qonnx.custom_op.registry import getCustomOp + +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + +try: + from pyverilator import PyVerilator +except ModuleNotFoundError: + PyVerilator = None + + +def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None): + """Use PyVerilator to execute given model with stitched IP. The execution + context contains the input values. 
Hook functions can be optionally + specified to observe/alter the state of the circuit, receiving the + PyVerilator sim object as their first argument: + - pre_hook : hook function to be called before sim start (after reset) + - post_hook : hook function to be called after sim end + """ + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + # ensure stitched ip project already exists + assert os.path.isfile( + model.get_metadata_prop("wrapper_filename") + ), """The + file name from metadata property "wrapper_filename" doesn't exist.""" + assert os.path.isdir( + model.get_metadata_prop("vivado_stitch_proj") + ), """The + directory from metadata property "vivado_stitch_proj" doesn't exist""" + trace_file = model.get_metadata_prop("rtlsim_trace") + if trace_file is None: + trace_file = "" + extra_verilator_args = model.get_metadata_prop("extra_verilator_args") + if extra_verilator_args is None: + extra_verilator_args = [] + else: + extra_verilator_args = eval(extra_verilator_args) + + # extract i/o info to prepare io_dict + io_dict = {"inputs": {}, "outputs": {}} + if_dict = eval(model.get_metadata_prop("vivado_stitch_ifnames")) + # go over and prepare inputs + for i, i_vi in enumerate(model.graph.input): + i_name = i_vi.name + i_tensor = execution_context[i_name] + i_dt = model.get_tensor_datatype(i_name) + first_node_onnx = model.find_consumer(i_name) + first_node = getCustomOp(first_node_onnx) + node_inp_ind = list(first_node_onnx.input).index(i_name) + if node_inp_ind == 0: + # default node input (input 0) + i_stream_w = first_node.get_instream_width() + i_folded_shape = first_node.get_folded_input_shape() + else: + # not input 0; node must support specifying inp index + # for these functions + i_stream_w = first_node.get_instream_width(node_inp_ind) + i_folded_shape = first_node.get_folded_input_shape(node_inp_ind) + batchsize = i_tensor.shape[0] + # override batch size for input + i_folded_shape = list(i_folded_shape) + i_folded_shape[0] = batchsize + i_folded_shape = tuple(i_folded_shape) + # TODO any other layout transformations need to happen here! 
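+        # reshape the input into its folded form: the innermost dimension of
+        # i_folded_shape holds exactly the elements that are packed into one
+        # stream word of width i_stream_w below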
+ i_tensor = i_tensor.reshape(i_folded_shape) + # pack input for rtlsim + packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w) + # add to io_dict + if_name = if_dict["s_axis"][i][0] + io_dict["inputs"][if_name] = packed_input + # go over outputs to determine how many values will be produced + num_out_values = 0 + o_tensor_info = [] + for o, o_vi in enumerate(model.graph.output): + # output in io_dict just needs an empty list + if_name = if_dict["m_axis"][o][0] + io_dict["outputs"][if_name] = [] + # extract output shape + o_name = o_vi.name + o_shape = model.get_tensor_shape(o_name) + o_dt = model.get_tensor_datatype(o_name) + last_node = getCustomOp(model.find_producer(o_name)) + o_folded_shape = last_node.get_folded_output_shape() + # override batch size from actual input + o_shape = list(o_shape) + o_shape[0] = batchsize + o_shape = tuple(o_shape) + o_folded_shape = list(o_folded_shape) + o_folded_shape[0] = batchsize + o_folded_shape = tuple(o_folded_shape) + o_stream_w = last_node.get_outstream_width() + o_tensor_info.append((o_stream_w, o_dt, o_folded_shape, o_shape)) + num_out_values += batchsize * last_node.get_number_output_values() + + # prepare pyverilator model + rtlsim_so = model.get_metadata_prop("rtlsim_so") + if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)): + sim = pyverilate_stitched_ip(model, extra_verilator_args=extra_verilator_args) + model.set_metadata_prop("rtlsim_so", sim.lib._name) + else: + sim = PyVerilator(rtlsim_so, auto_eval=False) + + # reset and call rtlsim, including any pre/post hooks + reset_rtlsim(sim) + if pre_hook is not None: + pre_hook(sim) + n_cycles = rtlsim_multi_io(sim, io_dict, num_out_values, trace_file, sname="_") + if post_hook is not None: + post_hook(sim) + + # unpack outputs and put back into execution context + for o, o_vi in enumerate(model.graph.output): + o_name = o_vi.name + if_name = if_dict["m_axis"][o][0] + o_stream_w, o_dt, o_folded_shape, o_shape = o_tensor_info[o] + packed_output = io_dict["outputs"][if_name] + o_folded_tensor = rtlsim_output_to_npy( + packed_output, None, o_dt, o_folded_shape, o_stream_w, o_dt.bitwidth() + ) + execution_context[o_name] = o_folded_tensor.reshape(o_shape) + + model.set_metadata_prop("cycles_rtlsim", str(n_cycles)) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py new file mode 100644 index 000000000..2cf806fe1 --- /dev/null +++ b/src/finn/util/basic.py @@ -0,0 +1,211 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import subprocess
+import sys
+import tempfile
+
+# mapping from PYNQ board names to FPGA part names
+pynq_part_map = dict()
+pynq_part_map["Ultra96"] = "xczu3eg-sbva484-1-e"
+pynq_part_map["Pynq-Z1"] = "xc7z020clg400-1"
+pynq_part_map["Pynq-Z2"] = "xc7z020clg400-1"
+pynq_part_map["ZCU102"] = "xczu9eg-ffvb1156-2-e"
+pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e"
+pynq_part_map["ZCU111"] = "xczu28dr-ffvg1517-2-e"
+pynq_part_map["RFSoC2x2"] = "xczu28dr-ffvg1517-2-e"
+
+# native AXI HP port width (in bits) for PYNQ boards
+pynq_native_port_width = dict()
+pynq_native_port_width["Pynq-Z1"] = 64
+pynq_native_port_width["Pynq-Z2"] = 64
+pynq_native_port_width["Ultra96"] = 128
+pynq_native_port_width["ZCU102"] = 128
+pynq_native_port_width["ZCU104"] = 128
+pynq_native_port_width["ZCU111"] = 128
+pynq_native_port_width["RFSoC2x2"] = 128
+
+# Alveo device and platform mappings
+alveo_part_map = dict()
+alveo_part_map["U50"] = "xcu50-fsvh2104-2L-e"
+alveo_part_map["U200"] = "xcu200-fsgd2104-2-e"
+alveo_part_map["U250"] = "xcu250-figd2104-2L-e"
+alveo_part_map["U280"] = "xcu280-fsvh2892-2L-e"
+
+alveo_default_platform = dict()
+alveo_default_platform["U50"] = "xilinx_u50_gen3x16_xdma_201920_3"
+alveo_default_platform["U200"] = "xilinx_u200_xdma_201830_2"
+alveo_default_platform["U250"] = "xilinx_u250_xdma_201830_2"
+alveo_default_platform["U280"] = "xilinx_u280_xdma_201920_3"
+
+
+def get_rtlsim_trace_depth():
+    """Return the trace depth for rtlsim via PyVerilator. Controllable
+    via the RTLSIM_TRACE_DEPTH environment variable. If the env.var. is
+    undefined, the default value of 1 is returned. A trace depth of 1
+    will only show top-level signals and yield smaller .vcd files.
+
+    The following depth values are of interest for whole-network stitched IP
+    rtlsim:
+    - level 1 shows top-level input/output streams
+    - level 2 shows per-layer input/output streams
+    - level 3 shows full per-layer I/O including FIFO count signals
+    """
+
+    try:
+        return int(os.environ["RTLSIM_TRACE_DEPTH"])
+    except KeyError:
+        return 1
+
+
+def get_remote_vivado():
+    """Return the address of the remote Vivado synthesis server, as set by the
+    REMOTE_VIVADO environment variable; otherwise return None."""
+
+    try:
+        return os.environ["REMOTE_VIVADO"]
+    except KeyError:
+        return None
+
+
+def get_finn_root():
+    "Return the root directory that FINN is cloned into."
+
+    try:
+        return os.environ["FINN_ROOT"]
+    except KeyError:
+        raise Exception(
+            """Environment variable FINN_ROOT must be set
+        correctly. Please ensure you have launched the Docker container correctly.
+        """
+        )
+
+
+def make_build_dir(prefix=""):
+    """Creates a folder with given prefix to be used as a build dir.
+    Use this function instead of tempfile.mkdtemp to ensure any generated files
+    will survive on the host after the FINN Docker container exits."""
+    try:
+        tmpdir = tempfile.mkdtemp(prefix=prefix)
+        newdir = tmpdir.replace("/tmp", os.environ["FINN_BUILD_DIR"])
+        os.makedirs(newdir)
+        return newdir
+    except KeyError:
+        raise Exception(
+            """Environment variable FINN_BUILD_DIR must be set
+        correctly. Please ensure you have launched the Docker container correctly.
+        """
+        )
+
+
+class CppBuilder:
+    """Builds the g++ compiler command to produce the executable of the C++ code
+    in code_gen_dir, which is passed to the function build() of this class."""
+
+    def __init__(self):
+        self.include_paths = []
+        self.cpp_files = []
+        self.executable_path = ""
+        self.code_gen_dir = ""
+        self.compile_components = []
+        self.compile_script = ""
+
+    def append_includes(self, library_path):
+        """Adds given library path to include_paths list."""
+        self.include_paths.append(library_path)
+
+    def append_sources(self, cpp_file):
+        """Adds given c++ file to cpp_files list."""
+        self.cpp_files.append(cpp_file)
+
+    def set_executable_path(self, path):
+        """Sets member variable "executable_path" to given path."""
+        self.executable_path = path
+
+    def build(self, code_gen_dir):
+        """Builds the g++ compiler command according to entries in include_paths
+        and cpp_files lists. Saves it as a bash script in the given folder and
+        executes it."""
+        self.code_gen_dir = code_gen_dir
+        self.compile_components.append("g++ -o " + str(self.executable_path))
+        for cpp_file in self.cpp_files:
+            self.compile_components.append(cpp_file)
+        for lib in self.include_paths:
+            self.compile_components.append(lib)
+        bash_compile = ""
+        for component in self.compile_components:
+            bash_compile += str(component) + " "
+        self.compile_script = str(self.code_gen_dir) + "/compile.sh"
+        with open(self.compile_script, "w") as f:
+            f.write("#!/bin/bash \n")
+            f.write(bash_compile + "\n")
+        bash_command = ["bash", self.compile_script]
+        process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE)
+        process_compile.communicate()
+
+
+def launch_process_helper(args, proc_env=None, cwd=None):
+    """Helper function to launch a process in a way that facilitates logging
+    stdout/stderr with Python loggers.
+    Returns (cmd_out, cmd_err)."""
+    if proc_env is None:
+        proc_env = os.environ.copy()
+    with subprocess.Popen(
+        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=proc_env, cwd=cwd
+    ) as proc:
+        (cmd_out, cmd_err) = proc.communicate()
+    if cmd_out is not None:
+        cmd_out = cmd_out.decode("utf-8")
+        sys.stdout.write(cmd_out)
+    if cmd_err is not None:
+        cmd_err = cmd_err.decode("utf-8")
+        sys.stderr.write(cmd_err)
+    return (cmd_out, cmd_err)
+
+
+def which(program):
+    "Python equivalent of the shell cmd 'which'."
+
+    # source:
+    # https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
+    def is_exe(fpath):
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+    fpath, fname = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        for path in os.environ["PATH"].split(os.pathsep):
+            exe_file = os.path.join(path, program)
+            if is_exe(exe_file):
+                return exe_file
+
+    return None
diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py
new file mode 100644
index 000000000..de8aca405
--- /dev/null
+++ b/src/finn/util/data_packing.py
@@ -0,0 +1,455 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import binascii +import numpy as np +import os +import sys +from bitstring import BitArray +from qonnx.core.datatype import DataType +from qonnx.util.basic import roundup_to_integer_multiple + + +def array2hexstring(array, dtype, pad_to_nbits, prefix="0x", reverse=False): + """ + Pack given one-dimensional NumPy array with FINN DataType dtype into a hex + string. + Any BIPOLAR values will be converted to a single bit with a 0 representing + -1. + pad_to_nbits is used to prepend leading zeros to ensure packed strings of + fixed width. The minimum value for pad_to_nbits is 4, since a single hex + digit is four bits. reverse can be used to reverse the array prior to + packing. + + Examples: + + array2hexstring([1, 1, 1, 0], DataType["BINARY"], 4) = "0xe" + + array2hexstring([1, 1, 1, 0], DataType["BINARY"], 8) = "0x0e" + + array2hexstring([1, 1, 0, 1], DataType["BINARY"], 4, reverse=True) = "0xb" + + array2hexstring([1, 1, 1, 0], DataType["BINARY"], 8, reverse=True) = "0x07" + """ + if pad_to_nbits < 4: + pad_to_nbits = 4 + # ensure input is a numpy array with float values + if type(array) != np.ndarray or array.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + array = np.asarray(array, dtype=np.float32) + # ensure one-dimensional array to pack + assert array.ndim == 1, "The given array is not one-dimensional." + if dtype == DataType["BIPOLAR"]: + # convert bipolar values to binary + array = (array + 1) / 2 + dtype = DataType["BINARY"] + # reverse prior to packing, if desired + if reverse: + array = np.flip(array, -1) + lineval = BitArray(length=0) + bw = dtype.bitwidth() + # special handling for fixed point: rescale, then pack as integers + if dtype.is_fixed_point(): + sf = dtype.scale_factor() + array = array / sf + # replace dtype with signed integer equivalent + dtype = DataType["INT" + str(bw)] + for val in array: + # ensure that this value is permitted by chosen dtype + assert dtype.allowed(val), "This value is not permitted by chosen dtype." 
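+        # append val as a single bw-bit field; signed integers, unsigned
+        # integers and floats each use the matching BitArray initializer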
+ if dtype.is_integer(): + if dtype.signed(): + lineval.append(BitArray(int=int(val), length=bw)) + else: + lineval.append(BitArray(uint=int(val), length=bw)) + else: + lineval.append(BitArray(float=val, length=bw)) + if pad_to_nbits >= lineval.len: + # extend to the desired output width (a minimum of 4 bits) + lineval.prepend(BitArray(length=pad_to_nbits - lineval.len)) + else: + raise Exception("Number of bits is greater than pad_to_nbits") + # represent as hex + return prefix + lineval.hex + + +def hexstring2npbytearray(hexstring, remove_prefix="0x"): + """Convert a hex string into a NumPy array of dtype uint8. + + Example: + + hexstring2npbytearray("0f01") = array([15, 1], dtype=uint8) + """ + # remove prefix if found + if hexstring.startswith(remove_prefix): + lrp = len(remove_prefix) + hexstring = hexstring[lrp:] + # use Python's built-in bytearray + return np.asarray(bytearray.fromhex(hexstring), dtype=np.uint8) + + +def npbytearray2hexstring(npbytearray, prefix="0x"): + """Convert a NumPy array of uint8 dtype into a hex string. + + Example: + + npbytearray2hexstring(array([15, 1], dtype=uint8)) = "0x0f01" + """ + return prefix + binascii.hexlify(bytearray(npbytearray)).decode("utf-8") + + +def pack_innermost_dim_as_hex_string( + ndarray, dtype, pad_to_nbits, reverse_inner=False, prefix="0x" +): + """Pack the innermost dimension of the given numpy ndarray into hex + strings using array2hexstring. + + Examples: + + A = [[1, 1, 1, 0], [0, 1, 1, 0]] + + eA = ["0e", "06"] + + pack_innermost_dim_as_hex_string(A, DataType["BINARY"], 8) == eA + + B = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]] + + eB = [[ "0f", "0f"], ["07", "0d"]] + + pack_innermost_dim_as_hex_string(B, DataType["UINT2"], 8) == eB + """ + + if type(ndarray) != np.ndarray or ndarray.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + ndarray = np.asarray(ndarray, dtype=np.float32) + + def fun(x): + return array2hexstring( + x, dtype, pad_to_nbits, reverse=reverse_inner, prefix=prefix + ) + + return np.apply_along_axis(fun, ndarray.ndim - 1, ndarray) + + +def unpack_innermost_dim_from_hex_string( + ndarray, dtype, out_shape, packedBits, reverse_inner=False +): + """Convert a NumPy array of hex strings into a FINN NumPy array by unpacking + the hex strings into the specified data type. out_shape can be specified + such that any padding in the packing dimension is removed. 
If reverse_inner + is set, the innermost unpacked dimension will be reversed.""" + + if type(ndarray) != np.ndarray: + raise Exception( + """unpack_innermost_dim_from_hex_string needs ndarray + as input""" + ) + if ndarray.dtype.kind not in {"U", "S"}: + raise Exception( + """unpack_innermost_dim_from_hex_string needs ndarray of + hex strings as input""" + ) + # convert ndarray into flattened list + data = ndarray.flatten().tolist() + targetBits = dtype.bitwidth() + # calculate outer and inner dim shapes + outer_dim_elems = 1 + for dim in range(len(out_shape) - 1): + outer_dim_elems = outer_dim_elems * out_shape[dim] + inner_dim_elems = out_shape[-1] + + array = [] + if dtype.is_fixed_point(): + # convert fixed point as signed integer + conv_dtype = DataType["INT" + str(targetBits)] + else: + conv_dtype = dtype + for outer_elem in range(outer_dim_elems): + ar_list = [] + ar_elem = data[0] + data.pop(0) + ar_elem = ar_elem.split("x") + ar_elem_bin = bin(int(ar_elem[1], 16))[2:].zfill(packedBits) + ar_elem_bin = [int(x) for x in ar_elem_bin] + + ar_elem_bin.reverse() + for i in range(inner_dim_elems): + upper_limit = (i + 1) * targetBits + lower_limit = i * targetBits + elem = ar_elem_bin[lower_limit:upper_limit] + elem.reverse() + elem_str = "".join(map(str, elem)) + if conv_dtype == DataType["FLOAT32"]: + ar_list.append(BitArray(bin=elem_str).float) + elif conv_dtype.is_integer(): + ar_list.append(int(elem_str, 2)) + else: + raise Exception("Not implemented for conv_dtype " + conv_dtype.name) + # reverse inner dimension back to "normal" positions + if reverse_inner is False: + ar_list.reverse() + + # interpret output values correctly + + # interpret values as bipolar + if conv_dtype == DataType["BIPOLAR"]: + ar_list = [2 * x - 1 for x in ar_list] + # interpret values as signed values + elif conv_dtype.name.startswith("INT"): + mask = 2 ** (conv_dtype.bitwidth() - 1) + ar_list = [-(x & mask) + (x & ~mask) for x in ar_list] + + array.append(ar_list) + array = np.asarray(array, dtype=np.float32).reshape(out_shape) + if dtype.is_fixed_point(): + # convert signed integer to fixed point by applying scale + array = array * dtype.scale_factor() + return array + + +def numpy_to_hls_code( + ndarray, dtype, hls_var_name, pack_innermost_dim=True, no_decl=False +): + """Return C++ code representation of a numpy ndarray with FINN DataType + dtype, using hls_var_name as the resulting C++ variable name. If + pack_innermost_dim is specified, the innermost dimension of the ndarray + will be packed into a hex string using array2hexstring. If no_decl is + set to True, no variable name and type will be generated as part of the + emitted string. + """ + hls_dtype = dtype.get_hls_datatype_str() + if type(ndarray) != np.ndarray or ndarray.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + ndarray = np.asarray(ndarray, dtype=np.float32) + if pack_innermost_dim: + idimlen = ndarray.shape[-1] + idimbits = idimlen * dtype.bitwidth() + idimbits = roundup_to_integer_multiple(idimbits, 4) + ndarray = pack_innermost_dim_as_hex_string(ndarray, dtype, idimbits) + hls_dtype = "ap_uint<%d>" % idimbits + ndims = ndarray.ndim + # add type string and variable name + # e.g. 
"const ap_uint<64>" "weightMem0" + ret = "%s %s" % (hls_dtype, hls_var_name) + # add dimensions + for d in range(ndims): + ret += "[%d]" % ndarray.shape[d] + orig_printops = np.get_printoptions() + np.set_printoptions(threshold=sys.maxsize) + + # define a function to convert a single element into a C++ init string + # a single element can be a hex string if we are using packing + def elem2str(x): + if type(x) == str or type(x) == np.str_ or type(x) == np.str: + return '%s("%s", 16)' % (hls_dtype, x) + elif type(x) == np.float32: + if dtype.is_integer(): + return str(int(x)) + else: + return str(x) + else: + raise Exception("Unsupported type for numpy_to_hls_code") + + strarr = np.array2string(ndarray, separator=", ", formatter={"all": elem2str}) + np.set_printoptions(**orig_printops) + strarr = strarr.replace("[", "{").replace("]", "}") + if no_decl: + ret = strarr + ";" + else: + ret = ret + " = \n" + strarr + ";" + return ret + + +def npy_to_rtlsim_input(input_file, input_dtype, pad_to_nbits, reverse_inner=True): + """Convert the multidimensional NumPy array of integers (stored as floats) + from input_file into a flattened sequence of Python arbitrary-precision + integers, packing the innermost dimension. See + finn.util.basic.pack_innermost_dim_as_hex_string() for more info on how the + packing works. If reverse_inner is set, the innermost dimension will be + reversed prior to packing.""" + pad_to_nbits = roundup_to_integer_multiple(pad_to_nbits, 4) + if issubclass(type(input_file), np.ndarray): + inp = input_file + elif os.path.isfile(input_file): + inp = np.load(input_file) + else: + raise Exception("input_file must be ndarray or filename for .npy") + if inp.shape[-1] == 1 and input_dtype.is_integer(): + packed_data = inp.flatten().astype(input_dtype.to_numpy_dt()) + else: + packed_data = pack_innermost_dim_as_hex_string( + inp, input_dtype, pad_to_nbits, reverse_inner=reverse_inner + ) + packed_data = packed_data.flatten() + packed_data = [int(x[2:], 16) for x in packed_data] + return packed_data + + +def rtlsim_output_to_npy( + output, path, dtype, shape, packedBits, targetBits, reverse_inner=True +): + """Convert a flattened sequence of Python arbitrary-precision integers + output into a NumPy array, saved as npy file at path. Each arbitrary-precision + integer is assumed to be a packed array of targetBits-bit elements, which + will be unpacked as the innermost dimension of the NumPy array. If path is + not None it will also be saved as a npy file.""" + + # TODO should have its own testbench? + output = np.asarray([hex(int(x)) for x in output]) + out_array = unpack_innermost_dim_from_hex_string( + output, dtype, shape, packedBits=packedBits, reverse_inner=reverse_inner + ) + # make copy before saving the array + out_array = out_array.copy() + if path is not None: + np.save(path, out_array) + return out_array + + +def finnpy_to_packed_bytearray( + ndarray, dtype, reverse_inner=False, reverse_endian=False, fast_mode=False +): + """Given a numpy ndarray with FINN DataType dtype, pack the innermost + dimension and return the packed representation as an ndarray of uint8. + The packed innermost dimension will be padded to the nearest multiple + of 8 bits. The returned ndarray has the same number of dimensions as the + input. + + If fast_mode is enabled, will attempt to use shortcuts to save + on runtime for certain cases: + * 8-bit ndarray -> 8-bit + * ndarray -> 1-bit and total bits % 8 == 0 + This mode is currently not well-tested, use at your own risk! 
+ """ + + # handle fast_mode cases (currently only called from driver): + if issubclass(type(ndarray), np.ndarray) and fast_mode: + inp_is_byte = ndarray.dtype in [np.uint8, np.int8] + out_is_byte = dtype.bitwidth() == 8 + double_reverse = reverse_inner and reverse_endian + # fast mode case: byte -> byte: cast + if inp_is_byte and out_is_byte and double_reverse: + return ndarray.view(np.uint8) + # fast mode case: xxx -> bit with nbits % 8 == 0: np.packbits + out_is_bit = dtype.bitwidth() == 1 + bits = dtype.bitwidth() * ndarray.shape[-1] + bits_padded = roundup_to_integer_multiple(bits, 8) + no_pad = bits_padded == bits + if out_is_bit and no_pad and double_reverse: + in_as_int8 = ndarray.astype(np.int8) + # bipolar -> binary if needed + if dtype == DataType["BIPOLAR"]: + in_as_int8 = (in_as_int8 + 1) // 2 + # reverse inner + in_as_int8 = np.flip(in_as_int8, axis=-1) + # pack with numpy + packed_data = np.packbits(in_as_int8, axis=-1) + # reverse endianness and return + return np.flip(packed_data, axis=-1) + + if (not issubclass(type(ndarray), np.ndarray)) or ndarray.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + ndarray = np.asarray(ndarray, dtype=np.float32) + # pack innermost dim to hex strings padded to 8 bits + bits = dtype.bitwidth() * ndarray.shape[-1] + bits_padded = roundup_to_integer_multiple(bits, 8) + packed_hexstring = pack_innermost_dim_as_hex_string( + ndarray, dtype, bits_padded, reverse_inner=reverse_inner + ) + + def fn(x): + return np.asarray(list(map(hexstring2npbytearray, x))) + + if packed_hexstring.ndim == 0: + # scalar, call hexstring2npbytearray directly + ret = hexstring2npbytearray(np.asscalar(packed_hexstring)) + else: + # convert ndarray of hex strings to byte array + ret = np.apply_along_axis(fn, packed_hexstring.ndim - 1, packed_hexstring) + if reverse_endian: + # reverse the endianness of packing dimension + ret = np.flip(ret, axis=-1) + return ret + + +def packed_bytearray_to_finnpy( + packed_bytearray, + dtype, + output_shape=None, + reverse_inner=False, + reverse_endian=False, + fast_mode=False, +): + """Given a packed numpy uint8 ndarray, unpack it into a FINN array of + given DataType. + + output_shape can be specified to remove padding from the + packed dimension, or set to None to be inferred from the input. + + If fast_mode is enabled, will attempt to use shortcuts (casting) to save + on runtime for certain cases. + This mode is currently not well-tested, use at your own risk. 
+ + """ + + if ( + not issubclass(type(packed_bytearray), np.ndarray) + ) or packed_bytearray.dtype != np.uint8: + raise Exception("packed_bytearray_to_finnpy needs NumPy uint8 arrays") + if packed_bytearray.ndim == 0: + raise Exception("packed_bytearray_to_finnpy expects at least 1D ndarray") + packed_dim = packed_bytearray.ndim - 1 + packed_bits = packed_bytearray.shape[packed_dim] * 8 + target_bits = dtype.bitwidth() + if output_shape is None: + # determine output shape from input shape + assert ( + packed_bits % target_bits == 0 + ), """packed_bits are not divisable by + target_bits.""" + n_target_elems = packed_bits // target_bits + output_shape = packed_bytearray.shape[:-1] + (n_target_elems,) + # handle no-packing cases (if fast_mode) via casting to save on compute + out_is_byte = target_bits in [8, 16] + double_reverse = reverse_inner and reverse_endian + if out_is_byte and double_reverse and fast_mode: + no_unpad = np.prod(packed_bytearray.shape) == np.prod(output_shape) + if no_unpad: + as_np_type = packed_bytearray.view(dtype.to_numpy_dt()) + return as_np_type.reshape(output_shape).astype(np.float32) + if reverse_endian: + packed_bytearray = np.flip(packed_bytearray, axis=-1) + # convert innermost dim of byte array to hex strings + packed_hexstring = np.apply_along_axis( + npbytearray2hexstring, packed_dim, packed_bytearray + ) + ret = unpack_innermost_dim_from_hex_string( + packed_hexstring, dtype, output_shape, packed_bits, reverse_inner + ) + + return ret diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py new file mode 100644 index 000000000..769ddb946 --- /dev/null +++ b/src/finn/util/fpgadataflow.py @@ -0,0 +1,43 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from qonnx.util.basic import get_by_name, is_finn_op + + +def is_fpgadataflow_node(node): + """Returns True if given node is fpgadataflow node. 
Otherwise False.""" + is_node = False + if node is not None: + if is_finn_op(node.domain): + n_backend = get_by_name(node.attribute, "backend") + if n_backend is not None: + backend_value = n_backend.s.decode("UTF-8") + if backend_value == "fpgadataflow": + is_node = True + + return is_node diff --git a/src/finn/util/hls.py b/src/finn/util/hls.py new file mode 100644 index 000000000..fb23af046 --- /dev/null +++ b/src/finn/util/hls.py @@ -0,0 +1,74 @@ +# Copyright (c) 2021 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import os +import subprocess + +from finn.util.basic import which + + +class CallHLS: + """Call either vivado_hls or vitis_hls to run HLS build tcl scripts.""" + + def __init__(self, backend="vivado_hls"): + self.tcl_script = "" + self.ipgen_path = "" + self.code_gen_dir = "" + self.ipgen_script = "" + assert backend in [ + "vivado_hls", + "vitis_hls", + ], "Unrecognized backend for CallHLS" + self.backend = backend + + def append_tcl(self, tcl_script): + """Sets the tcl script to be executed.""" + self.tcl_script = tcl_script + + def set_ipgen_path(self, path): + """Sets member variable ipgen_path to given path.""" + self.ipgen_path = path + + def build(self, code_gen_dir): + """Builds the bash script with given parameters and saves it in given folder. 
+ To guarantee the generation in the correct folder the bash script contains a + cd command.""" + assert which(self.backend) is not None, "%s not found in PATH" % self.backend + self.code_gen_dir = code_gen_dir + self.ipgen_script = str(self.code_gen_dir) + "/ipgen.sh" + working_dir = os.environ["PWD"] + f = open(self.ipgen_script, "w") + f.write("#!/bin/bash \n") + f.write("cd {}\n".format(code_gen_dir)) + f.write("%s %s\n" % (self.backend, self.tcl_script)) + f.write("cd {}\n".format(working_dir)) + f.close() + bash_command = ["bash", self.ipgen_script] + process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_compile.communicate() diff --git a/src/finn/util/platforms.py b/src/finn/util/platforms.py new file mode 100644 index 000000000..8212cb571 --- /dev/null +++ b/src/finn/util/platforms.py @@ -0,0 +1,480 @@ +# Copyright (c) 2021, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import numpy as np +from abc import abstractmethod + +# contains the amount of available FPGA resources for several +# Xilinx platforms, as well as certain resource limit guidelines +# for creating designs that can achieve timing closure + +# explicit value for res types/costs we don't care about +DONT_CARE = -1 +# recommended resource limits from Xilinx for timing closure +# respectively for LUT, FF, BRAM_18K, URAM, DSP res types +DEFAULT_RES_LIMITS = np.array([0.7, 0.5, 0.80, 0.80, 0.80]) +DEFAULT_AVG_CONSTRAINTS = [((2, 3, 4), 0.7)] # + +# resources required to instantiate certain infrastructure components +# such as memory controllers and network interfaces +DDR_RESOURCE_REQUIREMENTS = { + "LUT": 33256, + "FF": 44889, + "BRAM_18K": 199, + "URAM": 0, + "DSP": 3, +} +HBM_RESOURCE_REQUIREMENTS = { + "LUT": 10718, + "FF": 21793, + "BRAM_18K": 8, + "URAM": 0, + "DSP": 0, +} + +# we assume use of VNx Alveo UDP stack +# see: https://gitenterprise.xilinx.com/mruiznog/vitis_network_layer +ETH_RESOURCE_REQUIREMENTS = { + "LUT": 35219, + "FF": 86269, + "BRAM_18K": 183, + "URAM": 0, + "DSP": 0, +} + + +class Platform: + def __init__( + self, + nslr=1, + ndevices=1, + sll_count=[], + hbm_slr=-1, + ddr_slr=[0], + eth_slr=0, + eth_gbps=0, + limits=DEFAULT_RES_LIMITS, + avg_constraints=DEFAULT_AVG_CONSTRAINTS, + ): + self.nslr = nslr + self.sll_count = sll_count + self.eth_slr = eth_slr + self.eth_gbps = eth_gbps + self.ndevices = ndevices + self.hbm_slr = hbm_slr + self.ddr_slr = ddr_slr + # limits must be a np.array either of + # the same shape as compute_resources + # or broadcastable to it + self.res_limits = limits + # list of tuples of the form ( tuple of resource positions to avg, limit ) + self.avg_constraints = avg_constraints + + @property + @abstractmethod + def compute_resources(self): + pass + + @property + def guide_resources(self): + guide = [] + # TODO: assert limits is of correct size + guide_res = ( + np.tile(np.array(self.compute_resources), (self.ndevices, 1)) + ).astype(int) + for i in range(self.nslr * self.ndevices): + # when in multi-FPGA mode, subtract cost of UDP connection from eth_slr + local_slr = i % self.nslr + if self.ndevices > 1 and local_slr == self.eth_slr: + guide_res[i][0] -= ETH_RESOURCE_REQUIREMENTS["LUT"] + guide_res[i][1] -= ETH_RESOURCE_REQUIREMENTS["FF"] + guide_res[i][2] -= ETH_RESOURCE_REQUIREMENTS["BRAM_18K"] + guide_res[i][3] -= ETH_RESOURCE_REQUIREMENTS["URAM"] + guide_res[i][4] -= ETH_RESOURCE_REQUIREMENTS["DSP"] + # subtract the cost of memory controllers + # if we have a choice between DDR and HBM, use HBM + if local_slr == self.hbm_slr: + guide_res[i][0] -= HBM_RESOURCE_REQUIREMENTS["LUT"] + guide_res[i][1] -= HBM_RESOURCE_REQUIREMENTS["FF"] + guide_res[i][2] -= HBM_RESOURCE_REQUIREMENTS["BRAM_18K"] + guide_res[i][3] -= HBM_RESOURCE_REQUIREMENTS["URAM"] + guide_res[i][4] -= HBM_RESOURCE_REQUIREMENTS["DSP"] + elif local_slr in self.ddr_slr: + guide_res[i][0] -= DDR_RESOURCE_REQUIREMENTS["LUT"] + guide_res[i][1] -= DDR_RESOURCE_REQUIREMENTS["FF"] + guide_res[i][2] -= DDR_RESOURCE_REQUIREMENTS["BRAM_18K"] + guide_res[i][3] -= DDR_RESOURCE_REQUIREMENTS["URAM"] + guide_res[i][4] -= DDR_RESOURCE_REQUIREMENTS["DSP"] + guide.append(list(guide_res[i])) + return guide + + @property + def resource_count_dict(self): + res = dict() + for i in range(self.nslr * self.ndevices): + slr_res = dict() + slr_res["LUT"] = self.compute_resources[i % self.nslr][0] + slr_res["FF"] = self.compute_resources[i % self.nslr][1] + slr_res["BRAM_18K"] = self.compute_resources[i 
% self.nslr][2]
+            slr_res["URAM"] = self.compute_resources[i % self.nslr][3]
+            slr_res["DSP"] = self.compute_resources[i % self.nslr][4]
+            res["slr" + str(i)] = slr_res
+        return res
+
+    @property
+    def compute_connection_cost(self):
+        x = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), DONT_CARE)
+        # build connection cost matrix for one device's SLRs
+        xlocal = np.full((self.nslr, self.nslr), DONT_CARE)
+        for i in range(self.nslr):
+            for j in range(self.nslr):
+                if i == j:
+                    xlocal[i][j] = 0
+                elif abs(i - j) == 1:
+                    xlocal[i][j] = 1
+        # tile connection cost matrices for entire system
+        for i in range(self.ndevices):
+            x[
+                i * self.nslr : (i + 1) * self.nslr, i * self.nslr : (i + 1) * self.nslr
+            ] = xlocal
+        # set cost for ethernet connections, assuming daisy-chaining
+        for i in range(self.ndevices - 1):
+            x[i * self.nslr + self.eth_slr][(i + 1) * self.nslr + self.eth_slr] = 10
+            x[(i + 1) * self.nslr + self.eth_slr][i * self.nslr + self.eth_slr] = 10
+        return x
+
+    @property
+    def compute_connection_resource(self):
+        sll = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), 0)
+        # build connection resource matrix for one device's SLRs
+        slllocal = np.full((self.nslr, self.nslr), -1)
+        for i in range(self.nslr):
+            for j in range(self.nslr):
+                if i == j:
+                    # no SLL constraint when going from one SLR to itself
+                    slllocal[i][j] = -1
+                else:
+                    slllocal[i][j] = self.sll_count[i][j]
+        # tile connection cost matrices for entire system
+        for i in range(self.ndevices):
+            sll[
+                i * self.nslr : (i + 1) * self.nslr, i * self.nslr : (i + 1) * self.nslr
+            ] = slllocal
+        # set cost for ethernet connections, assuming daisy-chaining
+        eth = np.full((self.nslr * self.ndevices, self.nslr * self.ndevices), 0)
+        # no Eth throughput constraints from one SLR to itself
+        for i in range(self.ndevices * self.nslr):
+            eth[i][i] = -1
+        # apply symmetric ETH throughput constraints between the SLRs that have GTXes
+        for i in range(self.ndevices - 1):
+            eth[i * self.nslr + self.eth_slr][
+                (i + 1) * self.nslr + self.eth_slr
+            ] = self.eth_gbps * (10**9)
+            eth[(i + 1) * self.nslr + self.eth_slr][
+                i * self.nslr + self.eth_slr
+            ] = self.eth_gbps * (10**9)
+        # pack sll and eth info in one list-of-list-of-tuple structure
+        constraints = []
+        for i in range(self.ndevices * self.nslr):
+            constraints_line = []
+            for j in range(self.ndevices * self.nslr):
+                # make sure not to constrain both resources at the same time
+                # constrain for Eth throughput between SLRs on different devices
+                # constrain for SLLs between SLRs on same device
+                is_offchip = i // self.nslr != j // self.nslr
+                constraints_line.append(
+                    (-1 if is_offchip else sll[i][j], eth[i][j] if is_offchip else -1)
+                )
+            constraints.append(constraints_line)
+        return constraints
+
+    def map_device_to_slr(self, idx):
+        """Given a global SLR index, return a (local SLR index, device id) tuple."""
+        assert idx < self.nslr * self.ndevices
+        return (idx % self.nslr, idx // self.nslr)
+
+
+class Zynq7020_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(Zynq7020_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[53200, 2 * 53200, 280, 0, 220] for i in range(1)]
+
+
+class ZU3EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+
+
+class Zynq7020_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(Zynq7020_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[53200, 2 * 53200, 280, 0, 220] for i in range(1)]
+
+
+class ZU3EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU3EG_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[71000, 2 * 71000, 412, 0, 360] for i in range(1)]
+
+
+class ZU7EV_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU7EV_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[230000, 2 * 230000, 610, 92, 1728] for i in range(1)]
+
+
+class ZU9EG_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU9EG_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[274000, 2 * 274000, 1824, 0, 2520] for i in range(1)]
+
+
+class ZU28DR_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        super(ZU28DR_Platform, self).__init__(
+            nslr=1,
+            ndevices=ndevices,
+            sll_count=[[0]],
+            ddr_slr=[],
+            eth_slr=0,
+            eth_gbps=1,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        return [[425000, 2 * 425000, 2160, 80, 4272] for i in range(1)]
+
+
+class Alveo_NxU50_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        # Vivado reports 23040 SLLs SLR0 <-> SLR1; 5000 is used here
+        sll_counts = [[0, 5000], [5000, 0]]
+        super(Alveo_NxU50_Platform, self).__init__(
+            nslr=2,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[],
+            hbm_slr=0,
+            eth_slr=1,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # U50 has identical resource counts on both SLRs
+        # return [[365000,2*365000,2*564, 304, 2580] for i in range(2)]
+        # we observe from Vivado that the resource counts are actually:
+        return [
+            [374400, 2 * 374400, 2 * 564, 304, 2592],
+            [368160, 2 * 368160, 2 * 564, 304, 2760],
+        ]
+
+
+class Alveo_NxU200_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        super(Alveo_NxU200_Platform, self).__init__(
+            nslr=3,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 2],
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # return [[355000, 723000, 2*638, 320, 2265],
+        #         [160000, 331000, 2*326, 160, 1317],
+        #         [355000, 723000, 2*638, 320, 2265]]
+        # we observe from Vivado that the resource counts are actually:
+        return [
+            [385920, 2 * 385920, 2 * 714, 320, 2268],
+            [199680, 2 * 199680, 2 * 420, 160, 1320],
+            [385920, 2 * 385920, 2 * 714, 320, 2268],
+        ]
+
+
+class Alveo_NxU250_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [
+            [0, 5000, 0, 0],
+            [5000, 0, 5000, 0],
+            [0, 5000, 0, 5000],
+            [0, 0, 5000, 0],
+        ]
+        super(Alveo_NxU250_Platform, self).__init__(
+            nslr=4,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 1, 2, 3],
+            eth_slr=3,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # According to UG1120:
+        # U250 has identical resource counts on all 4 SLRs:
+        # return [[345000,2*345000,2*500, 320, 2877] for i in range(4)]
+        # we observe from Vivado that the resource counts are actually:
+        return [[375000, 2 * 375000, 2 * 576, 320, 2880] for i in range(4)]
+
+
+class Alveo_NxU280_Platform(Platform):
+    def __init__(
+        self,
+        ndevices=1,
+        limits=DEFAULT_RES_LIMITS,
+        avg_constraints=DEFAULT_AVG_CONSTRAINTS,
+    ):
+        sll_counts = [[0, 5000, 0], [5000, 0, 5000], [0, 5000, 0]]
+        super(Alveo_NxU280_Platform, self).__init__(
+            nslr=3,
+            ndevices=ndevices,
+            sll_count=sll_counts,
+            ddr_slr=[0, 1],
+            hbm_slr=0,
+            eth_slr=2,
+            eth_gbps=100,
+            limits=limits,
+            avg_constraints=avg_constraints,
+        )
+
+    @property
+    def compute_resources(self):
+        # according to UG1120:
+        # return [[369000, 746000, 2*507, 320, 2733],
+        #         [333000, 675000, 2*468, 320, 2877],
+        #         [367000, 729000, 2*512, 320, 2880]]
+        # observed from Vivado:
+        return [
+            [400800, 2 * 400800, 2 * 600, 320, 2736],
+            [382080, 2 * 382080, 2 * 576, 320, 2880],
+            [380640, 2 * 380640, 2 * 576, 320, 2880],
+        ]
+
+
+platforms = dict()
+platforms["U50"] = Alveo_NxU50_Platform
+platforms["U200"] = Alveo_NxU200_Platform
+platforms["U250"] = Alveo_NxU250_Platform
+platforms["U280"] = Alveo_NxU280_Platform
+platforms["Pynq-Z1"] = Zynq7020_Platform
+platforms["Pynq-Z2"] = Zynq7020_Platform
+platforms["Ultra96"] = ZU3EG_Platform
+platforms["ZCU104"] = ZU7EV_Platform
+platforms["ZCU102"] = ZU9EG_Platform
+platforms["ZCU111"] = ZU28DR_Platform
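+
+# Minimal usage sketch (illustrative): look up a board by name and query its
+# per-SLR resource guides; "U250" is one of the keys defined above.
+#
+#   platform = platforms["U250"]()
+#   per_slr_guide = platform.guide_resources
+#   per_slr_counts = platform.resource_count_dict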
diff --git a/src/finn/util/vivado.py b/src/finn/util/vivado.py
new file mode 100644
index 000000000..bc8ca40d8
--- /dev/null
+++ b/src/finn/util/vivado.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from finn.util.basic import launch_process_helper, which
+
+
+def out_of_context_synth(
+    verilog_dir,
+    top_name,
+    fpga_part="xczu3eg-sbva484-1-e",
+    clk_name="ap_clk_0",
+    clk_period_ns=5.0,
+):
+    """Run out-of-context Vivado synthesis; return a dict with the parsed
+    resource counts, worst negative slack (WNS) and estimated fmax_mhz."""
+
+    # ensure that the OHMYXILINX envvar is set
+    if "OHMYXILINX" not in os.environ:
+        raise Exception("The environment variable OHMYXILINX is not defined.")
+    # ensure that vivado is in PATH: source $VIVADO_PATH/settings64.sh
+    if which("vivado") is None:
+        raise Exception("vivado is not in PATH, ensure settings64.sh is sourced.")
+    omx_path = os.environ["OHMYXILINX"]
+    script = "vivadocompile.sh"
+    # vivadocompile.sh <top-level-entity> <clock-name (optional)>
+    #                  <fpga-part (optional)> <clk-period-ns (optional)>
+    call_omx = "zsh %s/%s %s %s %s %f" % (
+        omx_path,
+        script,
+        top_name,
+        clk_name,
+        fpga_part,
+        float(clk_period_ns),
+    )
+    call_omx = call_omx.split()
+    launch_process_helper(call_omx, proc_env=os.environ.copy(), cwd=verilog_dir)
+
+    vivado_proj_folder = "%s/results_%s" % (verilog_dir, top_name)
+    res_counts_path = vivado_proj_folder + "/res.txt"
+
+    with open(res_counts_path, "r") as myfile:
+        res_data = myfile.read().split("\n")
+    ret = {}
+    ret["vivado_proj_folder"] = vivado_proj_folder
+    # parse lines of the form <resource>=<value>; malformed or
+    # empty lines yield a value of 0
+    for res_line in res_data:
+        res_fields = res_line.split("=")
+        try:
+            ret[res_fields[0]] = float(res_fields[1])
+        except (ValueError, IndexError):
+            ret[res_fields[0]] = 0
+    if ret["WNS"] == 0:
+        ret["fmax_mhz"] = 0
+    else:
+        ret["fmax_mhz"] = 1000.0 / (clk_period_ns - ret["WNS"])
+    return ret
-- 
GitLab