diff --git a/.gitignore b/.gitignore index f838c1695130d232ac6a2b888aed0cea31aafaa7..8b3166a44070a4575aac86c445c4504b594cda08 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,6 @@ MANIFEST # Jenkins cfg dir /docker/jenkins_home + +# SSH key dir mounted into Docker +/ssh_keys/ diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci index 2668927602ebb8de5fdc3d7c25b20a0c8c4a2e55..5772b16abc8b927def1e2dfbbb8193a2f964f87d 100644 --- a/docker/Dockerfile.finn_ci +++ b/docker/Dockerfile.finn_ci @@ -37,7 +37,7 @@ WORKDIR /workspace RUN apt-get update RUN apt-get -y upgrade RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev -RUN apt install verilator +RUN apt-get install -y verilator zsh RUN apt-get -y install sshpass RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config @@ -52,6 +52,8 @@ RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator # PYNQ-HelloWorld RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-HelloWorld +# oh-my-xilinx +RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx # checkout desired FINN branch for testing RUN git clone --branch $FINN_CI_BRANCH https://github.com/Xilinx/finn /workspace/finn @@ -64,6 +66,8 @@ ENV PYTHONPATH "${PYTHONPATH}:/workspace/finn/src" ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator" ENV PYNQSHELL_PATH "/workspace/PYNQ-HelloWorld/boards" ENV VIVADO_IP_CACHE "$BUILD_PATH/vivado_ip_cache" +ENV PATH "${PATH}:/workspace/oh-my-xilinx" +ENV OHMYXILINX "/workspace/oh-my-xilinx" # colorful terminal output RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >> /root/.bashrc diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev index 1200c7d5d15bbd62e15f19f84e70d5fe0b8aca28..0e12b504a26ccdb8fd78e162f04cfdeab5a186f1 100644 --- a/docker/Dockerfile.finn_dev +++ b/docker/Dockerfile.finn_dev @@ -37,16 +37,12 @@ ARG PASSWD ARG JUPYTER_PORT ARG NETRON_PORT -EXPOSE $JUPYTER_PORT -EXPOSE $NETRON_PORT - WORKDIR /workspace RUN apt-get update RUN apt-get -y upgrade RUN apt-get install -y build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev -RUN apt-get install verilator -RUN apt-get install nano +RUN apt-get install -y verilator nano zsh rsync RUN apt-get -y install sshpass RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config @@ -81,12 +77,16 @@ RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator # PYNQ-HelloWorld RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-HelloWorld +# oh-my-xilinx +RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx # for this developer-oriented Docker container we assume the FINN repo is cloned and mounted from the host # at /workspace/finn -- see run-docker.sh for an example of how to do this. ENV PYTHONPATH "${PYTHONPATH}:/workspace/finn/src" ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator" ENV PYNQSHELL_PATH "/workspace/PYNQ-HelloWorld/boards" +ENV PATH "${PATH}:/workspace/oh-my-xilinx" +ENV OHMYXILINX "/workspace/oh-my-xilinx" WORKDIR /home/$UNAME/finn RUN echo "PS1='\[\033[1;36m\]\u\[\033[1;31m\]@\[\033[1;32m\]\h:\[\033[1;35m\]\w\[\033[1;31m\]\$\[\033[0m\] '" >> /home/$UNAME/.bashrc @@ -100,5 +100,8 @@ RUN chmod 755 /usr/local/bin/finn_entrypoint.sh RUN chmod 755 /usr/local/bin/quicktest.sh USER $UNAME +EXPOSE $JUPYTER_PORT +EXPOSE $NETRON_PORT + ENTRYPOINT ["finn_entrypoint.sh"] CMD ["bash"] diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 7298726edc0fedbddc477413c4d5488fc7ef318c..0074cce02f7de57dc778e0b671c484233df72a8a 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -18,6 +18,7 @@ CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4 HLSLIB_COMMIT=13e9b0772a27a3a1efc40c878d8e78ed09efb716 PYVERILATOR_COMMIT=c97a5ba41bbc7c419d6f25c74cdf3bdc3393174f PYNQSHELL_COMMIT=0c82a61b0ec1a07fa275a14146233824ded7a13d +OMX_COMMIT=1bae737669901e762f581af73348332b5c4b2ada gecho "Setting up known-good commit versions for FINN dependencies" @@ -42,6 +43,10 @@ git -C /workspace/pyverilator checkout $PYVERILATOR_COMMIT --quiet gecho "PYNQ shell @ $PYNQSHELL_COMMIT" git -C /workspace/PYNQ-HelloWorld pull --quiet git -C /workspace/PYNQ-HelloWorld checkout $PYNQSHELL_COMMIT --quiet +# oh-my-xilinx +gecho "oh-my-xilinx @ $OMX_COMMIT" +git -C /workspace/oh-my-xilinx pull --quiet +git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT --quiet # source Vivado env.vars source $VIVADO_PATH/settings64.sh diff --git a/docs/finn/internals.rst b/docs/finn/internals.rst index 7a4bc687eeb827320991f7d3f1ef8cc35e97f3da..010cdece978cde078c3df4c64177fa1c5455aa0a 100644 --- a/docs/finn/internals.rst +++ b/docs/finn/internals.rst @@ -16,6 +16,9 @@ Custom Quantization Annotations ONNX does not support datatypes smaller than 8-bit integers, whereas in FINN we are interested in smaller integers down to ternary and bipolar. To make this work, FINN uses the quantization_annotation field in ONNX to annotate tensors with their FINN DataType (:py:mod:`finn.core.datatype.DataType`) information. However, all tensors are expected to use single-precision floating point (float32) storage in FINN. This means we store even a 1-bit value as floating point for the purposes of representation. The FINN compiler flow is responsible for eventually producing a packed representation for the target hardware, where the 1-bit is actually stored as 1-bit. +Note that FINN uses floating point tensors as a carrier data type to represent integers. Floating point arithmetic can introduce rounding errors, e.g. (int_num * float_scale) / float_scale is not always equal to int_num. +When using the custom ONNX execution flow, FINN will attempt to sanitize any rounding errors for integer tensors. See (:py:mod:`finn.util.basic.sanitize_quant_values`) for more information. + Custom Operations/Nodes ======================= diff --git a/run-docker.sh b/run-docker.sh index e07556716db335421f57a390f1e6a17168ac058b..00ca8f86985a78d8f2af099c51dcd4b80cd2e974 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -65,6 +65,11 @@ DOCKER_INST_NAME="finn_dev_${DOCKER_UNAME}" # ensure Docker tag and inst. name are all lowercase DOCKER_TAG=$(echo "$DOCKER_TAG" | tr '[:upper:]' '[:lower:]') DOCKER_INST_NAME=$(echo "$DOCKER_INST_NAME" | tr '[:upper:]' '[:lower:]') +# Absolute path to this script, e.g. /home/user/bin/foo.sh +SCRIPT=$(readlink -f "$0") +# Absolute path this script is in, thus /home/user/bin +SCRIPTPATH=$(dirname "$SCRIPT") + # the settings below will be taken from environment variables if available, # otherwise the defaults below will be used : ${JUPYTER_PORT=8888} @@ -74,11 +79,7 @@ DOCKER_INST_NAME=$(echo "$DOCKER_INST_NAME" | tr '[:upper:]' '[:lower:]') : ${PYNQ_BOARD="Pynq-Z1"} : ${PYNQ_TARGET_DIR="/home/xilinx/$DOCKER_INST_NAME"} : ${NUM_DEFAULT_WORKERS=1} - -# Absolute path to this script, e.g. /home/user/bin/foo.sh -SCRIPT=$(readlink -f "$0") -# Absolute path this script is in, thus /home/user/bin -SCRIPTPATH=$(dirname "$SCRIPT") +: ${FINN_SSH_KEY_DIR="$SCRIPTPATH/ssh_keys"} BUILD_LOCAL=/tmp/$DOCKER_INST_NAME VIVADO_HLS_LOCAL=$VIVADO_PATH @@ -87,6 +88,7 @@ VIVADO_IP_CACHE=$BUILD_LOCAL/vivado_ip_cache # ensure build dir exists locally mkdir -p $BUILD_LOCAL mkdir -p $VIVADO_IP_CACHE +mkdir -p $FINN_SSH_KEY_DIR gecho "Instance is named as $DOCKER_INST_NAME" gecho "Mounting $BUILD_LOCAL into $BUILD_LOCAL" @@ -133,6 +135,7 @@ docker run -t --rm --name $DOCKER_INST_NAME $DOCKER_INTERACTIVE --init \ -v $SCRIPTPATH:/workspace/finn \ -v $BUILD_LOCAL:$BUILD_LOCAL \ -v $VIVADO_PATH:$VIVADO_PATH \ +-v $FINN_SSH_KEY_DIR:/home/$DOCKER_UNAME/.ssh \ -e VIVADO_PATH=$VIVADO_PATH \ -e FINN_INST_NAME=$DOCKER_INST_NAME \ -e FINN_ROOT="/workspace/finn" \ diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py index c2f68a35076418e0cf2edb578bdb8d548772fc78..218df22e07537034b377abc077aa7902bc0c4cfc 100644 --- a/src/finn/core/onnx_exec.py +++ b/src/finn/core/onnx_exec.py @@ -39,6 +39,7 @@ from finn.core.remote_exec import remote_exec from finn.core.rtlsim_exec import rtlsim_exec from finn.custom_op.registry import getCustomOp import finn.analysis.topology as ta +from finn.util.basic import sanitize_quant_values def execute_node(node, context, graph): @@ -102,10 +103,7 @@ def execute_node(node, context, graph): raise Exception( """Output shapes disagree after node execution: found %s vs expected %s""" - % ( - str(output_list[list_ind].shape), - str(context[outp].shape), - ) + % (str(output_list[list_ind].shape), str(context[outp].shape)) ) context[outp] = output_list[list_ind] @@ -162,7 +160,14 @@ def execute_onnx(model, input_dict, return_full_exec_context=False): # we can simply walk down the list since the ONNX spec guarantees that it is # topologically sorted for node in graph.node: + # call util function match input values to quantization annotation + execution_context = sanitize_quant_values( + model, node.input, execution_context + ) execute_node(node, execution_context, graph) + execution_context = sanitize_quant_values( + model, node.output, execution_context + ) elif model_exec_mode == "remote_pynq": # use remote exec metadata built into model to execute on a remote PYNQ remote_exec(model, execution_context) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index ad44dab578b396c80af35af2ede031baca798150..1e1bee3aa7435d5cab6cbf5ea23dd37dcdfa4380 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -66,6 +66,11 @@ def rtlsim_exec(model, execution_context): i_stream_w = first_node.get_instream_width() # convert input into time multiplexed shape i_folded_shape = first_node.get_folded_input_shape() + batchsize = i_tensor.shape[0] + # override batch size for input + i_folded_shape = list(i_folded_shape) + i_folded_shape[0] = batchsize + i_folded_shape = tuple(i_folded_shape) # TODO any other layout transformations need to happen here! i_tensor = i_tensor.reshape(i_folded_shape) # extract output shape @@ -74,12 +79,20 @@ def rtlsim_exec(model, execution_context): o_dt = model.get_tensor_datatype(o_name) last_node = getCustomOp(model.find_producer(o_name)) o_folded_shape = last_node.get_folded_output_shape() + # override batch size from actual input + o_shape = list(o_shape) + o_shape[0] = batchsize + o_shape = tuple(o_shape) + o_folded_shape = list(o_folded_shape) + o_folded_shape[0] = batchsize + o_folded_shape = tuple(o_folded_shape) o_stream_w = last_node.get_outstream_width() packedBits = o_stream_w targetBits = o_dt.bitwidth() # pack input packed_input = npy_to_rtlsim_input(i_tensor, i_dt, i_stream_w) num_out_values = last_node.get_number_output_values() + num_out_values *= batchsize # prepare pyverilator model rtlsim_so = model.get_metadata_prop("rtlsim_so") if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)): diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py index 8d3dabcf8af51327d5d951464c6d9b36e2f67497..4444e7584f843cd0edb016b520d01d71e659b904 100644 --- a/src/finn/core/throughput_test.py +++ b/src/finn/core/throughput_test.py @@ -28,6 +28,10 @@ import os import subprocess +import numpy as np + +from finn.util.basic import gen_finn_dt_tensor +from finn.core.rtlsim_exec import rtlsim_exec def throughput_test(model, batchsize=1000): @@ -88,3 +92,50 @@ def throughput_test(model, batchsize=1000): return res except FileNotFoundError: return None + + +def throughput_test_rtlsim(model, batchsize=100): + """Runs a throughput test for the given IP-stitched model. When combined + with tracing, useful to determine bottlenecks and required FIFO sizes.""" + + assert ( + model.get_metadata_prop("exec_mode") == "rtlsim" + ), """Top-level exec_mode + metadata_prop must be set to rtlsim""" + + # create random input + iname = model.graph.input[0].name + ishape = model.get_tensor_shape(iname) + ishape_batch = ishape + ishape_batch[0] = batchsize + idt = model.get_tensor_datatype(iname) + dummy_input = gen_finn_dt_tensor(idt, ishape_batch) + # compute input/output sizes + oname = model.graph.output[0].name + oshape = model.get_tensor_shape(oname) + oshape_batch = oshape + oshape_batch[0] = batchsize + odt = model.get_tensor_datatype(oname) + i_bytes = (np.prod(ishape_batch) * idt.bitwidth()) / 8 + o_bytes = (np.prod(oshape_batch) * odt.bitwidth()) / 8 + # make empty exec context and insert input + ctx = model.make_empty_exec_context() + ctx[iname] = dummy_input + # remove liveness threshold, launch rtlsim + os.environ["LIVENESS_THRESHOLD"] = "-1" + rtlsim_exec(model, ctx) + # extract metrics + cycles = int(model.get_metadata_prop("sim_cycles")) + clk_ns = float(model.get_metadata_prop("clk_ns")) + fclk_mhz = 1 / (clk_ns * 0.001) + runtime_s = (cycles * clk_ns) * (10 ** -9) + res = dict() + res["cycles"] = cycles + res["runtime[ms]"] = runtime_s * 1000 + res["throughput[images/s]"] = batchsize / runtime_s + res["DRAM_in_bandwidth[Mb/s]"] = i_bytes * 0.000001 / runtime_s + res["DRAM_out_bandwidth[Mb/s]"] = o_bytes * 0.000001 / runtime_s + res["fclk[mhz]"] = fclk_mhz + res["N"] = batchsize + + return res diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py index a1524322ec03a4e96ef41f999144e3eed349c5af..4f050be8540ddf5ef48699d1658b571852ff4510 100644 --- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py +++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py @@ -29,9 +29,12 @@ import os import finn.custom_op.registry as registry -from finn.transformation import Transformation from finn.util.basic import make_build_dir from finn.util.fpgadataflow import is_fpgadataflow_node +from finn.transformation import Transformation +from finn.util.basic import get_num_default_workers +import multiprocessing as mp +import copy def _codegen_single_node(node, model): @@ -66,8 +69,40 @@ class PrepareCppSim(Transformation): that contains generated C++ code that can be used to simulate node using cppsim. The subsequent transformation is CompileCppSim""" + def __init__(self, num_workers=None): + super().__init__() + if num_workers is None: + self._num_workers = get_num_default_workers() + else: + self._num_workers = num_workers + assert self._num_workers >= 0, "Number of workers must be nonnegative." + if self._num_workers == 0: + self._num_workers = mp.cpu_count() + + def prepareCppSim_node(self, node): + print(node.name) + if is_fpgadataflow_node(node) is True: + _codegen_single_node(node, self.model) + return (node, False) + def apply(self, model): - for node in model.graph.node: - if is_fpgadataflow_node(node) is True: - _codegen_single_node(node, model) - return (model, False) + # Remove old nodes from the current model + self.model = copy.deepcopy(model) + old_nodes = [] + for i in range(len(model.graph.node)): + old_nodes.append(model.graph.node.pop()) + + # Execute transformation in parallel + with mp.Pool(self._num_workers) as p: + new_nodes_and_bool = p.map(self.prepareCppSim_node, old_nodes, chunksize=1) + + # extract nodes and check if the transformation needs to run again + # Note: .pop() had initially reversed the node order + run_again = False + for node, run in reversed(new_nodes_and_bool): + # Reattach new nodes to old model + model.graph.node.append(node) + if run is True: + run_again = True + + return (model, run_again) diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py new file mode 100644 index 0000000000000000000000000000000000000000..1d49970c819961d1794cc89e998108639ca15593 --- /dev/null +++ b/src/finn/transformation/fpgadataflow/synth_ooc.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +from shutil import copy2 + +from finn.transformation import Transformation +from finn.util.vivado import out_of_context_synth +from finn.util.basic import make_build_dir + + +class SynthOutOfContext(Transformation): + """Run out-of-context Vivado synthesis on a stitched IP design.""" + + def __init__(self, part, clk_period_ns, clk_name="ap_clk_0"): + super().__init__() + self.part = part + self.clk_period_ns = clk_period_ns + self.clk_name = clk_name + + def apply(self, model): + def file_to_basename(x): + return os.path.basename(os.path.realpath(x)) + + vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj") + assert vivado_stitch_proj_dir is not None, "Need stitched IP to run." + top_module_name = model.get_metadata_prop("wrapper_filename") + top_module_name = file_to_basename(top_module_name).strip(".v") + build_dir = make_build_dir("synth_out_of_context_") + with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f: + all_verilog_srcs = f.read().split() + for file in all_verilog_srcs: + if file.endswith(".v"): + copy2(file, build_dir) + ret = out_of_context_synth( + build_dir, top_module_name, self.part, self.clk_name, self.clk_period_ns + ) + model.set_metadata_prop("res_total_ooc_synth", str(ret)) + return (model, False) diff --git a/src/finn/transformation/lower_convs_to_matmul.py b/src/finn/transformation/lower_convs_to_matmul.py index 3da785d8dd21b2c6701bffc8ce3869fb14b237a9..aa231a43a3865a161a501b4997ff2f538800554f 100644 --- a/src/finn/transformation/lower_convs_to_matmul.py +++ b/src/finn/transformation/lower_convs_to_matmul.py @@ -80,14 +80,19 @@ class LowerConvsToMatMul(Transformation): inp_trans_out = inp_trans_out.name model.set_tensor_datatype(inp_trans_out, idt) - im2col_out = helper.make_tensor_value_info( - model.make_new_valueinfo_name(), - TensorProto.FLOAT, - (1, ofm_dim, ofm_dim, ifm_ch * k * k), - ) - graph.value_info.append(im2col_out) - im2col_out = im2col_out.name - model.set_tensor_datatype(im2col_out, idt) + need_im2col = True + if k == 1 and pad == 0 and stride == 1: + need_im2col = False + + if need_im2col: + im2col_out = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), + TensorProto.FLOAT, + (1, ofm_dim, ofm_dim, ifm_ch * k * k), + ) + graph.value_info.append(im2col_out) + im2col_out = im2col_out.name + model.set_tensor_datatype(im2col_out, idt) matmul_out = helper.make_tensor_value_info( model.make_new_valueinfo_name(), @@ -104,19 +109,23 @@ class LowerConvsToMatMul(Transformation): "Transpose", [cnv_input], [inp_trans_out], perm=[0, 2, 3, 1] ) # lower input tensor - im2col_node = helper.make_node( - "Im2Col", - [inp_trans_out], - [im2col_out], - domain="finn", - stride=stride, - kernel_size=k, - pad_amount=pad, - input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), - ) + matmul_input = inp_trans_out + if need_im2col: + matmul_input = im2col_out + im2col_node = helper.make_node( + "Im2Col", + [inp_trans_out], + [im2col_out], + domain="finn", + stride=stride, + kernel_size=k, + pad_amount=pad, + input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), + ) + # do matmul matmul_node = helper.make_node( - "MatMul", [im2col_out, weight_name], [matmul_out] + "MatMul", [matmul_input, weight_name], [matmul_out] ) # NHWC -> NCHW out_trans_node = helper.make_node( @@ -124,9 +133,13 @@ class LowerConvsToMatMul(Transformation): ) # insert nodes where the conv is to preserve topological ordering graph.node.insert(node_ind, inp_trans_node) - graph.node.insert(node_ind + 1, im2col_node) - graph.node.insert(node_ind + 2, matmul_node) - graph.node.insert(node_ind + 3, out_trans_node) + if need_im2col: + graph.node.insert(node_ind + 1, im2col_node) + graph.node.insert(node_ind + 2, matmul_node) + graph.node.insert(node_ind + 3, out_trans_node) + else: + graph.node.insert(node_ind + 1, matmul_node) + graph.node.insert(node_ind + 2, out_trans_node) # remove old nodes graph.node.remove(n) model = model.transform(InferShapes()) diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 0b6259a61d3eb67b7b38d4c6939019ce2893a875..b46b82c77a3f1b70a3b05d87cd3c48fc1d94fd45 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -27,12 +27,14 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np +import warnings from onnx import helper as oh from finn.transformation import Transformation from finn.transformation.infer_shapes import InferShapes from finn.core.onnx_exec import execute_node from finn.util.basic import get_by_name +from finn.custom_op.registry import getCustomOp class MoveAddPastMul(Transformation): @@ -531,3 +533,67 @@ class MoveMulPastFork(MoveOpPastFork): class MoveLinearPastFork(MoveOpPastFork): def __init__(self): super().__init__(["Add", "Mul"]) + + +class MoveMaxPoolPastMultiThreshold(Transformation): + """Move MaxPool nodes past MultiThreshold nodes on linear segments of the graph.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + nodes = [n for n in graph.node] + for n in nodes: + node_ind += 1 + if n.op_type == "MaxPool" and not model.is_fork_node(n): + consumer = model.find_consumer(n.output[0]) + pads = get_by_name(n.attribute, "pads") + has_padding = False + if pads is not None: + pads = list(pads.ints) + has_padding = np.prod(pads) != 0 + if consumer is not None and consumer.op_type == "MultiThreshold": + mt_out = consumer.output[0] + mt_odt = model.get_tensor_datatype(mt_out) + if mt_odt.signed() and has_padding: + warnings.warn( + "Skipping padded MaxPool + signed-output MultiThreshold" + ) + continue + # check for non-decreasing thresholds and nonnegative + # scale factor in MultiThreshold + # otherwise we cannot do the reordering + T = model.get_initializer(consumer.input[1]) + T_sorted = np.sort(T, axis=1) + assert ( + T == T_sorted + ).all(), "MultiThreshold must have non-decreasing thresholds" + mt_inst = getCustomOp(consumer) + if mt_inst.get_nodeattr("out_scale") < 0: + warnings.warn("Skipping MultiThreshold with negative out_scale") + continue + + # remove old nodes + graph.node.remove(n) + graph.node.remove(consumer) + + # swap conections + group_in = n.input[0] + # new tensor because dims change + group_middle = model.make_new_valueinfo_name() + group_out = consumer.output[0] + + consumer.input[0] = group_in + consumer.output[0] = group_middle + + n.input[0] = group_middle + n.output[0] = group_out + + # insert them back in + graph.node.insert(node_ind - 1, consumer) + graph.node.insert(node_ind, n) + + graph_modified = True + + model = model.transform(InferShapes()) + return (model, graph_modified) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index d3bfb73fe239d7194fab3760555663895a209e84..585d6b90a59ca9f3dac56a6133de705c2f56f025 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -31,6 +31,7 @@ import random import string import subprocess import tempfile +import warnings import numpy as np @@ -70,6 +71,16 @@ def get_rtlsim_trace_depth(): return 1 +def get_remote_vivado(): + """Return the address of the remote Vivado synthesis server as set by the, + REMOTE_VIVADO environment variable, otherwise return None""" + + try: + return os.environ["REMOTE_VIVADO"] + except KeyError: + return None + + def get_num_default_workers(): """Return the number of workers for parallel transformations. Controllable via the NUM_DEFAULT_WORKERS environment variable. If the env.var. is @@ -254,6 +265,69 @@ def calculate_signed_dot_prod_range(dt_a, dt_b, len): return (min_prod, max_prod) +def sanitize_quant_values(model, node_tensors, execution_context, check_values=False): + """ Sanitize given list of tensors in execution_context by rounding values + that are supposed to be integers (as indicated by their quantization + annotation). Will raise an assertion if the amount of rounding is too large. + Returns the sanitized execution context. + + If check_values is specified, an extra DataType.allowed() check will be + performed on any rounded tensors. + + Background: + FINN uses floating point tensors as a carrier data type to represent + integers. Floating point arithmetic can introduce rounding errors, e.g. + (int_num * float_scale) / float_scale is not always equal to int_num. + We use this function to ensure that the values that are supposed to be + integers are indeed integers. + """ + + for tensor in node_tensors: + dtype = model.get_tensor_datatype(tensor) + # floats don't need sanitization, skip to next + # introduces less quicker runtime + if dtype == DataType.FLOAT32: + continue + current_values = execution_context[tensor] + updated_values = current_values + has_to_be_rounded = False + # TODO: vectorize with numpy + for value in np.nditer(current_values): + if not dtype.allowed(value): + has_to_be_rounded = True + break + if has_to_be_rounded: + updated_values = np.round(current_values) + warnings.warn( + "The values of tensor {} can't be represented " + "with the set FINN datatype ({}), they will be rounded to match the " + "FINN datatype.".format(tensor, dtype) + ) + # check if rounded values are not too far from original values + max_error = max(np.abs(current_values - updated_values).flatten()) + if max_error <= 1e-4: + if check_values is True: + # check again if values can now be represented with set finn datatype + # TODO: vectorize with numpy + for value in np.nditer(updated_values): + if not dtype.allowed(value): + raise Exception( + """Values can't be represented with set + finn datatype ({}) for input {}""".format( + dtype, tensor + ) + ) + execution_context[tensor] = updated_values + else: + raise Exception( + """Rounding error is too high to match set FINN + datatype ({}) for input {}""".format( + dtype, tensor + ) + ) + return execution_context + + class CppBuilder: """Builds the g++ compiler command to produces the executable of the c++ code in code_gen_dir which is passed to the function build() of this class.""" diff --git a/src/finn/util/create.py b/src/finn/util/create.py new file mode 100644 index 0000000000000000000000000000000000000000..853cdd0d44a05426b34bf1db3caa58d9289b2e9e --- /dev/null +++ b/src/finn/util/create.py @@ -0,0 +1,178 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +from finn.core.modelwrapper import ModelWrapper +from onnx import TensorProto, helper +from finn.core.datatype import DataType +from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor + + +def hls_random_mlp_maker(layer_spec): + """Create an MLP of given specification using HLSCustomOp instances. + Generate random weights/thresholds of appropriate size.""" + ret = [] + for l in layer_spec: + idt = l["idt"] + wdt = l["wdt"] + mw = l["mw"] + mh = l["mh"] + act = l["act"] + l["W"] = gen_finn_dt_tensor(wdt, (mw, mh)) + if act is None: + # no activation, produce accumulators + T = None + tdt = None + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + odt = DataType.UINT32 + else: + odt = DataType.INT32 + else: + odt = act + (min, max) = calculate_signed_dot_prod_range(idt, wdt, mw) + n_steps = act.get_num_possible_values() - 1 + T = np.random.randint(min, max - 1, (mh, n_steps)).astype(np.float32) + # provide non-decreasing thresholds + T = np.sort(T, axis=1) + # generate thresholds for activation + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + tdt = DataType.UINT32 + # bias thresholds to be positive + T = np.ceil((T + mw) / 2) + assert (T >= 0).all() + else: + tdt = DataType.INT32 + l["T"] = T + l["tdt"] = tdt + l["odt"] = odt + ret.append(l) + + return hls_mlp_maker(ret) + + +def hls_mlp_maker(layer_spec): + """Create an MLP of given specification using HLSCustomOp instances.""" + + current_in_name = "" + current_out_name = "" + i = 0 + + graph = helper.make_graph(nodes=[], name="mlp", inputs=[], outputs=[]) + + model = helper.make_model(graph, producer_name="finn") + model = ModelWrapper(model) + + for l in layer_spec: + current_W_name = "W_%d" % i + current_T_name = "T_%d" % i + current_in_name = "act_%d" % i + current_out_name = "act_%d" % (i + 1) + + W = l["W"] + (mw, mh) = W.shape + T = l["T"] + pe = l["pe"] + simd = l["simd"] + wdt = l["wdt"] + idt = l["idt"] + tdt = l["tdt"] + odt = l["odt"] + + if i == 0: + global_in = helper.make_tensor_value_info( + current_in_name, TensorProto.FLOAT, [1, mw] + ) + model.graph.input.append(global_in) + + if i == len(layer_spec) - 1: + global_out = helper.make_tensor_value_info( + current_out_name, TensorProto.FLOAT, [1, mh] + ) + model.graph.output.append(global_out) + + # there are two ways to implement bipolar weights and inputs for + # StreamingFC: + # - specify their datatypes as such + # - specify their datatypes as BINARY as use binaryXnorMode + if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: + # we'll internally convert weights/inputs to binary and specify the + # datatypes as such, and also set the binaryXnorMode attribute to 1 + export_wdt = DataType.BINARY + export_idt = DataType.BINARY + binary_xnor_mode = 1 + else: + export_wdt = wdt + export_idt = idt + binary_xnor_mode = 0 + + if T is not None: + no_act = 0 + node_inp_list = [current_in_name, current_W_name, current_T_name] + if odt == DataType.BIPOLAR: + actval = 0 + else: + actval = odt.min() + else: + # no thresholds + node_inp_list = [current_in_name, current_W_name] + actval = 0 + no_act = 1 + FCLayer_node = helper.make_node( + "StreamingFCLayer_Batch", + node_inp_list, + [current_out_name], + domain="finn", + backend="fpgadataflow", + resType="ap_resource_lut()", + MW=mw, + MH=mh, + SIMD=simd, + PE=pe, + inputDataType=export_idt.name, + weightDataType=export_wdt.name, + outputDataType=odt.name, + ActVal=actval, + binaryXnorMode=binary_xnor_mode, + noActivation=no_act, + ) + + model.graph.node.append(FCLayer_node) + model.set_tensor_datatype(current_in_name, idt) + model.set_tensor_datatype(current_out_name, odt) + model.set_tensor_datatype(current_W_name, wdt) + if binary_xnor_mode: + # convert bipolar to binary + model.set_initializer(current_W_name, (W + 1) / 2) + else: + model.set_initializer(current_W_name, W) + if T is not None: + model.set_tensor_datatype(current_T_name, tdt) + model.set_initializer(current_T_name, T) + i += 1 + + return model diff --git a/src/finn/util/vivado.py b/src/finn/util/vivado.py new file mode 100644 index 0000000000000000000000000000000000000000..6b6df3940cfeeed292345382471719c49f725de6 --- /dev/null +++ b/src/finn/util/vivado.py @@ -0,0 +1,147 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import subprocess +import stat +from finn.util.basic import get_remote_vivado + + +def which(program): + "Python equivalent of the shell cmd 'which'." + + # source: + # https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + + +def out_of_context_synth( + verilog_dir, + top_name, + fpga_part="xczu3eg-sbva484-1-e", + clk_name="ap_clk_0", + clk_period_ns=5.0, + remote_server=get_remote_vivado(), +): + "Run out-of-context Vivado synthesis, return resources and slack." + + # ensure that the OH_MY_XILINX envvar is set + if "OHMYXILINX" not in os.environ: + raise Exception("The environment variable OHMYXILINX is not defined.") + # ensure that vivado is in PATH: source $VIVADO_PATH/settings64.sh + if which("vivado") is None: + raise Exception("vivado is not in PATH, ensure settings64.sh is sourced.") + omx_path = os.environ["OHMYXILINX"] + if remote_server is None: + script = "vivadocompile.sh" + else: + script = "vivadoprojgen.sh" + # vivadocompile.sh <top-level-entity> <clock-name (optional)> <fpga-part (optional)> + call_omx = "zsh %s/%s %s %s %s %f" % ( + omx_path, + script, + top_name, + clk_name, + fpga_part, + float(clk_period_ns), + ) + call_omx = call_omx.split() + proc = subprocess.Popen( + call_omx, cwd=verilog_dir, stdout=subprocess.PIPE, env=os.environ + ) + proc.communicate() + + vivado_proj_folder = "%s/results_%s" % (verilog_dir, top_name) + res_counts_path = vivado_proj_folder + "/res.txt" + if remote_server is not None: + print("Using remote Vivado OOC synth, remote server %s" % remote_server) + run_synth = """ +#!/bin/bash +which vivado; +cd %s; +vivado -mode tcl -source %s.tcl -tclargs %s; +cat %s + """ % ( + vivado_proj_folder, + top_name, + top_name, + res_counts_path, + ) + with open(vivado_proj_folder + "/run.sh", "w") as f: + f.write(run_synth) + st = os.stat(vivado_proj_folder + "/run.sh") + os.chmod(vivado_proj_folder + "/run.sh", st.st_mode | stat.S_IEXEC) + # note that this assumes the same temp folder can be created on the + # remote server + # note we set target path as / due to use of -R (relative) + remote_server_uri = remote_server + ":/" + copy_files = "rsync -avzR %s %s" % (verilog_dir + "/", remote_server_uri) + copy_files = copy_files.split() + proc = subprocess.Popen(copy_files, cwd=verilog_dir, env=os.environ) + proc.communicate() + vivado_cmd = "bash -ic %s/run.sh" % vivado_proj_folder + run_vivado = ["ssh", "-t", remote_server, vivado_cmd] + proc = subprocess.Popen(run_vivado, cwd=verilog_dir, env=os.environ) + proc.communicate() + remote_server_result = remote_server + ":" + res_counts_path + copy_results = "rsync -avz %s %s" % (remote_server_result, res_counts_path) + copy_results = copy_results.split() + proc = subprocess.Popen(copy_results, cwd=verilog_dir, env=os.environ) + proc.communicate() + + with open(res_counts_path, "r") as myfile: + res_data = myfile.read().split("\n") + ret = {} + ret["vivado_proj_folder"] = vivado_proj_folder + for res_line in res_data: + res_fields = res_line.split("=") + print(res_fields) + try: + ret[res_fields[0]] = float(res_fields[1]) + except ValueError: + ret[res_fields[0]] = 0 + except IndexError: + ret[res_fields[0]] = 0 + if ret["WNS"] == 0: + ret["fmax_mhz"] = 0 + else: + ret["fmax_mhz"] = 1000.0 / (clk_period_ns - ret["WNS"]) + return ret diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py index c9d8f2d812bc7bea1a2fd2598a7711099ad421e6..c5ddad12ca3e8d353682fbb20449d44358485f69 100644 --- a/tests/brevitas/test_brevitas_relu_act_export.py +++ b/tests/brevitas/test_brevitas_relu_act_export.py @@ -23,6 +23,7 @@ export_onnx_path = "test_act.onnx" def test_brevitas_act_export_relu(abits, max_val, scaling_impl_type): min_val = -1.0 ishape = (1, 15) + b_act = QuantReLU( bit_width=abits, max_val=max_val, @@ -67,3 +68,60 @@ scaling_impl.learned_value": torch.tensor( assert np.isclose(produced, expected, atol=1e-3).all() os.remove(export_onnx_path) + + +@pytest.mark.parametrize("abits", [1, 2, 4, 8]) +@pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)]) +@pytest.mark.parametrize("scaling_per_channel", [True, False]) +def test_brevitas_act_export_relu_imagenet(abits, max_val, scaling_per_channel): + out_channels = 32 + ishape = (1, out_channels, 1, 1) + min_val = -1.0 + b_act = QuantReLU( + bit_width=abits, + quant_type=QuantType.INT, + scaling_impl_type=ScalingImplType.PARAMETER, + scaling_per_channel=scaling_per_channel, + restrict_scaling_type=RestrictValueType.LOG_FP, + scaling_min_val=2e-16, + max_val=6.0, + return_quant_tensor=True, + per_channel_broadcastable_shape=(1, out_channels, 1, 1), + ) + if scaling_per_channel is True: + rand_tensor = (2) * torch.rand((1, out_channels, 1, 1)) + else: + rand_tensor = torch.tensor(1.2398) + checkpoint = { + "act_quant_proxy.fused_activation_quant_proxy.tensor_quant.\ +scaling_impl.learned_value": rand_tensor.type( + torch.FloatTensor + ) + } + b_act.load_state_dict(checkpoint) + bo.export_finn_onnx(b_act, ishape, export_onnx_path) + model = ModelWrapper(export_onnx_path) + model = model.transform(InferShapes()) + inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype( + np.float32 + ) + idict = {model.graph.input[0].name: inp_tensor} + odict = oxe.execute_onnx(model, idict, True) + produced = odict[model.graph.output[0].name] + inp_tensor = torch.from_numpy(inp_tensor).float() + b_act.eval() + expected = b_act.forward(inp_tensor).tensor.detach().numpy() + if not np.isclose(produced, expected, atol=1e-3).all(): + print(abits, max_val) + print("scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach()) + if abits < 5: + print( + "thres:", + ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]), + ) + print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]])) + print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]])) + print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]])) + + assert np.isclose(produced, expected, atol=1e-3).all() + os.remove(export_onnx_path) diff --git a/tests/core/test_basic_onnx_exec.py b/tests/core/test_basic_onnx_exec.py index a7b6da9965aa5912870812a8c1f8d6da2ee0d181..7b0412432cc6360cb9c42d66417bd187ed142563 100644 --- a/tests/core/test_basic_onnx_exec.py +++ b/tests/core/test_basic_onnx_exec.py @@ -35,6 +35,8 @@ import onnx.numpy_helper as np_helper import finn.core.onnx_exec as oxe from finn.core.modelwrapper import ModelWrapper from finn.transformation.infer_shapes import InferShapes +from finn.core.datatype import DataType +from finn.util.basic import gen_finn_dt_tensor def test_mnist_onnx_download_extract_run(): @@ -53,3 +55,30 @@ def test_mnist_onnx_download_extract_run(): assert np.isclose( np_helper.to_array(output_tensor), output_dict["Plus214_Output_0"], atol=1e-3 ).all() + + +def test_onnx_exec_internal_rounding(): + inp0 = onnx.helper.make_tensor_value_info("inp0", onnx.TensorProto.FLOAT, [2, 2]) + inp1 = onnx.helper.make_tensor_value_info("inp1", onnx.TensorProto.FLOAT, [1]) + outp = onnx.helper.make_tensor_value_info("outp", onnx.TensorProto.FLOAT, [2, 2]) + mul_node = onnx.helper.make_node("Mul", inputs=["inp0", "inp1"], outputs=["outp"],) + graph = onnx.helper.make_graph( + nodes=[mul_node], name="mul_graph", inputs=[inp0, inp1], outputs=[outp] + ) + + model = onnx.helper.make_model(graph, producer_name="mul-model") + model = ModelWrapper(model) + idt = DataType.INT2 + model.set_tensor_datatype("inp0", idt) + model.set_tensor_datatype("inp1", idt) + model.transform(InferShapes()) + + mul_value = np.asarray([-1], dtype=np.float32) + inp_int = gen_finn_dt_tensor(idt, [2, 2]) + scale = np.random.uniform(low=0, high=1, size=(2, 2)).astype(np.float32) + inp_rounded = (inp_int * scale) / (scale + 1e-7) + input_dict = {"inp0": inp_rounded, "inp1": mul_value} + output_dict = oxe.execute_onnx(model, input_dict) + produced = output_dict["outp"] + expected = np.multiply(inp_int, mul_value) + assert (produced == expected).all() diff --git a/tests/end2end/test_end2end_cnv_w1a1.py b/tests/end2end/test_end2end_cnv_w1a1.py index c3359dcc82650bf0e9e8a5bc5276f5ca770ee96c..e3f281904d7db1349d74d6eb70cad20a8f3d10af 100644 --- a/tests/end2end/test_end2end_cnv_w1a1.py +++ b/tests/end2end/test_end2end_cnv_w1a1.py @@ -72,6 +72,7 @@ from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.core.throughput_test import throughput_test_rtlsim build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -142,15 +143,15 @@ def test_end2end_cnv_w1a1_fold_and_tlastmarker(): fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch") # each tuple is (PE, SIMD, in_fifo_depth) for a layer folding = [ - (16, 3, 128), - (32, 32, 128), - (16, 32, 128), - (16, 32, 128), - (4, 32, 81), + (16, 3, 256), + (32, 32, 256), + (16, 32, 256), + (16, 32, 256), + (4, 32, 214), (1, 32, 2), - (1, 4, 2), - (1, 8, 128), - (5, 1, 3), + (1, 4, 126), + (1, 8, 62), + (5, 1, 6), ] for fcl, (pe, simd, ififodepth) in zip(fc_layers, folding): fcl_inst = getCustomOp(fcl) @@ -159,10 +160,12 @@ def test_end2end_cnv_w1a1_fold_and_tlastmarker(): fcl_inst.set_nodeattr("inFIFODepth", ififodepth) swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator") + swg_idepth = [2, 51, 9, 106, 2, 2] for i in range(len(swg_layers)): swg_inst = getCustomOp(swg_layers[i]) simd = folding[i][1] swg_inst.set_nodeattr("SIMD", simd) + swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i]) model = model.transform(InsertDWC()) model = model.transform(InsertFIFO()) @@ -221,6 +224,20 @@ def test_end2end_cnv_w1a1_verify_dataflow_part(): assert np.isclose(res_cppsim, res_rtlsim_whole).all() +@pytest.mark.vivado +def test_end2end_cnv_w1a1_throughput_test_rtlsim(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx" + ) + model.set_metadata_prop("rtlsim_trace", "rtlsim_trace.vcd") + # os.environ["RTLSIM_TRACE_DEPTH"] = "4" + # run through IP-stitched rtlsim with increasing batch sizes and + # check the number of cycles it takes to execute + ret = throughput_test_rtlsim(model, 10) + # TODO check for expected performance + assert ret["cycles"] > 0 + + @pytest.mark.vivado def test_end2end_cnv_w1a1_verify_all(): # use the streamlined model as the "golden" model for right answers diff --git a/tests/end2end/test_end2end_tfc_w1a1.py b/tests/end2end/test_end2end_tfc_w1a1.py index 13758e01e1df96a79658f5ebc7501c9fb43d0882..ebfed5e571f1e7e2499c3501c6859239a329677a 100644 --- a/tests/end2end/test_end2end_tfc_w1a1.py +++ b/tests/end2end/test_end2end_tfc_w1a1.py @@ -72,6 +72,7 @@ from finn.util.basic import pynq_part_map from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.core.throughput_test import throughput_test_rtlsim import finn.util.vcd as vcd build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] @@ -225,6 +226,21 @@ def test_end2end_tfc_w1a1_verify_fifo_fullness(): ) +@pytest.mark.vivado +def test_end2end_tfc_w1a1_throughput_test_rtlsim(): + model = load_test_checkpoint_or_skip( + build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx" + ) + # run through IP-stitched rtlsim with increasing batch sizes and + # check the number of cycles it takes to execute + ret = throughput_test_rtlsim(model, 1) + assert ret["cycles"] == 205 + ret = throughput_test_rtlsim(model, 10) + assert ret["cycles"] == 844 + ret = throughput_test_rtlsim(model, 100) + assert ret["cycles"] == 7234 + + @pytest.mark.vivado def test_end2end_tfc_w1a1_verify_all(): # use the streamlined model as the "golden" model for right answers diff --git a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py similarity index 94% rename from tests/fpgadataflow/test_fpgadataflow_ip_stitch.py rename to tests/fpgadataflow/test_fpgadataflow_ipstitch.py index 16100522aa94fd25d234efa1d03edfdc866ca1bb..b830693c32afe629dd6fc70868d0bddacac4c887 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -53,6 +53,7 @@ from finn.transformation.general import GiveUniqueNodeNames from finn.util.basic import gen_finn_dt_tensor, pynq_part_map from finn.util.fpgadataflow import pyverilate_stitched_ip from finn.util.test import load_test_checkpoint_or_skip +from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") test_fpga_part = pynq_part_map[test_pynq_board] @@ -281,6 +282,27 @@ def test_fpgadataflow_ipstitch_rtlsim(): assert (rtlsim_res == x).all() +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_ipstitch_synth_ooc(): + model = load_test_checkpoint_or_skip( + ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx" + ) + model = model.transform(SynthOutOfContext(test_fpga_part, 5)) + ret = model.get_metadata_prop("res_total_ooc_synth") + assert ret is not None + # example expected output: (details may differ based on Vivado version etc) + # "{'vivado_proj_folder': ..., + # 'LUT': 708.0, 'FF': 1516.0, 'DSP': 0.0, 'BRAM': 0.0, 'WNS': 0.152, '': 0, + # 'fmax_mhz': 206.27062706270627}" + ret = eval(ret) + assert ret["LUT"] > 0 + assert ret["FF"] > 0 + assert ret["DSP"] == 0 + assert ret["BRAM"] == 0 + assert ret["fmax_mhz"] > 100 + + @pytest.mark.vivado def test_fpgadataflow_ipstitch_pynq_projgen(): model = load_test_checkpoint_or_skip( diff --git a/tests/transformation/test_conv_lowering.py b/tests/transformation/test_conv_lowering.py index 2cbc8e558940517168678b05c3bb46af8170abce..73891ded1b9691c7c48a2075ad6ca4668fcf6bfe 100644 --- a/tests/transformation/test_conv_lowering.py +++ b/tests/transformation/test_conv_lowering.py @@ -26,12 +26,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import onnx.helper as oh +from onnx import TensorProto import os import pkg_resources as pk import brevitas.onnx as bo import numpy as np - from finn.core.modelwrapper import ModelWrapper from finn.transformation.fold_constants import FoldConstants from finn.transformation.infer_shapes import InferShapes @@ -65,3 +66,51 @@ def test_conv_lowering_cnv_w1a1(): assert np.isclose(produced, expected).all() assert np.argmax(produced) == 3 os.remove(export_onnx_path) + + +def test_conv_lowering_conv_1x1(): + np.random.seed(0) + + in_feature_dim = 7 + in_chn = 3 + kernel_size = 1 + out_feature_dim = in_feature_dim + + input_shape = [1, in_chn, in_feature_dim, in_feature_dim] + output_shape = [1, in_chn, out_feature_dim, out_feature_dim] + + conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size] + + conv_config = {} + conv_config["dilations"] = [1, 1] + conv_config["group"] = 1 + conv_config["kernel_shape"] = [kernel_size, kernel_size] + conv_config["pads"] = [0, 0, 0, 0] + conv_config["strides"] = [1, 1] + + top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape) + + value_info = [oh.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)] + + modelproto = oh.make_model( + oh.make_graph( + name="test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=[oh.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)], + ) + ) + model = ModelWrapper(modelproto) + model = model.transform(InferShapes()) + model.set_initializer("p1", np.random.rand(*conv_param_shape).astype(np.float32)) + + new_model = model.transform(LowerConvsToMatMul()) + inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)} + + assert oxe.compare_execution(model, new_model, inp_dict) + assert new_model.graph.node[0].op_type == "Transpose" + assert new_model.graph.node[1].op_type == "MatMul" + assert new_model.graph.node[2].op_type == "Transpose" + assert len(new_model.graph.node) == 3 diff --git a/tests/transformation/test_move_maxpool_past_multithreshold.py b/tests/transformation/test_move_maxpool_past_multithreshold.py new file mode 100644 index 0000000000000000000000000000000000000000..2fc19debf8d6fc89d15e3d731f1e54daa491c321 --- /dev/null +++ b/tests/transformation/test_move_maxpool_past_multithreshold.py @@ -0,0 +1,100 @@ +from onnx import TensorProto, helper +import numpy as np + +import finn.core.onnx_exec as oxe +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.streamline.reorder import MoveMaxPoolPastMultiThreshold +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes + + +def get_multithreshold_rand_params(channels, num_of_thres, seed=None): + if seed is not None: + np.random.seed(seed) + steps = np.random.rand(channels, 1) * 2 + bias = np.random.rand(channels, 1) * 10 + thres = [np.arange(num_of_thres) for chn in range(channels)] + thres = ((thres - bias) * steps).astype(np.float32) + return thres + + +def test_move_maxpool_past_multithreshold(): + # generate test vectors of correct shape + ch = 64 + ifmdim = 16 + ofmdim = 16 // 4 + input_shape = (1, ch, ifmdim, ifmdim) + output_shape = (1, ch, ofmdim, ofmdim) + + top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape) + + maxpool_config = {} + maxpool_config["pads"] = [1, 1, 1, 1] + maxpool_config["kernel_shape"] = [3, 3] + maxpool_config["strides"] = [2, 2] + + value_info = [] + thres1_shape = [1, 1] + value_info += [ + helper.make_tensor_value_info("thres1", TensorProto.FLOAT, thres1_shape) + ] + + thres2_shape = [ch, 14] + value_info += [ + helper.make_tensor_value_info("thres2", TensorProto.FLOAT, thres2_shape) + ] + + nodes = [] + nodes += [helper.make_node("MaxPool", ["top_in"], ["t1"], **maxpool_config)] + nodes += [ + helper.make_node( + "MultiThreshold", + ["t1", "thres1"], + ["t2"], + domain="finn", + out_dtype="BIPOLAR", + out_bias=-1.0, + out_scale=1.0, + ) + ] + nodes += [helper.make_node("MaxPool", ["t2"], ["t3"], **maxpool_config)] + nodes += [ + helper.make_node( + "MultiThreshold", + ["t3", "thres2"], + ["top_out"], + domain="finn", + out_dtype="UINT4", + ) + ] + + modelproto = helper.make_model( + helper.make_graph( + name="test", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=nodes, + ) + ) + model = ModelWrapper(modelproto) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + + model.set_initializer("thres1", np.array([[0]])) + model.set_initializer( + "thres2", get_multithreshold_rand_params(*thres2_shape, seed=0) + ) + + # Transform + new_model = model.transform(MoveMaxPoolPastMultiThreshold()) + inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)} + + # Test + assert oxe.compare_execution(model, new_model, inp_dict) + assert new_model.graph.node[0].op_type == "MaxPool" + assert new_model.graph.node[1].op_type == "MultiThreshold" + assert new_model.graph.node[2].op_type == "MultiThreshold" + assert new_model.graph.node[3].op_type == "MaxPool" + assert len(new_model.graph.node) == 4 diff --git a/tests/util/test_create.py b/tests/util/test_create.py new file mode 100644 index 0000000000000000000000000000000000000000..7173add35abf04a35c33b0ef10b42ffdb296a653 --- /dev/null +++ b/tests/util/test_create.py @@ -0,0 +1,64 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest +import finn.util.create as create +from finn.core.datatype import DataType + + +@pytest.mark.parametrize("bitwidth", [DataType.BIPOLAR, DataType.INT2, DataType.INT4]) +def test_hls_random_mlp_maker(bitwidth): + w = bitwidth + a = bitwidth + layer_spec = [ + { + "mw": 185, + "mh": 100, + "simd": 185, + "pe": 100, + "idt": DataType.BIPOLAR, + "wdt": w, + "act": a, + }, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a}, + { + "mw": 100, + "mh": 1, + "simd": 100, + "pe": 1, + "idt": a, + "wdt": w, + "act": DataType.BIPOLAR, + }, + ] + + ret = create.hls_random_mlp_maker(layer_spec) + assert len(ret.graph.node) == 5 + ret.save("mlp-%s.onnx" % str(bitwidth))