diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 2fbb9265beb49644f08a2c6e916ab9c23d4bd339..20f5b48f7acc65ab18702ef2509e9791f919b825 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -13,10 +13,10 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 - name: Run Lint - uses: pre-commit/action@v2.0.0 + uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml index d188007465cd27662ffadfb3ece0d8bf2e8e28be..ec92c84665d868b8a4376c82ecdf72395f1367a8 100644 --- a/.github/workflows/quicktest-dev-pr.yml +++ b/.github/workflows/quicktest-dev-pr.yml @@ -17,37 +17,9 @@ jobs: - name: checkout uses: actions/checkout@v2 - - name: set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: cache Docker layers - uses: actions/cache@v2 - with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- - - - name: Build and push - uses: docker/build-push-action@v2 - with: - file: docker/Dockerfile.finn - context: . - push: false - load: true - tags: finn_gha - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache-new - - - # Temp fix - # https://github.com/docker/build-push-action/issues/252 - # https://github.com/moby/buildkit/issues/1896 - name: Move cache - run: | - rm -rf /tmp/.buildx-cache - mv /tmp/.buildx-cache-new /tmp/.buildx-cache - - - name: DockerRunQuicktest run: | - docker run --init --hostname finn_gha -w $(pwd) -v $(pwd):$(pwd) -e FINN_BUILD_DIR=/tmp/finn_gha -e FINN_INST_NAME=finn_gha finn_gha quicktest.sh + export FINN_ROOT=$(pwd) + export FINN_BUILD_DIR=/tmp/finn_gha + export FINN_INST_NAME=finn_gha + ./run-docker.sh quicktest diff --git a/AUTHORS.rst b/AUTHORS.rst index 1d42d35a3b269176fcab79d8239b84ac8442fa43..d011ce3d7ad74125b7013b7a7e987eb22e70a9f3 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -2,8 +2,9 @@ Contributors ============ -* Yaman Umuroglu (@maltanar) (maintainer) -* Jakoba Petri-Koenig (@auphelia) +* Jakoba Petri-Koenig (@auphelia) (maintainer) +* Thomas Preusser (@preusser) +* Yaman Umuroglu (@maltanar) * Andrea Rigoni (@AndreaRigoni) * Hendrik Borras (@HenniOVP) * Lucian Petrica (@quetric) @@ -22,3 +23,6 @@ Contributors * Javier Duarte (@jmduarte) * Uma Maheshwari (@umav1511) * José Rosa (@pinxau1000) +* Aziz Bahri (@azizb-xlnx) +* Fionn O'Donohoe (@fionnodonohoe-xlnx) +* Matthias Gehre (@mgehre-amd) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index deed89651db34d3821df35c8a1eb0f85b72f23a5..d376a1b42b0f1f3856f40b3993533785fb254a9b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ We welcome contributions to FINN. Please follow the steps below and be sure that your contribution complies with our guidelines. -1. Share your proposal via <a href="https://github.com/Xilinx/finn/issues" target="_blank">Github issues</a>. If you are looking for some issues to get started with, we have a list of <a href="https://github.com/Xilinx/finn/labels/good%20first%20issue">good first issues</a> in the issue tracker. Feel free to ask questions on the <a href="https://gitter.im/xilinx-finn/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge">FINN gitter channel as well</a>. +1. Share your proposal via <a href="https://github.com/Xilinx/finn/issues" target="_blank">Github issues</a>. 
If you are looking for some issues to get started with, we have a list of <a href="https://github.com/Xilinx/finn/labels/good%20first%20issue">good first issues</a> in the issue tracker. Feel free to ask questions in the <a href="https://github.com/Xilinx/finn/discussions">FINN GitHub discussions</a> as well.

 We welcome submissions to:

@@ -31,4 +31,4 @@ Please follow the steps below and be sure that your contribution complies with o
 3. We will review your contribution and, if any additional fixes or modifications
 are necessary, may provide feedback to guide you. When accepted, your pull request will
-be merged to the repository. If you have more questions please contact us via the <a href="https://gitter.im/xilinx-finn/community" target="_blank">FINN gitter channel</a>.
+be merged to the repository. If you have more questions, please contact us.
diff --git a/README.md b/README.md
index 4cc995fc8c991ccc851e95fd30897aeea8ca266a..1b8efc8f19d0b664a17320585f5ea60acbe03eb4 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,7 @@ Please see the [Getting Started](https://finn.readthedocs.io/en/latest/getting_s

 ## What's New in FINN?

-* **2021-11-05:** v0.7 is released, introducing QONNX support, three new example networks and many other improvements. Read more on the [v0.7 release blog post](https://xilinx.github.io/finn//2021/11/05/finn-v07-is-released.html).
-* **2021-06-15:** v0.6 is released, with ResNet-50 on U250 and ZCU104 MobileNet-v1 in finn-examples showcasing new features plus a lot more. Read more on the [v0.6 release blog post](https://xilinx.github.io/finn//2021/06/15/finn-v06-is-released.html).
-* **2020-12-17:** v0.5b (beta) is released, with a new [examples repo](https://github.com/Xilinx/finn-examples) including MobileNet-v1. Read more on the <a href="https://xilinx.github.io/finn/2020/12/17/finn-v05b-beta-is-released.html">release blog post</a>.
+* All news and announcements are posted in the [GitHub Discussions Announcements category](https://github.com/Xilinx/finn/discussions/categories/announcements).
## Documentation diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn index 1a1ae4b4f4d55dff231ccda6c3885ecef436b0b8..da88c7fbfa5fd3454cd7945dd5febd31736db0ce 100644 --- a/docker/Dockerfile.finn +++ b/docker/Dockerfile.finn @@ -28,6 +28,9 @@ FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime LABEL maintainer="Yaman Umuroglu <yamanu@xilinx.com>" + +ARG XRT_DEB_VERSION="xrt_202210.2.13.466_18.04-amd64-xrt" + WORKDIR /workspace # some Vitis deps require a timezone to be specified, which hangs in Docker @@ -49,7 +52,6 @@ RUN apt-get update && \ libsm6 \ libxext6 \ libxrender-dev \ - verilator \ nano \ zsh \ rsync \ @@ -65,6 +67,16 @@ RUN apt-get update && \ RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config RUN locale-gen "en_US.UTF-8" +# install Verilator from source to get the right version +RUN apt-get install -y git perl python3 make autoconf g++ flex bison ccache libgoogle-perftools-dev numactl perl-doc libfl2 libfl-dev zlibc zlib1g zlib1g-dev +RUN git clone https://github.com/verilator/verilator +RUN cd verilator && \ + git checkout v4.012 && \ + autoconf && \ + ./configure && \ + make -j4 && \ + make install + # install XRT RUN wget https://www.xilinx.com/bin/public/openDownload?filename=$XRT_DEB_VERSION.deb -O /tmp/$XRT_DEB_VERSION.deb RUN apt install -y /tmp/$XRT_DEB_VERSION.deb diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 556e6d040db3140916d75632b9bdead3c1d38747..b5c702111ac706df8d29947ce8768e499ee3627a 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -113,5 +113,6 @@ else yecho "If you need Vitis HLS, ensure HLS_PATH is set correctly and mounted into the Docker container." fi +export PATH=$PATH:$HOME/.local/bin # execute the provided command(s) as root exec "$@" diff --git a/docker/jenkins/Jenkinsfile b/docker/jenkins/Jenkinsfile index ad533efa5d8bbab68837e6092f91c4767cde60f7..e3e5b5f7f93c312269f2c96942e44318875903e0 100644 --- a/docker/jenkins/Jenkinsfile +++ b/docker/jenkins/Jenkinsfile @@ -9,7 +9,7 @@ node { "FINN_XILINX_VERSION=2022.1", "FINN_DOCKER_TAG=xilinx/finn:jenkins", "FINN_HOST_BUILD_DIR=/scratch/users/finn_ci", - "PLATFORM_REPO_PATHS=/opt/xilinx/dsa" + "PLATFORM_REPO_PATHS=/opt/xilinx/platforms" ]){ parallel firstBranch: { stage('Brevitas export') { diff --git a/docker/quicktest.sh b/docker/quicktest.sh index f625f2b1ef722f386180a8409a9eb9e759a2f3b6..b4ad37232fa69754a86e9064d7592d7474e8617e 100755 --- a/docker/quicktest.sh +++ b/docker/quicktest.sh @@ -2,7 +2,7 @@ : ${PYTEST_PARALLEL=auto} -cd $FINN_ROOT/finn +cd $FINN_ROOT # check if command line argument is empty or not present if [ -z $1 ]; then echo "Running quicktest: not (vivado or slow or board) with pytest-xdist" diff --git a/fetch-repos.sh b/fetch-repos.sh index 88f0a3822a36df7d5ff3a86df31f5f3e9bb2181c..fb00faccea87abf56d6e2fce6d5c5c92af57199a 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -27,13 +27,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-QONNX_COMMIT="4a4826641db8d34619d31eac155fe95af11692eb"
+QONNX_COMMIT="92184fea2dd417bc7a53c82811fef271e4833c4c"
 FINN_EXP_COMMIT="9cbd2787b5160e2b44e0e8164a0df1457dbd5366"
 BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03"
 PYVERILATOR_COMMIT="64b8294ff1afebb47be76fcad6ae87027e0402c2"
 CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
-HLSLIB_COMMIT="e9946e5e56acd85837e8e79224d2bb60764bed69"
-OMX_COMMIT="a97f0bf145a2f7e57ca416ea76c9e45df4e9aa37"
+HLSLIB_COMMIT="79d7c61fbe318bfcd56e3c35bbfb774995a7870c"
+OMX_COMMIT="d1065a788219ca0eb54d5e57600b1f9d7f67d4cc"
 AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
 XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e"
 EXP_BOARD_FILES_MD5="30eecc497c31050bd46d10ea20eba232"
diff --git a/requirements.txt b/requirements.txt
index e1c65bbf923a3561832d2d5296d9894ff9419855..970acc342bb7984e69929d1ef5eaa027b765ced0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ dataclasses-json==0.5.7
 docrep==0.2.7
 future==0.18.2
 gspread==3.6.0
-numpy==1.18.0
+numpy==1.22.0
 onnx==1.11.0
 onnxoptimizer
 onnxruntime==1.11.1
@@ -13,6 +13,7 @@ protobuf==3.20.1
 pyscaffold==3.2.1
 scipy==1.5.2
 setupext-janitor>=1.1.2
+sigtools==2.0.3
 toposort==1.5
 vcdvcd==1.0.5
 wget==3.2
diff --git a/run-docker.sh b/run-docker.sh
index 95c023ee8795797000425ab421798c1a969e6f38..aab0df1f1c60d894cc26ddde5facfdd93ee18c6a 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -88,7 +88,7 @@ SCRIPTPATH=$(dirname "$SCRIPT")
 : ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"}
 : ${XRT_DEB_VERSION="xrt_202210.2.13.466_18.04-amd64-xrt"}
 : ${FINN_HOST_BUILD_DIR="/tmp/$DOCKER_INST_NAME"}
-: ${FINN_DOCKER_TAG="xilinx/finn:$(git describe --tags --dirty).$XRT_DEB_VERSION"}
+: ${FINN_DOCKER_TAG="xilinx/finn:$(git describe --always --tags --dirty).$XRT_DEB_VERSION"}
 : ${FINN_DOCKER_PREBUILT="0"}
 : ${FINN_DOCKER_RUN_AS_ROOT="0"}
 : ${FINN_DOCKER_GPU="$(docker info | grep nvidia | wc -m)"}
diff --git a/src/finn/builder/build_dataflow.py b/src/finn/builder/build_dataflow.py
index 238083f653d410772a81115ff12dd987835d1f32..d6864994a70a0ea4c24567155ff7c0599bc0fb6f 100644
--- a/src/finn/builder/build_dataflow.py
+++ b/src/finn/builder/build_dataflow.py
@@ -155,12 +155,14 @@ def build_dataflow_cfg(model_filename, cfg: DataflowBuildConfig):
             % (step_name, step_num, len(build_dataflow_steps))
         )
         # redirect output to logfile
-        sys.stdout = stdout_logger
-        sys.stderr = stderr_logger
-        print(
-            "Running step: %s [%d/%d]"
-            % (step_name, step_num, len(build_dataflow_steps))
-        )
+        if not cfg.verbose:
+            sys.stdout = stdout_logger
+            sys.stderr = stderr_logger
+            # also log current step name to logfile
+            print(
+                "Running step: %s [%d/%d]"
+                % (step_name, step_num, len(build_dataflow_steps))
+            )
         # run the step
         step_start = time.time()
         model = transform_step(model, cfg)
diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py
index 09e9ec3a564dc2b459cd1ea3205e541f922b1af0..92263bd82ce291833c6868847876ac7e3b68e6f8 100644
--- a/src/finn/builder/build_dataflow_config.py
+++ b/src/finn/builder/build_dataflow_config.py
@@ -285,6 +285,10 @@ class DataflowBuildConfig:
     #: Whether pdb postmortem debugging will be launched when the build fails
     enable_build_pdb_debug: Optional[bool] = True

+    #: When True, all warnings and compiler output will be printed to stdout.
+    #: Otherwise, these will be suppressed and only appear in the build log.
+    verbose: Optional[bool] = False
+
     #: If given, only run the steps in the list. If not, run default steps.
#: See `default_build_dataflow_steps` for the default list of steps. #: When specified: diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py index 07eda6aa1d82df0a9f9a01d4f17f7880a8cf8b26..3533fd13399a4ba4392d66af785979afc32cab29 100644 --- a/src/finn/core/throughput_test.py +++ b/src/finn/core/throughput_test.py @@ -157,8 +157,8 @@ def throughput_test_rtlsim(model, batchsize=100): res["cycles"] = cycles res["runtime[ms]"] = runtime_s * 1000 res["throughput[images/s]"] = batchsize / runtime_s - res["DRAM_in_bandwidth[Mb/s]"] = i_bytes * 0.000001 / runtime_s - res["DRAM_out_bandwidth[Mb/s]"] = o_bytes * 0.000001 / runtime_s + res["DRAM_in_bandwidth[MB/s]"] = i_bytes * 0.000001 / runtime_s + res["DRAM_out_bandwidth[MB/s]"] = o_bytes * 0.000001 / runtime_s res["fclk[mhz]"] = fclk_mhz res["N"] = batchsize diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py index 9978ab0c7138aa6846a1427cd346c5257e4f8728..b202e95a28a26de3dabc098c2030bafcf840d164 100644 --- a/src/finn/custom_op/fpgadataflow/hlscustomop.py +++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py @@ -397,18 +397,20 @@ class HLSCustomOp(CustomOp): builder.build(code_gen_dir) self.set_nodeattr("executable_path", builder.executable_path) - def dynamic_input_to_npy(self, context, count): + def dynamic_input_to_npy(self, context, count, target_dir=""): """Saves input (given context) into .npy files. Count indicates the number of inputs that have to be saved.""" node = self.onnx_node - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - if code_gen_dir == "": - raise Exception( + if target_dir == "": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + if code_gen_dir == "": + raise Exception( + """ + Found no codegen dir for this node, did you run the prepare_cppsim transformation? """ -Found no codegen dir for this node, did you run the prepare_cppsim transformation? 
- """ - ) + ) + target_dir = code_gen_dir # create a npy file for each input of the node (in_ind is input index) # assuming dynamic inputs start from 0 for in_ind in range(count): @@ -427,7 +429,7 @@ Found no codegen dir for this node, did you run the prepare_cppsim transformatio # make copy before saving the array reshaped_input = reshaped_input.copy() np.save( - os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), + os.path.join(target_dir, "input_{}.npy".format(in_ind)), reshaped_input, ) diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index d9ffea4d9cd8895fdf55a497e8c7d0e49808ac95..882b40a0aaf542e6dcaf427ca3567ae78394ede5 100755 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -138,14 +138,22 @@ class StreamingMaxPool_Batch(HLSCustomOp): def get_exp_cycles(self): # derived from StreamingMaxPool_Batch loop nest ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() - _, _, ofm_dim_w, nf, _ = self.get_folded_output_shape() + warnings.warn( + """Estimated latency for layer {} can be lower than + actual latency!""".format( + self.onnx_node.name + ) + ) if self.is_1d(): - exp_cycles = ofm_dim_w * nf * (k[1] + 1) + _, _, _, nf, _ = self.get_folded_output_shape() + ceil_mode = self.get_nodeattr("CeilMode") + ofm_dim = compute_pool_output_dim(ifm_dim[1], k[1], k[1], 0, ceil_mode) + exp_cycles = ofm_dim * nf * (k[1] + 1) return int(exp_cycles) else: # TODO: adjust inaccurate formula - return int(ifm_dim[1] * (ifm_dim[1] + (ifm_dim[1] / k[1]))) + return int(ifm_dim[1] * ifm_dim[1] * (1 + 1 / (k[1] * k[1]))) def get_instream_width(self): dt_bits = self.get_input_datatype().bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/upsampler.py b/src/finn/custom_op/fpgadataflow/upsampler.py index b62e4f2f6784e8964232efcc9971f0b8bc35ac5d..eb51fe39fc6e7ec84204f9d541a0e47c333bbf43 100644 --- a/src/finn/custom_op/fpgadataflow/upsampler.py +++ b/src/finn/custom_op/fpgadataflow/upsampler.py @@ -27,7 +27,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import numpy as np -import os import warnings from qonnx.core.datatype import DataType @@ -57,6 +56,8 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): "inputDataType": ("s", True, ""), # Batch size "numInputVectors": ("i", False, 1), + # Dimensionality mode: 0 = 2D square, 1 = 1D in H dim + "DimMode": ("i", False, 0), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -64,21 +65,34 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): def get_exp_cycles(self): OFMDim = self.get_nodeattr("OFMDim") batch_size = self.get_nodeattr("numInputVectors") - exp_cycles = OFMDim * OFMDim * batch_size + is_2d = self.get_nodeattr("DimMode") == 0 + reps = 1 + if is_2d: + OFMDim = OFMDim * OFMDim + reps = batch_size + exp_cycles = OFMDim * reps return int(exp_cycles) def get_normal_input_shape(self): IFMDim = self.get_nodeattr("IFMDim") num_ch = self.get_nodeattr("NumChannels") batch = self.get_nodeattr("numInputVectors") - ishape = (batch, IFMDim, IFMDim, num_ch) + is_2d = self.get_nodeattr("DimMode") == 0 + if is_2d: + ishape = (batch, IFMDim, IFMDim, num_ch) + else: + ishape = (batch, IFMDim, 1, num_ch) return ishape def get_normal_output_shape(self): OFMDim = self.get_nodeattr("OFMDim") num_ch = self.get_nodeattr("NumChannels") batch = self.get_nodeattr("numInputVectors") - oshape = (batch, OFMDim, OFMDim, num_ch) + is_2d = self.get_nodeattr("DimMode") == 0 + if is_2d: + oshape = (batch, OFMDim, OFMDim, num_ch) + else: + oshape = (batch, OFMDim, 1, num_ch) return oshape def get_folded_input_shape(self): @@ -187,10 +201,19 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): ) def docompute(self): - self.code_gen_dict["$DOCOMPUTE$"] = [ - """UpsampleNearestNeighbour_Batch<OFMDim, IFMDim, IFMChannels, - ap_uint<Input_precision> > (in0, out, numReps);""" - ] + is_2d = self.get_nodeattr("DimMode") == 0 + batch = self.get_nodeattr("numInputVectors") + if is_2d: + self.code_gen_dict["$DOCOMPUTE$"] = [ + """UpsampleNearestNeighbour_Batch<OFMDim, IFMDim, IFMChannels, + ap_uint<Input_precision> > (in0, out, numReps);""" + ] + else: + assert batch == 1, "1D upsampler currently needs numReps=1" + self.code_gen_dict["$DOCOMPUTE$"] = [ + """UpsampleNearestNeighbour_1D<OFMDim, IFMDim, IFMChannels, + ap_uint<Input_precision> > (in0, out);""" + ] def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -246,7 +269,6 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): node = self.onnx_node exp_ishape = self.get_normal_input_shape() exp_oshape = self.get_normal_output_shape() - folded_ishape = self.get_folded_input_shape() folded_oshape = self.get_folded_output_shape() if mode == "cppsim": @@ -268,9 +290,7 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): ), """Input shape doesn't match expected shape (numInputVectors, ImgDim, ImgDim, NumChannels).""" export_idt = self.get_input_datatype() - - reshaped_input = inp.reshape(folded_ishape) - np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + self.dynamic_input_to_npy(context, 1, target_dir=code_gen_dir) if mode == "cppsim": # execute the precompiled model diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py index 497477da9d4cff736dc32eb27532e658890d5cc7..2096760580b4f33ba1ab09564ebba1601c4dc23c 100644 --- a/src/finn/qnn-data/templates/driver/driver_base.py +++ b/src/finn/qnn-data/templates/driver/driver_base.py @@ -439,13 +439,13 @@ class FINNExampleOverlay(Overlay): total_in = 0 for i in range(self.num_inputs): total_in += 
np.prod(self.ishape_packed(i)) - res["DRAM_in_bandwidth[Mb/s]"] = total_in * 0.000001 / runtime + res["DRAM_in_bandwidth[MB/s]"] = total_in * 0.000001 / runtime total_out = 0 for o in range(self.num_outputs): total_out += np.prod(self.oshape_packed(o)) - res["DRAM_out_bandwidth[Mb/s]"] = total_out * 0.000001 / runtime + res["DRAM_out_bandwidth[MB/s]"] = total_out * 0.000001 / runtime for iwdma, iwbuf, iwdma_name in self.external_weights: - res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = ( + res["DRAM_extw_%s_bandwidth[MB/s]" % iwdma_name] = ( self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime ) if self.platform == "zynq-iodma": diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py index f0bd5fbd0670e5088372383b16690ab67878334d..429bc34ffc59b5d98bb559f36ac557de4dbba92f 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py @@ -285,20 +285,25 @@ class InferUpsample(Transformation): ) # Assumes nhwc layout for scales and input - assert scales[1] == scales[2], ( - "%s: Upsampling is only supported for quadratic scales." % n.name + is_scale_square_2d = scales[1] == scales[2] + is_scale_1d = scales[1] > 1 and scales[2] == 1 + assert is_scale_square_2d or is_scale_1d, ( + "%s: Upsampling only supported for 1D H, or 2D square scaling" + % n.name ) assert scales[0] == scales[3] == 1, ( n.name + ": Upsampling is only supported for scales with " - "the first and last dimensions being 1." + "the first and last dimensions being 1 in NHWC." ) spatial_scale = scales[1] assert spatial_scale == int(spatial_scale), ( "%s: Upsampling is only supported for integer scales." % n.name ) + is_shape_square_2d = in_shape[1] == in_shape[2] + is_shape_1d = in_shape[1] > 1 and in_shape[2] == 1 - assert in_shape[1] == in_shape[2], ( - "%s: Upsampling is only supported for quadratic input shapes." + assert is_shape_square_2d or is_shape_1d, ( + "%s: Upsampling is only supported for 1D H or 2D square inputs." 
% n.name ) @@ -308,6 +313,7 @@ class InferUpsample(Transformation): NumChannels = in_shape[-1] numInputVectors = in_shape[0] inputDataType = dt.name + dim_mode = 0 if is_shape_square_2d else 1 # Insert the HLSCustomOp node Upsample_HLS_node = helper.make_node( @@ -321,6 +327,7 @@ class InferUpsample(Transformation): NumChannels=NumChannels, inputDataType=inputDataType, numInputVectors=numInputVectors, + DimMode=dim_mode, name="UpsampleNearestNeighbour_Batch_" + n.name, ) diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index 7c978cf61a465cacb4d562634d950311ed992021..892ab09fdf41947f86e2bf122e057e94585dfa8c 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -534,8 +534,9 @@ class CreateStitchedIP(Transformation): tcl.append("ipx::save_core [ipx::find_open_core %s]" % block_vlnv) # export list of used Verilog files (for rtlsim later on) tcl.append( - "set all_v_files [get_files -filter {FILE_TYPE == Verilog " - + "&& USED_IN_SYNTHESIS == 1} ]" + "set all_v_files [get_files -filter {USED_IN_SYNTHESIS == 1 " + + "&& (FILE_TYPE == Verilog || FILE_TYPE == SystemVerilog " + + '|| FILE_TYPE =="Verilog Header")}]' ) v_file_list = "%s/all_verilog_srcs.txt" % vivado_stitch_proj_dir tcl.append("set fp [open %s w]" % v_file_list) diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index 863523605580ef77559b65a1abd72802daff187d..dce98e54a3d62d72b83ebed21aa0604f0f6fa8ce 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -118,12 +118,21 @@ class MakePYNQDriver(Transformation): files_to_copy.append( (qonnx_path + "/util/basic.py", qonnx_target_path + "/util/basic.py") ) + files_to_copy.append( + (qonnx_path + "/util/__init__.py", qonnx_target_path + "/util/__init__.py") + ) files_to_copy.append( ( finn_util_path + "/data_packing.py", finn_target_path + "/util/data_packing.py", ) ) + files_to_copy.append( + ( + finn_util_path + "/__init__.py", + finn_target_path + "/util/__init__.py", + ) + ) for (src_file, target_file) in files_to_copy: shutil.copy(src_file, target_file) # extract input-output shapes from the graph diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py index 8d4aec259c440e311f6e3a6fb4d0359d55d738ca..6070cce636f50473545ab8a33c7867b7e1eb7f9c 100644 --- a/src/finn/transformation/fpgadataflow/synth_ooc.py +++ b/src/finn/transformation/fpgadataflow/synth_ooc.py @@ -52,7 +52,7 @@ class SynthOutOfContext(Transformation): top_module_name = model.get_metadata_prop("wrapper_filename") top_module_name = file_to_basename(top_module_name).strip(".v") build_dir = make_build_dir("synth_out_of_context_") - verilog_extensions = [".v", ".vh"] + verilog_extensions = [".v", ".sv", ".vh"] with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f: all_verilog_srcs = f.read().split() for file in all_verilog_srcs: diff --git a/src/finn/transformation/qonnx/fold_quant_weights.py b/src/finn/transformation/qonnx/fold_quant_weights.py index 80b6042d03ea11a45493011288133ed3a6f57c8d..e8339ae24472fa238e5c5da176b1316611218a54 100644 --- a/src/finn/transformation/qonnx/fold_quant_weights.py +++ b/src/finn/transformation/qonnx/fold_quant_weights.py @@ -126,10 +126,20 @@ class FoldQuantWeights(Transformation): 
model.set_tensor_datatype(node_out, new_dtype) # Reshape scale for Conv if required + target_output_shape = model.get_tensor_shape( + target_node.output[0] + ) if target_node.op_type == "Conv" and len(scale.shape) > 0: - bias_shape = [1] * len(scale.shape) - bias_shape[1] = -1 - scale = scale.reshape(bias_shape) + conv_out_shape = [1] * len(target_output_shape) + # only support per-output channel scaling + # (i.e. all scale shape elems besides 0th must be 1s) + if len(scale.shape) > 1: + assert ( + np.prod(scale.shape[1:]) == 1 + ), "Can't fold scale beyond per-out-channel granularity" + # collect all scaling in channels dim (since we constrain) + conv_out_shape[1] = -1 + scale = scale.reshape(conv_out_shape) if scale.shape == (1,): scale = scale[0] diff --git a/src/finn/transformation/qonnx/quant_act_to_multithreshold.py b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py index c52d69b0f09d306c5b076bb6ef1775f38977241a..77025ecdf57d5a422992d4163d05c740454986bb 100644 --- a/src/finn/transformation/qonnx/quant_act_to_multithreshold.py +++ b/src/finn/transformation/qonnx/quant_act_to_multithreshold.py @@ -110,11 +110,6 @@ class ConvertQuantActToMultiThreshold(Transformation): predecessor_op_type = predecessor[0].op_type else: predecessor_op_type = predecessor - if model.is_fork_node(n): - raise ValueError( - "Forking Quant/BipolarQuant nodes are currently " - "not supported by FINN." - ) if n.op_type == "Quant" and not model.get_initializer(n.input[2]) == 0: raise ValueError( "Only Quant nodes with zero-point == 0 are currently supported." diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py index 0299c4f4d89d1fdd94434db77c77a0e529c86d26..a983e67750a0a860eeeb4b429f7d6b181fc84fe3 100644 --- a/src/finn/transformation/streamline/absorb.py +++ b/src/finn/transformation/streamline/absorb.py @@ -473,7 +473,7 @@ class AbsorbConsecutiveTransposes(Transformation): """Remove (Transpose -> Transpose) patterns when the input and output of the pattern have the same layout.""" - def Are_opposite_permutations(self, perms1, perms2): + def are_opposite_permutations(self, perms1, perms2): if len(perms1) != len(perms2): return False assert 0 <= max(perms2) < len(perms2), "invalid permutation" @@ -488,72 +488,40 @@ class AbsorbConsecutiveTransposes(Transformation): def apply(self, model): graph = model.graph graph_modified = False - for n in graph.node: - if n.op_type == "Transpose": - if model.is_fork_node(n): - next_nodes = model.find_direct_successors(n) - perms1 = list(get_by_name(n.attribute, "perm").ints) - - # check if all nodes after fork are opposite transposes - all_opposite_transposes = True - for next_node in next_nodes: - if next_node is not None and next_node.op_type == "Transpose": - perms2 = list(get_by_name(next_node.attribute, "perm").ints) - if not self.Are_opposite_permutations(perms1, perms2): - all_opposite_transposes = False - break - else: - all_opposite_transposes = False - break - - if not all_opposite_transposes: - continue - - prod = model.find_producer(n.input[0]) - for next_node in next_nodes: - # connect next_node's consumer input to n's producer output - # TODO implement this to allow for forks as producers and - # joins as consumers - cons = model.find_consumer(next_node.output[0]) - cons.input[0] = prod.output[0] - - # remove consumer transpose - graph.node.remove(next_node) - - # remove producer transpose - graph.node.remove(n) - graph_modified = True - - else: - next_node = model.find_consumer(n.output[0]) + for 
node in graph.node: + if node.op_type == "Transpose": + next_nodes = model.find_consumers(node.output[0]) + perms1 = list(get_by_name(node.attribute, "perm").ints) + # check if all nodes after fork are opposite transposes + all_opposite_transposes = True + for next_node in next_nodes: if next_node is not None and next_node.op_type == "Transpose": - perms1 = list(get_by_name(n.attribute, "perm").ints) perms2 = list(get_by_name(next_node.attribute, "perm").ints) - if self.Are_opposite_permutations(perms1, perms2): - - # connect next_node's consumer input to n's producer output - # TODO implement this to allow for forks as producers - consumers = model.find_direct_successors(next_node) - prod = model.find_producer(n.input[0]) - if prod is not None: - for cons in consumers: - for cons_in in cons.input: - if cons_in == next_node.output[0]: - prod.output[0] = cons_in - break - else: - # n.input[0] is top-level graph input - # wire consumers directly to that - for cons in consumers: - for i, iname in enumerate(cons.input): - if iname == next_node.output[0]: - cons.input[i] = n.input[0] - - # remove both transposes - graph.node.remove(n) - graph.node.remove(next_node) + if not self.are_opposite_permutations(perms1, perms2): + all_opposite_transposes = False + break + else: + all_opposite_transposes = False + break + if not all_opposite_transposes: + continue + source_tensor = node.input[0] + for next_node in next_nodes: + # connect next_node's consumers' appropriate input to n's input + # TODO how to handle top-level outputs if any? + nextnode_out = next_node.output[0] + assert nextnode_out not in [x.name for x in model.graph.output] + consumers = model.find_consumers(nextnode_out) + for cons in consumers: + for i, iname in enumerate(cons.input): + if iname == nextnode_out: + cons.input[i] = source_tensor + # remove consumer transpose + graph.node.remove(next_node) + # remove producer transpose + graph.node.remove(node) + graph_modified = True - graph_modified = True if graph_modified: model = model.transform(InferDataTypes()) return (model, graph_modified) diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 9ff8a2173ce81e2a19c56bbd20a326759c3b9df2..3e815c1537353cc2be970a2068d4ded30cc48bc8 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -553,6 +553,8 @@ class MoveLinearPastEltwiseAdd(Transformation): # Other transform should handle that if prod0 is None or prod1 is None or (prod0 == prod1): continue + if len(prod0.input) < 2 or len(prod1.input) < 2: + continue init0 = model.get_initializer(prod0.input[1]) init1 = model.get_initializer(prod1.input[1]) # if either initializer is None, skip @@ -728,9 +730,10 @@ class MoveOpPastFork(Transformation): can be merged with nodes in the branches """ - def __init__(self, op_name_list): + def __init__(self, op_name_list, get_attrs_fxn=lambda x: {}): super().__init__() self.ops_to_move = op_name_list + self.get_attrs_fxn = get_attrs_fxn def apply(self, model): graph = model.graph @@ -747,9 +750,10 @@ class MoveOpPastFork(Transformation): # Restrict this transform to operations with constant parameters # Assuming parameters is in input 1 - op_init_param = model.get_initializer(n.input[1]) - if op_init_param is None: - continue + if len(n.input) > 1: + op_init_param = model.get_initializer(n.input[1]) + else: + op_init_param = None # Check case when branches are empty and go # to the same node @@ -766,16 +770,20 @@ class 
MoveOpPastFork(Transformation): for consumer_node in consumers[1:]: # create new node - new_param_name = model.make_new_valueinfo_name() new_output_tensor_name = model.make_new_valueinfo_name() + if op_init_param is None: + new_inp_list = [n.input[0]] + else: + new_param_name = model.make_new_valueinfo_name() + new_inp_list = [n.input[0], new_param_name] + model.set_initializer(new_param_name, op_init_param) + attrs = self.get_attrs_fxn(n) + # TODO use copy of original node instead to get attrs? new_node = oh.make_node( - n.op_type, - [n.input[0], new_param_name], - [new_output_tensor_name], + n.op_type, new_inp_list, [new_output_tensor_name], **attrs ) graph.node.insert(node_ind, new_node) node_ind += 1 - model.set_initializer(new_param_name, op_init_param) # change consumer input tensor graph.node.remove(consumer_node) @@ -811,6 +819,13 @@ class MoveLinearPastFork(MoveOpPastFork): super().__init__(["Add", "Mul"]) +class MoveTransposePastFork(MoveOpPastFork): + def __init__(self): + super().__init__( + ["Transpose"], lambda x: {"perm": get_by_name(x.attribute, "perm").ints} + ) + + class MoveMaxPoolPastMultiThreshold(Transformation): """Move MaxPool nodes past MultiThreshold nodes on linear segments of the graph.""" diff --git a/src/finn/util/pyverilator.py b/src/finn/util/pyverilator.py index 3396561e06f553785e842ec0b6626bc405d262c5..f6a51da8e44ea60ae5693cdd033b39bdf51376ac 100644 --- a/src/finn/util/pyverilator.py +++ b/src/finn/util/pyverilator.py @@ -74,7 +74,9 @@ def pyverilate_stitched_ip( # are identical but in multiple directories (regslice_core.v) # remove duplicates from list by doing list -> set -> list - all_verilog_files = list(set(filter(lambda x: x.endswith(".v"), all_verilog_srcs))) + all_verilog_files = list( + set(filter(lambda x: x.endswith(".v") or x.endswith(".sv"), all_verilog_srcs)) + ) # remove all but one instances of regslice_core.v filtered_verilog_files = [] diff --git a/tests/brevitas/test_brevitas_relu_act_export.py b/tests/brevitas/test_brevitas_relu_act_export.py index b0c3d6088c27291f1f49dd2f1ee746b65ca0a737..3dc46ec31e49d7115b19b3373d54be6ddc29bb80 100644 --- a/tests/brevitas/test_brevitas_relu_act_export.py +++ b/tests/brevitas/test_brevitas_relu_act_export.py @@ -41,6 +41,7 @@ from brevitas.nn import QuantReLU from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.infer_shapes import InferShapes from qonnx.util.cleanup import cleanup as qonnx_cleanup +from torch import nn import finn.core.onnx_exec as oxe from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN @@ -179,3 +180,83 @@ scaling_impl.learned_value": rand_tensor.type( assert np.isclose(produced, expected, atol=1e-3).all() os.remove(export_onnx_path) + + +class PyTorchTestModel(nn.Module): + def __init__(self, abits): + super(PyTorchTestModel, self).__init__() + out_channels = 32 + self.b_act = QuantReLU( + bit_width=abits, + quant_type=QuantType.INT, + scaling_impl_type=ScalingImplType.PARAMETER, + scaling_per_channel=True, + restrict_scaling_type=RestrictValueType.LOG_FP, + scaling_min_val=2e-16, + max_val=6.0, + return_quant_tensor=False, + per_channel_broadcastable_shape=(1, out_channels, 1, 1), + ) + + def forward(self, x): + act_out = self.b_act(x) + y0 = act_out * 2.0 + y1 = act_out * -1.0 + y = y0 + y1 + return y + + +@pytest.mark.brevitas_export +@pytest.mark.parametrize("abits", [2, 4, 8]) +@pytest.mark.parametrize("max_val", [1.0, 1.5, 1 - 2 ** (-7)]) +@pytest.mark.parametrize("scaling_per_channel", [True]) 
+@pytest.mark.parametrize("QONNX_export", [True]) +def test_brevitas_act_export_relu_forking( + abits, max_val, scaling_per_channel, QONNX_export +): + out_channels = 32 + ishape = (1, out_channels, 1, 1) + min_val = -1.0 + model_pyt = PyTorchTestModel(abits) + + rand_tensor = (2) * torch.rand((1, out_channels, 1, 1)) + + checkpoint = { + "b_act.act_quant_proxy.fused_activation_quant_proxy." + "tensor_quant.scaling_impl.learned_value": rand_tensor.type(torch.FloatTensor) + } + model_pyt.load_state_dict(checkpoint) + + if QONNX_export: + m_path = export_onnx_path + BrevitasONNXManager.export(model_pyt, ishape, m_path) + qonnx_cleanup(m_path, out_file=m_path) + model = ModelWrapper(m_path) + model = model.transform(ConvertQONNXtoFINN()) + model.save(m_path) + + model = ModelWrapper(export_onnx_path) + model = model.transform(InferShapes()) + inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype( + np.float32 + ) + idict = {model.graph.input[0].name: inp_tensor} + odict = oxe.execute_onnx(model, idict, True) + produced = odict[model.graph.output[0].name] + inp_tensor = torch.from_numpy(inp_tensor).float() + model_pyt.eval() + expected = model_pyt.forward(inp_tensor).detach().numpy() + if not np.isclose(produced, expected, atol=1e-3).all(): + print(abits, max_val) + print("scale: ", model_pyt.quant_act_scale().type(torch.FloatTensor).detach()) + if abits < 5: + print( + "thres:", + ", ".join(["{:8.4f}".format(x) for x in model_pyt.export_thres[0]]), + ) + print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]])) + print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]])) + print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]])) + + assert np.isclose(produced, expected, atol=1e-3).all() + os.remove(export_onnx_path) diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index 4139c74de677e7e7108211b35ec7fdf01bd138d9..103f18b514c23c4e1ad35a85d020dc0481aa9c47 100644 --- a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -788,7 +788,7 @@ class TestEnd2End: ret_str += "\n" + "Raw data:" ret_str += "\n" + "{:<8} {:<16} {:<16} {:<16} {:<16} {:<16}".format( - "N", "runtime[ms]", "fclk[mhz]", "fps", "DRAM rd[Mb/s]", "DRAM wr[Mb/s]" + "N", "runtime[ms]", "fclk[mhz]", "fps", "DRAM rd[MB/s]", "DRAM wr[MB/s]" ) for k in bsize_range: v = ret[k] @@ -797,8 +797,8 @@ class TestEnd2End: np.round(v["runtime[ms]"], 4), v["fclk[mhz]"], np.round(v["throughput[images/s]"], 2), - np.round(v["DRAM_in_bandwidth[Mb/s]"], 2), - np.round(v["DRAM_out_bandwidth[Mb/s]"], 2), + np.round(v["DRAM_in_bandwidth[MB/s]"], 2), + np.round(v["DRAM_out_bandwidth[MB/s]"], 2), ) ret_str += "\n" + "-----------------------------" warnings.warn(ret_str) diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py index 5e79ea2dad2aa4200f998fd8953672b9f49b2b86..495fcd10b6a977c6b0917ac37b58ec5595185c25 100644 --- a/tests/fpgadataflow/test_fpgadataflow_checksum.py +++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py @@ -133,6 +133,7 @@ def create_two_fc_model(): return model +@pytest.mark.vivado @pytest.mark.fpgadataflow def test_fpgadataflow_checksum(): # use a graph consisting of two fc layers to test diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py index dddc470ec2ed88faf078f19bd0d2a7a4a6b5b6cd..8488a34dff52d39c28fbea25275c9a4b59c37f80 100644 --- 
a/tests/fpgadataflow/test_fpgadataflow_concat.py +++ b/tests/fpgadataflow/test_fpgadataflow_concat.py @@ -144,6 +144,5 @@ def test_fpgadataflow_concat_stitchedip(): ) model.set_metadata_prop("exec_mode", "rtlsim") model.set_metadata_prop("rtlsim_trace", "trace.vcd") - model.save("dbg.onnx") ret_sim = execute_onnx(model, inp_dict) assert (exp_out == ret_sim[oname]).all() diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py similarity index 84% rename from tests/fpgadataflow/test_layer_streaming_maxpool_batch.py rename to tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py index 55c90644dfbb23fbc2da10cf969461abe6d38bf3..a3968cf79704092ffb5ec53c887842372b625f4d 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ b/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py @@ -32,6 +32,7 @@ from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim +from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes from qonnx.util.basic import gen_finn_dt_tensor @@ -82,46 +83,6 @@ def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_ return model -def make_single_streamingmaxpool_modelwrapper( - k, ifm_ch, pe, ifm_dim, ofm_dim, idt, ceil_mode -): - k_h, k_w = k - ifm_dim_h, ifm_dim_w = ifm_dim - ofm_dim_h, ofm_dim_w = ofm_dim - odt = idt - inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] - ) - outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch] - ) - - smp_node = helper.make_node( - "StreamingMaxPool_Batch", - ["inp"], - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - PoolDim=[k_h, k_w], - NumChannels=ifm_ch, - PE=pe, - ImgDim=[ifm_dim_h, ifm_dim_w], - CeilMode=ceil_mode, - dataType=idt.name, - ) - graph = helper.make_graph( - nodes=[smp_node], name="smp_graph", inputs=[inp], outputs=[outp] - ) - - model = helper.make_model(graph, producer_name="smp-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", odt) - - return model - - def prepare_inputs(input_tensor): return {"inp": input_tensor} @@ -187,6 +148,10 @@ def test_fpgadataflow_streamingmaxpool( assert model.graph.node[0].op_type == "StreamingMaxPool_Batch" + # Ensure PE value is set + streamingmaxpool_node = model.get_nodes_by_op_type("StreamingMaxPool_Batch")[0] + getCustomOp(streamingmaxpool_node).set_nodeattr("PE", pe) + if exec_mode == "cppsim": model = model.transform(SetExecMode("cppsim")) model = model.transform(PrepareCppSim()) @@ -198,7 +163,7 @@ def test_fpgadataflow_streamingmaxpool( model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) else: - raise Exception("Unknown exec_mode in test_layer_streaming_maxpool_batch") + raise Exception("Unknown exec_mode in test_fpgadataflow_streamingmaxpool") # execute model y_produced = oxe.execute_onnx(model, input_dict)["outp"] @@ -211,6 +176,7 @@ def test_fpgadataflow_streamingmaxpool( exp_cycles_dict = model.analysis(exp_cycles_per_layer) exp_cycles = exp_cycles_dict[node.name] # FIXME: maxpool cycles prediction needs a fix - # mostl likely due to some loops not flattening + # most likely due to inaccurate cycle 
prediction of + # nested for-loops # assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) assert exp_cycles != 0 diff --git a/tests/fpgadataflow/test_fpgadataflow_upsampler.py b/tests/fpgadataflow/test_fpgadataflow_upsampler.py index d1ef0b890a66524b7cbd055a413561961ebcb4a7..a08d31f7b05184a4d5c84ef927a05fe1fd6e43c3 100644 --- a/tests/fpgadataflow/test_fpgadataflow_upsampler.py +++ b/tests/fpgadataflow/test_fpgadataflow_upsampler.py @@ -30,6 +30,7 @@ import pytest import numpy as np import os +import shutil import torch from brevitas.export import FINNManager from qonnx.core.datatype import DataType @@ -51,6 +52,7 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.util.basic import make_build_dir tmpdir = os.environ["FINN_BUILD_DIR"] @@ -117,7 +119,7 @@ class PyTorchTestModel(nn.Module): # param datatype @pytest.mark.parametrize("dt", [DataType["INT8"]]) -# Width/height of square input feature map +# spatial dim input feature map @pytest.mark.parametrize("IFMDim", [3, 5]) # upscaling factor @pytest.mark.parametrize("scale", [2, 3]) @@ -125,14 +127,22 @@ class PyTorchTestModel(nn.Module): @pytest.mark.parametrize("NumChannels", [4]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +# whether to use 1D or 2D square testcases +@pytest.mark.parametrize("is_1d", [False, True]) @pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow -def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode): +def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode, is_1d): + tmpdir = make_build_dir("upsample_export_") atol = 1e-3 + if is_1d: + input_shape = (1, NumChannels, IFMDim, 1) + upscale_factor = (scale, 1) + else: + input_shape = (1, NumChannels, IFMDim, IFMDim) + upscale_factor = (scale, scale) # Create the test model and inputs for it - torch_model = PyTorchTestModel(upscale_factor=scale) - input_shape = (1, NumChannels, IFMDim, IFMDim) + torch_model = PyTorchTestModel(upscale_factor=upscale_factor) test_in = torch.arange(0, np.prod(np.asarray(input_shape))) # Limit the input to values valid for the given datatype test_in %= dt.max() - dt.min() + 1 @@ -200,3 +210,4 @@ def test_fpgadataflow_upsampler(dt, IFMDim, scale, NumChannels, exec_mode): assert output_matches, "Cppsim output doesn't match ONNX/PyTorch." elif exec_mode == "rtlsim": assert output_matches, "Rtlsim output doesn't match ONNX/PyTorch." 
+    shutil.rmtree(tmpdir, ignore_errors=True)
diff --git a/tests/transformation/streamline/test_absorb_opposite_transposes.py b/tests/transformation/streamline/test_absorb_opposite_transposes.py
index 51ea5edfc420bf935de3e196df1b150934782a91..6d8d2b9f0cd4ad28c3ea0922d69b9b963a0deb08 100644
--- a/tests/transformation/streamline/test_absorb_opposite_transposes.py
+++ b/tests/transformation/streamline/test_absorb_opposite_transposes.py
@@ -29,8 +29,7 @@ import pytest

 import numpy as np
-import onnx.helper as oh
-from onnx import TensorProto
+import onnx.parser as oprs
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.infer_shapes import InferShapes
@@ -41,39 +40,42 @@ from finn.transformation.streamline.absorb import AbsorbConsecutiveTransposes
 @pytest.mark.streamline
 def test_absorb_opposite_transposes():
     np.random.seed(0)
-    input_shape = [1, 3, 4, 2]
-    top_in = oh.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
-    top_out = oh.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape)
-    value_info = [oh.make_tensor_value_info("add_param_0", TensorProto.FLOAT, [1])]
-    value_info += [oh.make_tensor_value_info("add_param_1", TensorProto.FLOAT, [1])]
-    value_info += [oh.make_tensor_value_info("mul_param_0", TensorProto.FLOAT, [1])]
-    modelproto = oh.make_model(
-        oh.make_graph(
-            name="test",
-            inputs=[top_in],
-            outputs=[top_out],
-            value_info=value_info,
-            nodes=[
-                oh.make_node("Add", ["top_in", "add_param_0"], ["t0"]),
-                oh.make_node("Transpose", ["t0"], ["t1"], perm=[0, 2, 3, 1]),
-                oh.make_node("Transpose", ["t1"], ["t2"], perm=[0, 3, 1, 2]),
-                oh.make_node("Add", ["t2", "add_param_1"], ["t3"]),
-                oh.make_node("Transpose", ["t3"], ["t4"], perm=[0, 2, 3, 1]),
-                oh.make_node("Transpose", ["t4"], ["t5"], perm=[0, 3, 1, 2]),
-                oh.make_node("Add", ["t5", "t2"], ["t6"]),
-                oh.make_node("Mul", ["t6", "mul_param_0"], ["top_out"]),
-            ],
-        )
-    )
-    model = ModelWrapper(modelproto)
+    shp = [1, 3, 4, 2]
+    shp_str = str(shp)
+    input = f"""
+    <
+        ir_version: 7,
+        opset_import: ["" : 9]
+    >
+    agraph (float{shp_str} in0) => (float{shp_str} out0)
+    <
+        float[1] add0_param = {{1.0}},
+        float[1] add1_param = {{3.0}},
+        float[1] mul0_param = {{2.0}}
+    >
+    {{
+        add0_out = Add(in0, add0_param)
+        t0_out = Transpose<perm=[0,2,3,1]>(add0_out)
+        t1_out = Transpose<perm=[0,3,1,2]>(t0_out)
+        add1_out = Add(t1_out, add1_param)
+        t2_out = Transpose<perm=[0,2,3,1]>(add1_out)
+        t3_out = Transpose<perm=[0,3,1,2]>(t2_out)
+        add2_out = Add(t1_out, t3_out)
+        t4_out = Transpose<perm=[0,2,3,1]>(add2_out)
+        t5_out = Transpose<perm=[0,3,1,2]>(t4_out)
+        t6_out = Transpose<perm=[0,3,1,2]>(t4_out)
+        m0_out = Mul(t5_out, mul0_param)
+        m1_out = Mul(t6_out, mul0_param)
+        out0 = Mul(m0_out, m1_out)
+    }}
+    """
+    model = oprs.parse_model(input)
+    model = ModelWrapper(model)
     model = model.transform(InferShapes())
-    model.set_initializer("add_param_0", np.asarray([1], dtype=np.float32))
-    model.set_initializer("add_param_1", np.asarray([3], dtype=np.float32))
-    model.set_initializer("mul_param_0", np.asarray([2], dtype=np.float32))
     new_model = model.transform(AbsorbConsecutiveTransposes())
     new_model = new_model.transform(InferShapes())
-    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}
+    inp_dict = {"in0": np.random.rand(*shp).astype(np.float32)}
     assert ox.compare_execution(model, new_model, inp_dict)
-    assert len(new_model.graph.node) == 4
+    assert len(new_model.graph.node) == 6
     for n in new_model.graph.node:
         assert n.op_type != "Transpose"
diff --git 
a/tests/transformation/streamline/test_move_past_fork.py b/tests/transformation/streamline/test_move_past_fork.py index 5064fa3fca869a245c87cf0c1680d1357e5de60b..7e77d7f9b3502429f08c40558e330b6261d0dbad 100644 --- a/tests/transformation/streamline/test_move_past_fork.py +++ b/tests/transformation/streamline/test_move_past_fork.py @@ -28,80 +28,113 @@ import pytest import numpy as np -from onnx import TensorProto, helper +import onnx.parser as oprs from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import get_by_name import finn.core.onnx_exec as oxe -from finn.transformation.streamline.reorder import MoveLinearPastFork +from finn.transformation.streamline.reorder import ( + MoveLinearPastFork, + MoveTransposePastFork, +) + + +@pytest.mark.streamline +def test_move_past_fork_transpose(): + shp = [1, 3, 32, 32] + shp_str = str(shp) + input = f""" + < + ir_version: 7, + opset_import: ["" : 9] + > + agraph (float{shp_str} in0) => (float{shp_str} out0) + {{ + t0_out = Transpose<perm=[0,2,3,1]>(in0) + t1_out = Transpose<perm=[0,3,1,2]>(t0_out) + t2_out = Transpose<perm=[0,3,1,2]>(t0_out) + out0 = Add(t1_out, t2_out) + }} + """ + model = oprs.parse_model(input) + model = ModelWrapper(model) + model = model.transform(InferShapes()) + new_model = model.transform(MoveTransposePastFork()) + new_model = new_model.transform(GiveUniqueNodeNames()) + nodes = new_model.graph.node + assert oxe.compare_execution( + model, new_model, {"in0": np.random.rand(*shp).astype(np.float32)} + ) + assert len(nodes) == 5 + assert not new_model.is_fork_node(get_by_name(nodes, "Transpose_0")) @pytest.mark.streamline @pytest.mark.parametrize("ch", [64, 1]) # ifmdim @pytest.mark.parametrize("ifmdim", [-1, 7]) -def test_move_past_fork(ch, ifmdim): - # generate test vectors of correct shape +def test_move_past_fork_linear(ch, ifmdim): if ifmdim == -1: - input_shape = (1, ch) + shp = [1, ch] else: - input_shape = (1, ch, ifmdim, ifmdim) + shp = [1, ch, ifmdim, ifmdim] + shp_str = str(shp) + input = f""" + < + ir_version: 7, + opset_import: ["" : 9] + > + agraph (float{shp_str} in0) => (float{shp_str} out0) + < + float{shp_str} add0_param, + float{shp_str} mul_shared_param, + float{shp_str} add2_param, + float{shp_str} mul2_param, + float{shp_str} add3_param, + float{shp_str} add4_param, + float{shp_str} mul3_param, + float{shp_str} add6_param + > + {{ - top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) - top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape) - - num_of_params = 8 - value_info = [] - for i in range(num_of_params): - value_info += [ - helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape) - ] - - add_1_to_move = helper.make_node("Add", ["top_in", "p0"], ["fork1"]) - mul_1_to_move = helper.make_node("Mul", ["t5", "p4"], ["fork2"]) - add_2_to_move = helper.make_node("Add", ["fork2", "p5"], ["t6"]) - mul_1_not_to_move = helper.make_node("Mul", ["t8", "p7"], ["fork3"]) - modelproto = helper.make_model( - helper.make_graph( - name="test", - inputs=[top_in], - outputs=[top_out], - value_info=value_info, - nodes=[ - # fork1 - add_1_to_move, - helper.make_node("Mul", ["fork1", "p1"], ["t2"]), - helper.make_node("Mul", ["fork1", "p2"], ["t3"]), - helper.make_node("Add", ["t2", "t3"], ["t4"]), - helper.make_node("Add", ["t4", "p3"], ["t5"]), - # fork2 - mul_1_to_move, - add_2_to_move, - 
helper.make_node("Add", ["fork2", "p6"], ["t7"]), - helper.make_node("Add", ["t6", "t7"], ["t8"]), - # empty branches: do nothing - mul_1_not_to_move, - helper.make_node("Add", ["fork3", "fork3"], ["top_out"]), - ], - ) - ) - model = ModelWrapper(modelproto) + add0_out = Add(in0, add0_param) + mul0_out = Mul(add0_out, mul_shared_param) + mul1_out = Mul(add0_out, mul_shared_param) + add1_out = Add(mul0_out, mul1_out) + add2_out = Add(add1_out, add2_param) + mul2_out = Mul(add2_out, mul2_param) + add3_out = Add(mul2_out, add3_param) + add4_out = Add(mul2_out, add4_param) + add5_out = Add(add3_out, add4_out) + mul3_out = Mul(add5_out, mul3_param) + out0 = Add(mul3_out, add6_param) + }} + """ + model = oprs.parse_model(input) + model = ModelWrapper(model) model = model.transform(InferShapes()) np.random.seed(0) - for i in range(num_of_params): - model.set_initializer( - "p" + str(i), np.random.rand(*input_shape).astype(np.float32) - ) - + for tensor_name in model.get_all_tensor_names(): + if tensor_name.endswith("_param"): + pshape = model.get_tensor_shape(tensor_name) + model.set_initializer( + tensor_name, np.random.rand(*pshape).astype(np.float32) + ) + model = model.transform(GiveUniqueNodeNames()) # Transform new_model = model.transform(MoveLinearPastFork()) - inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)} - + new_model = new_model.transform(GiveUniqueNodeNames()) + inp_dict = {"top_in": np.random.rand(*shp).astype(np.float32)} # Test assert oxe.compare_execution(model, new_model, inp_dict) - assert not new_model.is_fork_node(add_1_to_move) - assert not new_model.is_fork_node(mul_1_to_move) - assert not new_model.is_fork_node(add_2_to_move) - assert new_model.is_fork_node(mul_1_not_to_move) + nodes = new_model.graph.node + assert len(new_model.get_nodes_by_op_type("Add")) == 9 + assert len(new_model.get_nodes_by_op_type("Mul")) == 5 + assert not new_model.is_fork_node(get_by_name(nodes, "Add_0")) + assert new_model.is_join_node(get_by_name(nodes, "Add_2")) + assert not new_model.is_fork_node(get_by_name(nodes, "Mul_2")) + assert not new_model.is_join_node(get_by_name(nodes, "Add_5")) assert len(new_model.graph.node) == 14 diff --git a/tests/transformation/test_qonnx_to_finn.py b/tests/transformation/test_qonnx_to_finn.py index 43055f6704732866569ac4770202f1b4ff6bfb22..7e438b4b8ba9d9befca79100bb9727735afa27d3 100644 --- a/tests/transformation/test_qonnx_to_finn.py +++ b/tests/transformation/test_qonnx_to_finn.py @@ -94,6 +94,9 @@ def analysis_testing_for_no_quant_nodes(model): @pytest.mark.parametrize("wbits", [1, 2]) @pytest.mark.parametrize("model_name", ["TFC", "SFC", "LFC", "CNV", "mobilenet"]) def test_QONNX_to_FINN(model_name, wbits, abits): + if model_name == "mobilenet": + pytest.xfail("MobileNet test is temporarily excluded from QONNX testing.") + if wbits > abits: pytest.skip("No wbits > abits cases at the moment") if model_name == "LFC" and wbits == 2 and abits == 2: